/*
 * Copyright (C) 2012 by
 *   MetraLabs GmbH (MLAB), GERMANY
 * and
 *   Neuroinformatics and Cognitive Robotics Labs (NICR) at TU Ilmenau, GERMANY
 * All rights reserved.
 *
 * Contact: info@mira-project.org
 *
 * Commercial Usage:
 *   Licensees holding valid commercial licenses may use this file in
 *   accordance with the commercial license agreement provided with the
 *   software or, alternatively, in accordance with the terms contained in
 *   a written agreement between you and MLAB or NICR.
 *
 * GNU General Public License Usage:
 *   Alternatively, this file may be used under the terms of the GNU
 *   General Public License version 3.0 as published by the Free Software
 *   Foundation and appearing in the file LICENSE.GPL3 included in the
 *   packaging of this file. Please review the following information to
 *   ensure the GNU General Public License version 3.0 requirements will be
 *   met: http://www.gnu.org/copyleft/gpl.html.
 *   Alternatively you may (at your option) use any later version of the GNU
 *   General Public License if such license has been publicly approved by
 *   MLAB and NICR (or its successors, if any).
 *
 * IN NO EVENT SHALL "MLAB" OR "NICR" BE LIABLE TO ANY PARTY FOR DIRECT,
 * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
 * THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF "MLAB" OR
 * "NICR" HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * "MLAB" AND "NICR" SPECIFICALLY DISCLAIM ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND "MLAB" AND "NICR" HAVE NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS OR MODIFICATIONS.
 */

/**
 * @file Profiler.h
 *    Macros and classes for profiling code.
 *
 * @author Erik Einhorn
 * @date   2010/11/18
 */

#ifndef _MIRA_PROFILER_H_
#define _MIRA_PROFILER_H_

#ifdef _MSC_VER // Microsoft Visual C++
	#include <intrin.h>  // for rdtsc intrinsic
	#pragma intrinsic(__rdtsc)
#endif

#include <map>
#include <string>
#include <vector>

#ifndef Q_MOC_RUN
#include <boost/optional.hpp>
#endif

#include <thread/Thread.h>
#include <thread/Spinlock.h>
#include <platform/Types.h>
#include <utils/Time.h>
#include <utils/Singleton.h>

namespace mira {

///////////////////////////////////////////////////////////////////////////////
// MAIN DEFINES

// Public profiling macros.
// When MIRA_PROFILER_ENABLED is defined they forward to the internal
// _MIRA_PROFILE_* implementation macros; otherwise they expand to nothing,
// so instrumented code contains no profiling statements at all.
#ifdef MIRA_PROFILER_ENABLED
#  define MIRA_PROFILE_BEGIN(id) _MIRA_PROFILE_BEGIN(id)
#  define MIRA_PROFILE_END(id)   _MIRA_PROFILE_END  (id)
#  define MIRA_PROFILE_SCOPE(id) _MIRA_PROFILE_SCOPE(id)
#else
#  define MIRA_PROFILE_BEGIN(id)
#  define MIRA_PROFILE_END(id)
#  define MIRA_PROFILE_SCOPE(id)
#endif


// Level-dependent profiling macros.
// MIRA_PROFILER_LEVEL selects which of the suffixed macros are active; it
// defaults to 3 (all levels active) unless defined before this header is
// included. A macro with suffix N is active when MIRA_PROFILER_LEVEL >= N and
// expands to nothing otherwise. Active macros forward to the unsuffixed
// MIRA_PROFILE_* macros, so they still require MIRA_PROFILER_ENABLED to
// produce any code.
#ifndef MIRA_PROFILER_LEVEL
#  define MIRA_PROFILER_LEVEL 3
#endif

// Level 1: active for MIRA_PROFILER_LEVEL >= 1
#if MIRA_PROFILER_LEVEL >= 1
#  define MIRA_PROFILE_BEGIN1(id) MIRA_PROFILE_BEGIN(id)
#  define MIRA_PROFILE_END1(id) MIRA_PROFILE_END(id)
#  define MIRA_PROFILE_SCOPE1(id) MIRA_PROFILE_SCOPE(id)
#else
#  define MIRA_PROFILE_BEGIN1(id)
#  define MIRA_PROFILE_END1(id)
#  define MIRA_PROFILE_SCOPE1(id)
#endif

// Level 2: active for MIRA_PROFILER_LEVEL >= 2
#if MIRA_PROFILER_LEVEL >= 2
#  define MIRA_PROFILE_BEGIN2(id) MIRA_PROFILE_BEGIN(id)
#  define MIRA_PROFILE_END2(id) MIRA_PROFILE_END(id)
#  define MIRA_PROFILE_SCOPE2(id) MIRA_PROFILE_SCOPE(id)
#else
#  define MIRA_PROFILE_BEGIN2(id)
#  define MIRA_PROFILE_END2(id)
#  define MIRA_PROFILE_SCOPE2(id)
#endif

// Level 3: active for MIRA_PROFILER_LEVEL >= 3
#if MIRA_PROFILER_LEVEL >= 3
#  define MIRA_PROFILE_BEGIN3(id) MIRA_PROFILE_BEGIN(id)
#  define MIRA_PROFILE_END3(id) MIRA_PROFILE_END(id)
#  define MIRA_PROFILE_SCOPE3(id) MIRA_PROFILE_SCOPE(id)
#else
#  define MIRA_PROFILE_BEGIN3(id)
#  define MIRA_PROFILE_END3(id)
#  define MIRA_PROFILE_SCOPE3(id)
#endif

/////////////////////////////////////////////////////////////////////////////
// internal macros

// Implementation of MIRA_PROFILE_BEGIN(id). Declares three variables in the
// enclosing scope, all named after the token `id`:
//   _node_<id>   : function-local static, initialized once through
//                  Profiler::newProfileNode() with the stringized id and the
//                  current file/line
//   _parent_<id> : the value returned by Profiler::beginHierarchy()
//   _start_<id>  : the cycle count at the start of the section
// The matching END macro refers to these variables by name, so it must be
// used with the same id in a scope where they are visible.
#define __MIRA_PROFILE_BEGIN(id)                                                 \
	static mira::Profiler::Node* _node_##id =                                    \
		mira::Profiler::instance().newProfileNode(#id,__FILE__, __LINE__);       \
	mira::Profiler::Node* _parent_##id =                                         \
		mira::Profiler::instance().beginHierarchy(_node_##id,__FILE__, __LINE__);\
	uint64 _start_##id = mira::Profiler::getCycleCount();

// Forwarding layer: because `id` is not an operand of # or ## here, a macro
// passed as id is expanded before it reaches the stringizing/pasting macro.
#define _MIRA_PROFILE_BEGIN(id) __MIRA_PROFILE_BEGIN(id)

// Implementation of MIRA_PROFILE_END(id). Computes the elapsed cycles since
// the matching BEGIN (declaring _duration_<id>) and reports them through
// Profiler::endHierarchy() together with the node and parent recorded by BEGIN.
#define __MIRA_PROFILE_END(id)                                                   \
	uint64 _duration_##id = mira::Profiler::getCycleCount() - _start_##id;       \
	mira::Profiler::instance().endHierarchy(_node_##id, _parent_##id,            \
	                                        _duration_##id, __FILE__, __LINE__);

// Forwarding layer, see _MIRA_PROFILE_BEGIN.
#define _MIRA_PROFILE_END(id) __MIRA_PROFILE_END(id)

// Implementation of MIRA_PROFILE_SCOPE(id). Creates the function-local static
// node _node_<id> (as in BEGIN) and a Profiler::Scope object _scope_<id>, whose
// constructor starts the measurement and whose destructor ends it when the
// enclosing scope is left.
#define __MIRA_PROFILE_SCOPE(id)                                                 \
	static mira::Profiler::Node* _node_##id =                                    \
			mira::Profiler::instance().newProfileNode(#id,__FILE__, __LINE__);   \
	mira::Profiler::Scope _scope_##id(_node_##id,__FILE__, __LINE__);

// Forwarding layer, see _MIRA_PROFILE_BEGIN.
#define _MIRA_PROFILE_SCOPE(id) __MIRA_PROFILE_SCOPE(id)

/**
 * The main Profiler class. This class is used as singleton. The global
 * instance can be obtained using Profiler::instance().
 *
 *  USAGE:
 *
 *      Add a pair of MIRA_PROFILE_BEGIN and MIRA_PROFILE_END macros around
 *      the code you want to profile, or add the MIRA_PROFILE_SCOPE macro
 *      into the scope of code you want to profile (see example below).
 *
 *      To enable profiling add the following compiler option:
 *      \code
 *          -DMIRA_PROFILER_ENABLED
 *      \endcode
 *      e.g. by adding
 *      \code
 *      ADD_DEFINITIONS(-DMIRA_PROFILER_ENABLED)
 *      \endcode
 *      to your CMakeLists.txt file.
 *
 *  EXAMPLE:
 *
 *  \code
 *      #include <utils/Profiler.h>
 *
 *      void test()
 *      {
 *          MIRA_PROFILE_BEGIN(function_test);
 *          int sum=0;
 *          for(int i=0; i<10; ++i)
 *          {
 *              MIRA_PROFILE_SCOPE(in_for_loop);
 *              sum+=i; // the code we want to measure
 *          }
 *          MIRA_PROFILE_END(function_test);
 *      }
 *
 *      // Write the report to the output directory.
 *      // Note: The directory must exist
 *
 *      Profiler::writeReport("./reportdir", "dot");
 *
 *      // choose the desired image output format (default: png)
 *      // Note: "dot -T?" returns a list of possible formats
 *
 *      Profiler::writeReport("./reportdir", "dot", "pdf");
 *
 *      // or if no GraphViz dot tool is available:
 *      // Profiler::writeReport("./reportdir");
 *  \endcode
 *
 *  Within the for-loop the MIRA_PROFILE_SCOPE macro is used to
 *  profile the execution time of the code within the for-loop.
 *  However, sometimes you need more control where to place the start and ending
 *  point of the timing measurements. In these cases you can use the
 *  MIRA_PROFILE_BEGIN and MIRA_PROFILE_END macro pair.
 *
 *  @note If you want to profile a module or class that is used through a MIRA framework
 *  by running e.g. mira/miracenter, these tools have a CLI parameter
 *  '--enable-profiler' which can be used to trigger the report writing on process exit.
 *  If used that way, there is no need to call Profiler::writeReport in your own code.
 */
class MIRA_BASE_EXPORT Profiler : public EagerSingleton<Profiler>
{
public:

	///@cond INTERNAL

	/// A node within the hierarchy of callers/callees containing the profiling results.
	struct Node
	{
		/// Link from a node to one of its child nodes, with per-link statistics.
		struct ChildLink
		{
			/// Creates an empty link: all counters zero and no child attached.
			ChildLink() : count(0), totalCycles(0), avgCycles(0.0), M2cycles(0.0), child(NULL) {}

			/// total number of calls
			uint32 count;

			/// total number of cycles spent in this child node
			uint64 totalCycles;

			// NOTE(review): presumably a running mean of the cycles per call
			// and the accumulated sum of squared deviations used to derive
			// the variance; both are maintained outside this header - confirm.
			double avgCycles;
			double M2cycles;

			/// pointer to child node
			Node* child;
		};

		/// Creates an unnamed node with invalid id (-1), no source location and zeroed statistics.
		Node() : id(-1), filename(NULL), line(0), count(0), totalCycles(0), avgCycles(0.0), M2cycles(0.0) {}

		/// The user defined identifier
		std::string name;

		/// A unique ID
		int32 id;

		/// The file name of the source file where MIRA_PROFILE_BEGIN is called
		const char* filename;

		/// The corresponding line number
		uint32 line;

		/// total number of calls
		uint32 count;

		/// total number of cycles spent in this node
		uint64 totalCycles;

		// NOTE(review): see ChildLink::avgCycles / ChildLink::M2cycles - confirm.
		double avgCycles;
		double M2cycles;

		/// array with all children
		std::vector<ChildLink> children;

		/// Lock belonging to this node (used by the implementation file).
		Spinlock spinlock;
	};

	struct Report;
	struct ReportNode;

	///@endcond INTERNAL

public:

	Profiler();
	~Profiler();

public:
	/**
	 * Writes the profiling report of the global Profiler instance.
	 * @param directory  The output directory for the report.
	 * @param dotCommand The GraphViz dot command; empty (the default) if no
	 *                   dot tool is available.
	 * @param imgFormat  The image output format passed on for the report
	 *                   (default: "png").
	 */
	static void writeReport(const std::string& directory,
	                        const std::string& dotCommand="",
	                        const std::string& imgFormat="png") {
		Profiler::instance().writeReportInternal(directory, dotCommand, imgFormat);
	}

public:

	/**
	 * Returns the set or calibrated CPU speed (in cycles/second i.e. Hz).
	 * If this function is called the first time AND setCPUSpeed was not called
	 * before, the CPU speed will be calibrated automatically by sleeping one
	 * second.
	 */
	double getCPUSpeed();

	/// Overwrites the calibrated CPU speed with the specified value (in Hz!).
	void setCPUSpeed(double speed) {
		// plain assignment; optional::reset(const T&) is deprecated in Boost
		mCPUSpeed = speed;
	}

public:

	/// Returns the current cycle count (0 on platforms without a supported counter).
	static uint64 getCycleCount();

public:
	///@cond INTERNAL

	// methods used internally by the profile macros

	/**
	 * Creates a new node in the caller/callee hierarchy
	 * (called by MIRA_PROFILE_BEGIN).
	 */
	Node* newProfileNode(const std::string& name,
	                     const char* filename, uint32 line);

	/**
	 * Marks the start of a profile section (called by MIRA_PROFILE_BEGIN).
	 * The returned node must be passed as prevNode to the matching
	 * endHierarchy() call.
	 */
	Node* beginHierarchy(Node* node, const char* filename, uint32 line);

	/**
	 * Marks the end of a profile section and passes the time in cycles that
	 * was spent within this section (called by MIRA_PROFILE_END).
	 */
	void endHierarchy(Node* node, Node* prevNode, uint64 cycles,
	                  const char* filename, uint32 line);

	///@endcond INTERNAL

public:

	/**
	 * Helper for MIRA_PROFILE_SCOPE, which starts timing in the constructor
	 * and stops it automatically in the destructor when leaving the scope.
	 * The object is not copyable, since a copy would report the end of the
	 * same section twice.
	 */
	struct Scope
	{
		/// Begins the profile section for the given node and records the start cycle count.
		Scope(Node* node, const char* filename, uint32 line) :
			mNode(node), mFilename(filename), mLine(line),
			mParent(instance().beginHierarchy(node,filename, line)),
			mStart(getCycleCount()) {}

		/// Ends the profile section and reports the elapsed cycles.
		~Scope()
		{
			uint64 duration = getCycleCount() - mStart;
			instance().endHierarchy(mNode, mParent, duration, mFilename, mLine);
		}

	private:
		// not copyable (declared but intentionally not defined)
		Scope(const Scope&);
		Scope& operator=(const Scope&);

	private:
		Node* mNode;
		const char* mFilename;
		uint32 mLine;   // same type as the line parameter of begin/endHierarchy
		Node* mParent;  // node returned by beginHierarchy
		uint64 mStart;  // cycle count when the scope was entered
	};

private:

	typedef std::map<std::string, Node*> NodeMap;

	// the calibrated or specified cpu speed (in Hz)
	boost::optional<double> mCPUSpeed;

	boost::mutex mMutex;

	// the collection of all profile nodes
	NodeMap mNodes;

	// counter to generate unique IDs
	int32 mNextProfileID;

	struct ThreadInfo;
	friend struct ThreadInfo;

	// per-thread bookkeeping; ThreadInfo is defined in the implementation file
	typedef boost::shared_ptr<ThreadInfo> ThreadInfoPtr;
	boost::thread_specific_ptr<ThreadInfoPtr> mThreadInfo;
	std::vector<ThreadInfoPtr> mThreads;

	static void threadInfoCleanupFn(ThreadInfo* t);

	ThreadInfo* getThreadInfo();

private:

	// implementation of the static writeReport()
	void writeReportInternal(const std::string& directory, 
	                         std::string dotCommand,
	                         const std::string& imgFormat);

	ReportNode* buildReportNode(Report* report, const Node* node);
	void buildThreadReport(Report* report, const Node* node);
};

///////////////////////////////////////////////////////////////////////////////

/**
 * Returns the current value of the platform's cycle/tick counter.
 *
 * - MSVC (x86 and x64): time stamp counter via the __rdtsc() intrinsic.
 * - GCC, x86 (32/64-bit): time stamp counter via the rdtsc instruction.
 * - GCC, ARM 64-bit: the virtual counter register cntvct_el0.
 * - GCC, ARM 32-bit and all other architectures: not supported, returns 0,
 *   so all measured durations will be 0 there.
 */
inline uint64 Profiler::getCycleCount()
{
#ifdef _MSC_VER
	// Microsoft Visual C++: the __rdtsc intrinsic (<intrin.h>, included and
	// enabled at the top of this file) is available for 32-bit and 64-bit
	// x86 targets, so no inline assembly is needed.
	return static_cast<uint64>(__rdtsc());
#else
// Linux, GCC
# if defined(MIRA_ARCH_X86)
#  if defined(MIRA_ARCH64)
	// x86 64-bit: rdtsc delivers the low half in eax and the high half in edx
	unsigned hi, lo;
	asm volatile ("rdtsc" : "=a"(lo), "=d"(hi));
	return ( (uint64)lo) | (((uint64)hi)<<32 );
#  else
	// x86 32-bit: the "=A" constraint binds the 64-bit value to edx:eax
	uint64 cycles;
	asm volatile ("rdtsc" : "=A" (cycles));
	return cycles;
#  endif
# elif defined(MIRA_ARCH_ARM)
#  if defined(MIRA_ARCH64)
	// ARM 64-bit: read the virtual counter register
	uint64 cycles;
	asm volatile ("mrs %0, cntvct_el0" : "=r" (cycles));
	return cycles;
#  else
	// ARM 32-bit: no counter implemented
	return 0;
#  endif
# else
	// unknown architecture: no counter implemented
	return 0;
# endif
#endif
}

///////////////////////////////////////////////////////////////////////////////

}

#endif
