routine_tracer_funcstats.cc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. #include "routine_tracer_funcstats.h"
  2. #include "simulator.h"
  3. #include "core_manager.h"
  4. #include "thread.h"
  5. #include "core.h"
  6. #include "performance_model.h"
  7. #include "log.h"
  8. #include "stats.h"
  9. #include "cache_efficiency_tracker.h"
  10. #include "utils.h"
  11. #include <sstream>
  12. RoutineTracerFunctionStats::RtnThread::RtnThread(RoutineTracerFunctionStats::RtnMaster *master, Thread *thread)
  13. : RoutineTracerThread(thread)
  14. , m_master(master)
  15. , m_current_eip(0)
  16. {
  17. }
  18. void RoutineTracerFunctionStats::RtnThread::functionEnter(IntPtr eip, IntPtr callEip)
  19. {
  20. functionBegin(eip);
  21. }
  22. void RoutineTracerFunctionStats::RtnThread::functionExit(IntPtr eip)
  23. {
  24. functionEnd(eip, true);
  25. }
  26. void RoutineTracerFunctionStats::RtnThread::functionChildEnter(IntPtr eip, IntPtr eip_child)
  27. {
  28. functionEnd(eip, false);
  29. }
  30. void RoutineTracerFunctionStats::RtnThread::functionChildExit(IntPtr eip, IntPtr eip_child)
  31. {
  32. functionBegin(eip);
  33. }
  34. void RoutineTracerFunctionStats::RtnThread::functionBeginHelper(IntPtr eip, RtnValues& values_start)
  35. {
  36. m_current_eip = eip;
  37. const ThreadStatsManager::ThreadStatTypeList& types = Sim()->getThreadStatsManager()->getThreadStatTypes();
  38. for(ThreadStatsManager::ThreadStatTypeList::const_iterator it = types.begin(); it != types.end(); ++it)
  39. {
  40. values_start[*it] = getThreadStat(*it);
  41. }
  42. }
  43. void RoutineTracerFunctionStats::RtnThread::functionEndHelper(IntPtr eip, UInt64 count)
  44. {
  45. RtnValues values;
  46. const ThreadStatsManager::ThreadStatTypeList& types = Sim()->getThreadStatsManager()->getThreadStatTypes();
  47. for(ThreadStatsManager::ThreadStatTypeList::const_iterator it = types.begin(); it != types.end(); ++it)
  48. {
  49. values[*it] = getThreadStat(*it) - m_values_start[*it];
  50. }
  51. m_master->updateRoutine(eip, count, values);
  52. }
  53. void RoutineTracerFunctionStats::RtnThread::functionEndFullHelper(const CallStack &stack, UInt64 count)
  54. {
  55. RtnValues values;
  56. const ThreadStatsManager::ThreadStatTypeList& types = Sim()->getThreadStatsManager()->getThreadStatTypes();
  57. for(auto it = types.begin(); it != types.end(); ++it)
  58. {
  59. values[*it] = getThreadStat(*it) - m_values_start_full[stack][*it];
  60. }
  61. m_master->updateRoutineFull(stack, count, values);
  62. }
  63. void RoutineTracerFunctionStats::RtnThread::functionBegin(IntPtr eip)
  64. {
  65. Sim()->getThreadStatsManager()->update(m_thread->getId());
  66. functionBeginHelper(eip, m_values_start);
  67. if (m_stack.size())
  68. functionBeginHelper(eip, m_values_start_full[m_stack]);
  69. }
  70. void RoutineTracerFunctionStats::RtnThread::functionEnd(IntPtr eip, bool is_function_start)
  71. {
  72. Sim()->getThreadStatsManager()->update(m_thread->getId());
  73. functionEndHelper(eip, is_function_start ? 1 : 0);
  74. if (m_stack.size())
  75. functionEndFullHelper(m_stack, is_function_start ? 1 : 0);
  76. }
  77. UInt64 RoutineTracerFunctionStats::RtnThread::getThreadStat(ThreadStatsManager::ThreadStatType type)
  78. {
  79. return Sim()->getThreadStatsManager()->getThreadStatistic(m_thread->getId(), type);
  80. }
  81. UInt64 RoutineTracerFunctionStats::RtnThread::getCurrentRoutineId()
  82. {
  83. ScopedLock sl(m_lock);
  84. if (m_stack.size())
  85. return (UInt64)m_master->getRoutineFullPtr(m_stack);
  86. else
  87. return 0;
  88. }
  89. RoutineTracerFunctionStats::RtnMaster::RtnMaster()
  90. {
  91. ThreadStatNamedStat::registerStat("fp_addsub", "interval_timer", "uop_fp_addsub");
  92. ThreadStatNamedStat::registerStat("fp_muldiv", "interval_timer", "uop_fp_muldiv");
  93. ThreadStatNamedStat::registerStat("l2miss", "L2", "load-misses");
  94. ThreadStatNamedStat::registerStat("l3miss", "L3", "load-misses");
  95. ThreadStatAggregates::registerStats();
  96. if (ThreadStatNamedStat::registerStat("cpiBase", "interval_timer", "cpiBase") == ThreadStatsManager::INVALID)
  97. ThreadStatNamedStat::registerStat("cpiBase", "rob_timer", "cpiBase");
  98. if (ThreadStatNamedStat::registerStat("cpiBranchPredictor", "interval_timer", "cpiBranchPredictor") == ThreadStatsManager::INVALID)
  99. ThreadStatNamedStat::registerStat("cpiBranchPredictor", "rob_timer", "cpiBranchPredictor");
  100. ThreadStatCpiMem::registerStat();
  101. Sim()->getConfig()->setCacheEfficiencyCallbacks(__ce_get_owner, NULL, __ce_notify_evict, (UInt64)this);
  102. }
  103. RoutineTracerFunctionStats::RtnMaster::~RtnMaster()
  104. {
  105. writeResults(Sim()->getConfig()->formatOutputFileName("sim.rtntrace").c_str());
  106. writeResultsFull(Sim()->getConfig()->formatOutputFileName("sim.rtntracefull").c_str());
  107. }
  108. UInt64 RoutineTracerFunctionStats::RtnMaster::ce_get_owner(core_id_t core_id, UInt64 address)
  109. {
  110. Thread *thread = Sim()->getCoreManager()->getCoreFromID(core_id)->getThread();
  111. if (thread && m_threads.count(thread->getId()))
  112. return m_threads[thread->getId()]->getCurrentRoutineId();
  113. else
  114. return 0;
  115. }
  116. void RoutineTracerFunctionStats::RtnMaster::ce_notify_evict(bool on_roi_end, UInt64 owner, UInt64 evictor, CacheBlockInfo::BitsUsedType bits_used, UInt32 bits_total)
  117. {
  118. if (owner == 0)
  119. return;
  120. ScopedLock sl(m_lock);
  121. RoutineTracerFunctionStats::Routine* rtn = (RoutineTracerFunctionStats::Routine*)owner;
  122. LOG_ASSERT_ERROR(m_routines.count(rtn->m_eip) != 0, "Routine not found (rtn %lx, eip %lx)", rtn, rtn->m_eip);
  123. m_routines[rtn->m_eip]->m_bits_used += countBits(bits_used);
  124. m_routines[rtn->m_eip]->m_bits_total += bits_total;
  125. rtn->m_bits_used += countBits(bits_used);
  126. rtn->m_bits_total += bits_total;
  127. }
  128. RoutineTracerThread* RoutineTracerFunctionStats::RtnMaster::getThreadHandler(Thread *thread)
  129. {
  130. RtnThread* thread_handler = new RtnThread(this, thread);
  131. m_threads[thread->getId()] = thread_handler;
  132. return thread_handler;
  133. }
  134. void RoutineTracerFunctionStats::RtnMaster::addRoutine(IntPtr eip, const char *name, const char *imgname, IntPtr offset, int column, int line, const char *filename)
  135. {
  136. ScopedLock sl(m_lock);
  137. if (m_routines.count(eip) == 0)
  138. {
  139. m_routines[eip] = new RoutineTracerFunctionStats::Routine(eip, name, imgname, offset, column, line, filename);
  140. }
  141. else if (m_routines[eip]->isProvisional())
  142. {
  143. m_routines[eip]->updateLocation(name, imgname, offset, column, line, filename);
  144. m_routines[eip]->setProvisional(false);
  145. }
  146. }
  147. bool RoutineTracerFunctionStats::RtnMaster::hasRoutine(IntPtr eip)
  148. {
  149. ScopedLock sl(m_lock);
  150. return m_routines.count(eip) > 0;
  151. }
  152. void RoutineTracerFunctionStats::RtnMaster::updateRoutine(IntPtr eip, UInt64 calls, RtnValues values)
  153. {
  154. ScopedLock sl(m_lock);
  155. if (m_routines.count(eip) == 0)
  156. {
  157. // Another thread must have done the instrumentation and set the function information,
  158. // but it's still going through the (SIFT) pipe. Create a provisional record now to hold the statistics,
  159. // we will update the name/location information once it arrives.
  160. m_routines[eip] = new RoutineTracerFunctionStats::Routine(eip, "(unknown)", "(unknown)", 0, 0, 0, "");
  161. m_routines[eip]->setProvisional(true);
  162. }
  163. LOG_ASSERT_ERROR(m_routines.count(eip), "Routine %lx not found", eip);
  164. m_routines[eip]->m_calls += calls;
  165. for(RtnValues::iterator it = values.begin(); it != values.end(); ++it)
  166. {
  167. m_routines[eip]->m_values[it->first] += it->second;
  168. }
  169. }
  170. RoutineTracerFunctionStats::Routine* RoutineTracerFunctionStats::RtnMaster::getRoutineFullPtr(const CallStack& stack)
  171. {
  172. ScopedLock sl(m_lock);
  173. if (m_routines.count(stack.back()) == 0)
  174. {
  175. m_routines[stack.back()] = new RoutineTracerFunctionStats::Routine(stack.back(), "(unknown)", "(unknown)", 0, 0, 0, "");
  176. m_routines[stack.back()]->setProvisional(true);
  177. }
  178. if (m_callstack_routines.count(stack) == 0)
  179. {
  180. m_callstack_routines[stack] = new RoutineTracerFunctionStats::Routine(*m_routines[stack.back()]);
  181. }
  182. return m_callstack_routines[stack];
  183. }
  184. void RoutineTracerFunctionStats::RtnMaster::updateRoutineFull(const CallStack& stack, UInt64 calls, RtnValues values)
  185. {
  186. updateRoutineFull(getRoutineFullPtr(stack), calls, values);
  187. }
  188. void RoutineTracerFunctionStats::RtnMaster::updateRoutineFull(RoutineTracerFunctionStats::Routine* rtn, UInt64 calls, RtnValues values)
  189. {
  190. ScopedLock sl(m_lock);
  191. rtn->m_calls += calls;
  192. for(auto it = values.begin(); it != values.end(); ++it)
  193. {
  194. rtn->m_values[it->first] += it->second;
  195. }
  196. }
  197. void RoutineTracerFunctionStats::RtnMaster::writeResults(const char *filename)
  198. {
  199. FILE *fp = fopen(filename, "w");
  200. const ThreadStatsManager::ThreadStatTypeList& types = Sim()->getThreadStatsManager()->getThreadStatTypes();
  201. fprintf(fp, "eip\tname\tsource\tcalls\tbits_used\tbits_total");
  202. for(ThreadStatsManager::ThreadStatTypeList::const_iterator it = types.begin(); it != types.end(); ++it)
  203. fprintf(fp, "\t%s", Sim()->getThreadStatsManager()->getThreadStatName(*it));
  204. fprintf(fp, "\n");
  205. for(RoutineMap::iterator it = m_routines.begin(); it != m_routines.end(); ++it)
  206. {
  207. fprintf(fp, "%" PRIxPTR "\t%s\t%s\t%" PRId64 "\t%" PRId64 "\t%" PRId64,
  208. it->second->m_eip, it->second->m_name, it->second->m_location,
  209. it->second->m_calls, it->second->m_bits_used, it->second->m_bits_total);
  210. for(ThreadStatsManager::ThreadStatTypeList::const_iterator jt = types.begin(); jt != types.end(); ++jt)
  211. fprintf(fp, "\t%" PRId64, it->second->m_values[*jt]);
  212. fprintf(fp, "\n");
  213. }
  214. fclose(fp);
  215. }
  216. void RoutineTracerFunctionStats::RtnMaster::writeResultsFull(const char *filename)
  217. {
  218. FILE *fp = fopen(filename, "w");
  219. const ThreadStatsManager::ThreadStatTypeList& types = Sim()->getThreadStatsManager()->getThreadStatTypes();
  220. // header line
  221. fprintf(fp, "stack\tcalls\tbits_used\tbits_total");
  222. for(ThreadStatsManager::ThreadStatTypeList::const_iterator it = types.begin(); it != types.end(); ++it)
  223. fprintf(fp, "\t%s", Sim()->getThreadStatsManager()->getThreadStatName(*it));
  224. fprintf(fp, "\n");
  225. // first print all routine names
  226. for(RoutineMap::iterator it = m_routines.begin(); it != m_routines.end(); ++it)
  227. {
  228. if (it->second->m_calls)
  229. fprintf(fp, ":%" PRIxPTR "\t%s\t%s\n", it->second->m_eip, it->second->m_name, it->second->m_location);
  230. }
  231. // now print context-aware statistics
  232. for(auto it = m_callstack_routines.begin(); it != m_callstack_routines.end(); ++it)
  233. {
  234. if (it->second->m_calls)
  235. {
  236. std::ostringstream s;
  237. s << std::hex << it->first.front();
  238. for (auto kt = ++it->first.begin(); kt != it->first.end(); ++kt)
  239. {
  240. s << ":" << std::hex << *kt << std::dec;
  241. }
  242. fprintf(fp, "%s\t%" PRId64 "\t%" PRId64 "\t%" PRId64,
  243. s.str().c_str(), it->second->m_calls, it->second->m_bits_used, it->second->m_bits_total);
  244. for(ThreadStatsManager::ThreadStatTypeList::const_iterator jt = types.begin(); jt != types.end(); ++jt)
  245. fprintf(fp, "\t%" PRId64, it->second->m_values[*jt]);
  246. fprintf(fp, "\n");
  247. }
  248. }
  249. fclose(fp);
  250. }
  251. // Helper class to provide global icount/time statistics
  252. class ThreadStatAggregates
  253. {
  254. public:
  255. static void registerStats();
  256. private:
  257. static UInt64 callback(ThreadStatsManager::ThreadStatType type, thread_id_t thread_id, Core *core, UInt64 user);
  258. };
  259. void RoutineTracerFunctionStats::ThreadStatAggregates::registerStats()
  260. {
  261. Sim()->getThreadStatsManager()->registerThreadStatMetric(ThreadStatsManager::DYNAMIC, "global_instructions", callback, (UInt64)GLOBAL_INSTRUCTIONS);
  262. Sim()->getThreadStatsManager()->registerThreadStatMetric(ThreadStatsManager::DYNAMIC, "global_nonidle_elapsed_time", callback, (UInt64)GLOBAL_NONIDLE_ELAPSED_TIME);
  263. }
  264. UInt64 RoutineTracerFunctionStats::ThreadStatAggregates::callback(ThreadStatsManager::ThreadStatType type, thread_id_t thread_id, Core *core, UInt64 user)
  265. {
  266. UInt64 result = 0;
  267. switch(user)
  268. {
  269. case GLOBAL_INSTRUCTIONS:
  270. for(core_id_t core_id = 0; core_id < (core_id_t)Sim()->getConfig()->getApplicationCores(); ++core_id)
  271. result += Sim()->getCoreManager()->getCoreFromID(core_id)->getPerformanceModel()->getInstructionCount();
  272. return result;
  273. case GLOBAL_NONIDLE_ELAPSED_TIME:
  274. for(core_id_t core_id = 0; core_id < (core_id_t)Sim()->getConfig()->getApplicationCores(); ++core_id)
  275. result += Sim()->getCoreManager()->getCoreFromID(core_id)->getPerformanceModel()->getNonIdleElapsedTime().getFS();
  276. return result;
  277. default:
  278. LOG_PRINT_ERROR("Unexpected user value %d", user);
  279. }
  280. }
  281. // Helper class to provide simplified cpiMem component
  282. ThreadStatsManager::ThreadStatType RoutineTracerFunctionStats::ThreadStatCpiMem::registerStat()
  283. {
  284. ThreadStatCpiMem *tsns = new ThreadStatCpiMem();
  285. return Sim()->getThreadStatsManager()->registerThreadStatMetric(ThreadStatsManager::DYNAMIC, "cpiMem", callback, (UInt64)tsns);
  286. }
  287. RoutineTracerFunctionStats::ThreadStatCpiMem::ThreadStatCpiMem()
  288. : m_stats(Sim()->getConfig()->getApplicationCores())
  289. {
  290. for(core_id_t core_id = 0; core_id < (core_id_t)Sim()->getConfig()->getApplicationCores(); ++core_id)
  291. {
  292. for (int h = HitWhere::WHERE_FIRST ; h < HitWhere::NUM_HITWHERES ; h++)
  293. {
  294. if (HitWhereIsValid((HitWhere::where_t)h))
  295. {
  296. String metricName = "cpiDataCache" + String(HitWhereString((HitWhere::where_t)h));
  297. StatsMetricBase *m = Sim()->getStatsManager()->getMetricObject("interval_timer", core_id, metricName);
  298. if (!m)
  299. m = Sim()->getStatsManager()->getMetricObject("rob_timer", core_id, metricName);
  300. LOG_ASSERT_ERROR(m != NULL, "Invalid statistic %s.%d.%s", "interval_timer", core_id, metricName.c_str());
  301. m_stats[core_id].push_back(m);
  302. }
  303. }
  304. }
  305. }
  306. UInt64 RoutineTracerFunctionStats::ThreadStatCpiMem::callback(ThreadStatsManager::ThreadStatType type, thread_id_t thread_id, Core *core, UInt64 user)
  307. {
  308. ThreadStatCpiMem* tsns = (ThreadStatCpiMem*)user;
  309. std::vector<StatsMetricBase*>& stats = tsns->m_stats[core->getId()];
  310. UInt64 result = 0;
  311. for(std::vector<StatsMetricBase*>::iterator it = stats.begin(); it != stats.end(); ++it)
  312. result += (*it)->recordMetric();
  313. return result;
  314. }