PageRenderTime 41ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 1ms

/indra/llcommon/llfasttimer_class.cpp

https://bitbucket.org/lindenlab/viewer-beta/
C++ | 921 lines | 637 code | 146 blank | 138 comment | 82 complexity | 3575a057ad377336f2d3f1a669921d71 MD5 | raw file
Possible License(s): LGPL-2.1
  1. /**
  2. * @file llfasttimer_class.cpp
  3. * @brief Implementation of the fast timer.
  4. *
  5. * $LicenseInfo:firstyear=2004&license=viewerlgpl$
  6. * Second Life Viewer Source Code
  7. * Copyright (C) 2010, Linden Research, Inc.
  8. *
  9. * This library is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation;
  12. * version 2.1 of the License only.
  13. *
  14. * This library is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with this library; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. *
  23. * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
  24. * $/LicenseInfo$
  25. */
  26. #include "linden_common.h"
  27. #include "llfasttimer.h"
  28. #include "llmemory.h"
  29. #include "llprocessor.h"
  30. #include "llsingleton.h"
  31. #include "lltreeiterators.h"
  32. #include "llsdserialize.h"
  33. #include <boost/bind.hpp>
  34. #if LL_WINDOWS
  35. #include "lltimer.h"
  36. #elif LL_LINUX || LL_SOLARIS
  37. #include <sys/time.h>
  38. #include <sched.h>
  39. #include "lltimer.h"
  40. #elif LL_DARWIN
  41. #include <sys/time.h>
  42. #include "lltimer.h" // get_clock_count()
  43. #else
  44. #error "architecture not supported"
  45. #endif
  46. //////////////////////////////////////////////////////////////////////////////
  47. // statics
  48. S32 LLFastTimer::sCurFrameIndex = -1;
  49. S32 LLFastTimer::sLastFrameIndex = -1;
  50. U64 LLFastTimer::sLastFrameTime = LLFastTimer::getCPUClockCount64();
  51. bool LLFastTimer::sPauseHistory = 0;
  52. bool LLFastTimer::sResetHistory = 0;
  53. LLFastTimer::CurTimerData LLFastTimer::sCurTimerData;
  54. BOOL LLFastTimer::sLog = FALSE;
  55. std::string LLFastTimer::sLogName = "";
  56. BOOL LLFastTimer::sMetricLog = FALSE;
  57. LLMutex* LLFastTimer::sLogLock = NULL;
  58. std::queue<LLSD> LLFastTimer::sLogQueue;
  59. #define USE_RDTSC 0
  60. #if LL_LINUX || LL_SOLARIS
  61. U64 LLFastTimer::sClockResolution = 1000000000; // Nanosecond resolution
  62. #else
  63. U64 LLFastTimer::sClockResolution = 1000000; // Microsecond resolution
  64. #endif
  65. std::vector<LLFastTimer::FrameState>* LLFastTimer::sTimerInfos = NULL;
  66. U64 LLFastTimer::sTimerCycles = 0;
  67. U32 LLFastTimer::sTimerCalls = 0;
  68. // FIXME: move these declarations to the relevant modules
  69. // helper functions
  70. typedef LLTreeDFSPostIter<LLFastTimer::NamedTimer, LLFastTimer::NamedTimer::child_const_iter> timer_tree_bottom_up_iterator_t;
  71. static timer_tree_bottom_up_iterator_t begin_timer_tree_bottom_up(LLFastTimer::NamedTimer& id)
  72. {
  73. return timer_tree_bottom_up_iterator_t(&id,
  74. boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::beginChildren), _1),
  75. boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::endChildren), _1));
  76. }
  77. static timer_tree_bottom_up_iterator_t end_timer_tree_bottom_up()
  78. {
  79. return timer_tree_bottom_up_iterator_t();
  80. }
  81. typedef LLTreeDFSIter<LLFastTimer::NamedTimer, LLFastTimer::NamedTimer::child_const_iter> timer_tree_dfs_iterator_t;
  82. static timer_tree_dfs_iterator_t begin_timer_tree(LLFastTimer::NamedTimer& id)
  83. {
  84. return timer_tree_dfs_iterator_t(&id,
  85. boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::beginChildren), _1),
  86. boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::endChildren), _1));
  87. }
  88. static timer_tree_dfs_iterator_t end_timer_tree()
  89. {
  90. return timer_tree_dfs_iterator_t();
  91. }
  92. // factory class that creates NamedTimers via static DeclareTimer objects
  93. class NamedTimerFactory : public LLSingleton<NamedTimerFactory>
  94. {
  95. public:
  96. NamedTimerFactory()
  97. : mActiveTimerRoot(NULL),
  98. mTimerRoot(NULL),
  99. mAppTimer(NULL),
  100. mRootFrameState(NULL)
  101. {}
  102. /*virtual */ void initSingleton()
  103. {
  104. mTimerRoot = new LLFastTimer::NamedTimer("root");
  105. mActiveTimerRoot = new LLFastTimer::NamedTimer("Frame");
  106. mActiveTimerRoot->setCollapsed(false);
  107. mRootFrameState = new LLFastTimer::FrameState(mActiveTimerRoot);
  108. mRootFrameState->mParent = &mTimerRoot->getFrameState();
  109. mActiveTimerRoot->setParent(mTimerRoot);
  110. mAppTimer = new LLFastTimer(mRootFrameState);
  111. }
  112. ~NamedTimerFactory()
  113. {
  114. std::for_each(mTimers.begin(), mTimers.end(), DeletePairedPointer());
  115. delete mAppTimer;
  116. delete mActiveTimerRoot;
  117. delete mTimerRoot;
  118. delete mRootFrameState;
  119. }
  120. LLFastTimer::NamedTimer& createNamedTimer(const std::string& name)
  121. {
  122. timer_map_t::iterator found_it = mTimers.find(name);
  123. if (found_it != mTimers.end())
  124. {
  125. return *found_it->second;
  126. }
  127. LLFastTimer::NamedTimer* timer = new LLFastTimer::NamedTimer(name);
  128. timer->setParent(mTimerRoot);
  129. mTimers.insert(std::make_pair(name, timer));
  130. return *timer;
  131. }
  132. LLFastTimer::NamedTimer* getTimerByName(const std::string& name)
  133. {
  134. timer_map_t::iterator found_it = mTimers.find(name);
  135. if (found_it != mTimers.end())
  136. {
  137. return found_it->second;
  138. }
  139. return NULL;
  140. }
  141. LLFastTimer::NamedTimer* getActiveRootTimer() { return mActiveTimerRoot; }
  142. LLFastTimer::NamedTimer* getRootTimer() { return mTimerRoot; }
  143. const LLFastTimer* getAppTimer() { return mAppTimer; }
  144. LLFastTimer::FrameState& getRootFrameState() { return *mRootFrameState; }
  145. typedef std::map<std::string, LLFastTimer::NamedTimer*> timer_map_t;
  146. timer_map_t::iterator beginTimers() { return mTimers.begin(); }
  147. timer_map_t::iterator endTimers() { return mTimers.end(); }
  148. S32 timerCount() { return mTimers.size(); }
  149. private:
  150. timer_map_t mTimers;
  151. LLFastTimer::NamedTimer* mActiveTimerRoot;
  152. LLFastTimer::NamedTimer* mTimerRoot;
  153. LLFastTimer* mAppTimer;
  154. LLFastTimer::FrameState* mRootFrameState;
  155. };
  156. void update_cached_pointers_if_changed()
  157. {
  158. // detect when elements have moved and update cached pointers
  159. static LLFastTimer::FrameState* sFirstTimerAddress = NULL;
  160. if (&*(LLFastTimer::getFrameStateList().begin()) != sFirstTimerAddress)
  161. {
  162. LLFastTimer::DeclareTimer::updateCachedPointers();
  163. }
  164. sFirstTimerAddress = &*(LLFastTimer::getFrameStateList().begin());
  165. }
  166. LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name, bool open )
  167. : mTimer(NamedTimerFactory::instance().createNamedTimer(name))
  168. {
  169. mTimer.setCollapsed(!open);
  170. mFrameState = &mTimer.getFrameState();
  171. update_cached_pointers_if_changed();
  172. }
  173. LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name)
  174. : mTimer(NamedTimerFactory::instance().createNamedTimer(name))
  175. {
  176. mFrameState = &mTimer.getFrameState();
  177. update_cached_pointers_if_changed();
  178. }
  179. // static
  180. void LLFastTimer::DeclareTimer::updateCachedPointers()
  181. {
  182. // propagate frame state pointers to timer declarations
  183. for (instance_iter it = beginInstances(); it != endInstances(); ++it)
  184. {
  185. // update cached pointer
  186. it->mFrameState = &it->mTimer.getFrameState();
  187. }
  188. // also update frame states of timers on stack
  189. LLFastTimer* cur_timerp = LLFastTimer::sCurTimerData.mCurTimer;
  190. while(cur_timerp->mLastTimerData.mCurTimer != cur_timerp)
  191. {
  192. cur_timerp->mFrameState = &cur_timerp->mFrameState->mTimer->getFrameState();
  193. cur_timerp = cur_timerp->mLastTimerData.mCurTimer;
  194. }
  195. }
  196. //static
  197. #if (LL_DARWIN || LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
  198. U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
  199. {
  200. return sClockResolution >> 8;
  201. }
  202. #else // windows or x86-mac or x86-linux or x86-solaris
  203. U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
  204. {
  205. #if USE_RDTSC || !LL_WINDOWS
  206. //getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz
  207. static U64 sCPUClockFrequency = U64(LLProcessorInfo().getCPUFrequency()*1000000.0);
  208. // we drop the low-order byte in our timers, so report a lower frequency
  209. #else
  210. // If we're not using RDTSC, each fasttimer tick is just a performance counter tick.
  211. // Not redefining the clock frequency itself (in llprocessor.cpp/calculate_cpu_frequency())
  212. // since that would change displayed MHz stats for CPUs
  213. static bool firstcall = true;
  214. static U64 sCPUClockFrequency;
  215. if (firstcall)
  216. {
  217. QueryPerformanceFrequency((LARGE_INTEGER*)&sCPUClockFrequency);
  218. firstcall = false;
  219. }
  220. #endif
  221. return sCPUClockFrequency >> 8;
  222. }
  223. #endif
  224. LLFastTimer::FrameState::FrameState(LLFastTimer::NamedTimer* timerp)
  225. : mActiveCount(0),
  226. mCalls(0),
  227. mSelfTimeCounter(0),
  228. mParent(NULL),
  229. mLastCaller(NULL),
  230. mMoveUpTree(false),
  231. mTimer(timerp)
  232. {}
  233. LLFastTimer::NamedTimer::NamedTimer(const std::string& name)
  234. : mName(name),
  235. mCollapsed(true),
  236. mParent(NULL),
  237. mTotalTimeCounter(0),
  238. mCountAverage(0),
  239. mCallAverage(0),
  240. mNeedsSorting(false)
  241. {
  242. info_list_t& frame_state_list = getFrameStateList();
  243. mFrameStateIndex = frame_state_list.size();
  244. getFrameStateList().push_back(FrameState(this));
  245. mCountHistory = new U32[HISTORY_NUM];
  246. memset(mCountHistory, 0, sizeof(U32) * HISTORY_NUM);
  247. mCallHistory = new U32[HISTORY_NUM];
  248. memset(mCallHistory, 0, sizeof(U32) * HISTORY_NUM);
  249. }
  250. LLFastTimer::NamedTimer::~NamedTimer()
  251. {
  252. delete[] mCountHistory;
  253. delete[] mCallHistory;
  254. }
  255. std::string LLFastTimer::NamedTimer::getToolTip(S32 history_idx)
  256. {
  257. F64 ms_multiplier = 1000.0 / (F64)LLFastTimer::countsPerSecond();
  258. if (history_idx < 0)
  259. {
  260. // by default, show average number of call
  261. return llformat("%s (%d ms, %d calls)", getName().c_str(), (S32)(getCountAverage() * ms_multiplier), (S32)getCallAverage());
  262. }
  263. else
  264. {
  265. return llformat("%s (%d ms, %d calls)", getName().c_str(), (S32)(getHistoricalCount(history_idx) * ms_multiplier), (S32)getHistoricalCalls(history_idx));
  266. }
  267. }
  268. void LLFastTimer::NamedTimer::setParent(NamedTimer* parent)
  269. {
  270. llassert_always(parent != this);
  271. llassert_always(parent != NULL);
  272. if (mParent)
  273. {
  274. // subtract our accumulated from previous parent
  275. for (S32 i = 0; i < HISTORY_NUM; i++)
  276. {
  277. mParent->mCountHistory[i] -= mCountHistory[i];
  278. }
  279. // subtract average timing from previous parent
  280. mParent->mCountAverage -= mCountAverage;
  281. std::vector<NamedTimer*>& children = mParent->getChildren();
  282. std::vector<NamedTimer*>::iterator found_it = std::find(children.begin(), children.end(), this);
  283. if (found_it != children.end())
  284. {
  285. children.erase(found_it);
  286. }
  287. }
  288. mParent = parent;
  289. if (parent)
  290. {
  291. getFrameState().mParent = &parent->getFrameState();
  292. parent->getChildren().push_back(this);
  293. parent->mNeedsSorting = true;
  294. }
  295. }
  296. S32 LLFastTimer::NamedTimer::getDepth()
  297. {
  298. S32 depth = 0;
  299. NamedTimer* timerp = mParent;
  300. while(timerp)
  301. {
  302. depth++;
  303. timerp = timerp->mParent;
  304. }
  305. return depth;
  306. }
  307. // static
  308. void LLFastTimer::NamedTimer::processTimes()
  309. {
  310. if (sCurFrameIndex < 0) return;
  311. buildHierarchy();
  312. accumulateTimings();
  313. }
  314. // sort timer info structs by depth first traversal order
  315. struct SortTimersDFS
  316. {
  317. bool operator()(const LLFastTimer::FrameState& i1, const LLFastTimer::FrameState& i2)
  318. {
  319. return i1.mTimer->getFrameStateIndex() < i2.mTimer->getFrameStateIndex();
  320. }
  321. };
  322. // sort child timers by name
  323. struct SortTimerByName
  324. {
  325. bool operator()(const LLFastTimer::NamedTimer* i1, const LLFastTimer::NamedTimer* i2)
  326. {
  327. return i1->getName() < i2->getName();
  328. }
  329. };
  330. //static
  331. void LLFastTimer::NamedTimer::buildHierarchy()
  332. {
  333. if (sCurFrameIndex < 0 ) return;
  334. // set up initial tree
  335. {
  336. for (instance_iter it = beginInstances(); it != endInstances(); ++it)
  337. {
  338. NamedTimer& timer = *it;
  339. if (&timer == NamedTimerFactory::instance().getRootTimer()) continue;
  340. // bootstrap tree construction by attaching to last timer to be on stack
  341. // when this timer was called
  342. if (timer.getFrameState().mLastCaller && timer.mParent == NamedTimerFactory::instance().getRootTimer())
  343. {
  344. timer.setParent(timer.getFrameState().mLastCaller->mTimer);
  345. // no need to push up tree on first use, flag can be set spuriously
  346. timer.getFrameState().mMoveUpTree = false;
  347. }
  348. }
  349. }
  350. // bump timers up tree if they've been flagged as being in the wrong place
  351. // do this in a bottom up order to promote descendants first before promoting ancestors
  352. // this preserves partial order derived from current frame's observations
  353. for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(*NamedTimerFactory::instance().getRootTimer());
  354. it != end_timer_tree_bottom_up();
  355. ++it)
  356. {
  357. NamedTimer* timerp = *it;
  358. // skip root timer
  359. if (timerp == NamedTimerFactory::instance().getRootTimer()) continue;
  360. if (timerp->getFrameState().mMoveUpTree)
  361. {
  362. // since ancestors have already been visited, reparenting won't affect tree traversal
  363. //step up tree, bringing our descendants with us
  364. //llinfos << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() <<
  365. // " to child of " << timerp->getParent()->getParent()->getName() << llendl;
  366. timerp->setParent(timerp->getParent()->getParent());
  367. timerp->getFrameState().mMoveUpTree = false;
  368. // don't bubble up any ancestors until descendants are done bubbling up
  369. it.skipAncestors();
  370. }
  371. }
  372. // sort timers by time last called, so call graph makes sense
  373. for(timer_tree_dfs_iterator_t it = begin_timer_tree(*NamedTimerFactory::instance().getRootTimer());
  374. it != end_timer_tree();
  375. ++it)
  376. {
  377. NamedTimer* timerp = (*it);
  378. if (timerp->mNeedsSorting)
  379. {
  380. std::sort(timerp->getChildren().begin(), timerp->getChildren().end(), SortTimerByName());
  381. }
  382. timerp->mNeedsSorting = false;
  383. }
  384. }
  385. //static
  386. void LLFastTimer::NamedTimer::accumulateTimings()
  387. {
  388. U32 cur_time = getCPUClockCount32();
  389. // walk up stack of active timers and accumulate current time while leaving timing structures active
  390. LLFastTimer* cur_timer = sCurTimerData.mCurTimer;
  391. // root defined by parent pointing to self
  392. CurTimerData* cur_data = &sCurTimerData;
  393. while(cur_timer->mLastTimerData.mCurTimer != cur_timer)
  394. {
  395. U32 cumulative_time_delta = cur_time - cur_timer->mStartTime;
  396. U32 self_time_delta = cumulative_time_delta - cur_data->mChildTime;
  397. cur_data->mChildTime = 0;
  398. cur_timer->mFrameState->mSelfTimeCounter += self_time_delta;
  399. cur_timer->mStartTime = cur_time;
  400. cur_data = &cur_timer->mLastTimerData;
  401. cur_data->mChildTime += cumulative_time_delta;
  402. cur_timer = cur_timer->mLastTimerData.mCurTimer;
  403. }
  404. // traverse tree in DFS post order, or bottom up
  405. for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(*NamedTimerFactory::instance().getActiveRootTimer());
  406. it != end_timer_tree_bottom_up();
  407. ++it)
  408. {
  409. NamedTimer* timerp = (*it);
  410. timerp->mTotalTimeCounter = timerp->getFrameState().mSelfTimeCounter;
  411. for (child_const_iter child_it = timerp->beginChildren(); child_it != timerp->endChildren(); ++child_it)
  412. {
  413. timerp->mTotalTimeCounter += (*child_it)->mTotalTimeCounter;
  414. }
  415. S32 cur_frame = sCurFrameIndex;
  416. if (cur_frame >= 0)
  417. {
  418. // update timer history
  419. int hidx = cur_frame % HISTORY_NUM;
  420. timerp->mCountHistory[hidx] = timerp->mTotalTimeCounter;
  421. timerp->mCountAverage = ((U64)timerp->mCountAverage * cur_frame + timerp->mTotalTimeCounter) / (cur_frame+1);
  422. timerp->mCallHistory[hidx] = timerp->getFrameState().mCalls;
  423. timerp->mCallAverage = ((U64)timerp->mCallAverage * cur_frame + timerp->getFrameState().mCalls) / (cur_frame+1);
  424. }
  425. }
  426. }
  427. // static
  428. void LLFastTimer::NamedTimer::resetFrame()
  429. {
  430. if (sLog)
  431. { //output current frame counts to performance log
  432. static S32 call_count = 0;
  433. if (call_count % 100 == 0)
  434. {
  435. llinfos << "countsPerSecond (32 bit): " << countsPerSecond() << llendl;
  436. llinfos << "get_clock_count (64 bit): " << get_clock_count() << llendl;
  437. llinfos << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << llendl;
  438. llinfos << "getCPUClockCount32() " << getCPUClockCount32() << llendl;
  439. llinfos << "getCPUClockCount64() " << getCPUClockCount64() << llendl;
  440. llinfos << "elapsed sec " << ((F64)getCPUClockCount64())/((F64)LLProcessorInfo().getCPUFrequency()*1000000.0) << llendl;
  441. }
  442. call_count++;
  443. F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency
  444. F64 total_time = 0;
  445. LLSD sd;
  446. {
  447. for (instance_iter it = beginInstances(); it != endInstances(); ++it)
  448. {
  449. NamedTimer& timer = *it;
  450. FrameState& info = timer.getFrameState();
  451. sd[timer.getName()]["Time"] = (LLSD::Real) (info.mSelfTimeCounter*iclock_freq);
  452. sd[timer.getName()]["Calls"] = (LLSD::Integer) info.mCalls;
  453. // computing total time here because getting the root timer's getCountHistory
  454. // doesn't work correctly on the first frame
  455. total_time = total_time + info.mSelfTimeCounter * iclock_freq;
  456. }
  457. }
  458. sd["Total"]["Time"] = (LLSD::Real) total_time;
  459. sd["Total"]["Calls"] = (LLSD::Integer) 1;
  460. {
  461. LLMutexLock lock(sLogLock);
  462. sLogQueue.push(sd);
  463. }
  464. }
  465. // tag timers by position in depth first traversal of tree
  466. S32 index = 0;
  467. for(timer_tree_dfs_iterator_t it = begin_timer_tree(*NamedTimerFactory::instance().getRootTimer());
  468. it != end_timer_tree();
  469. ++it)
  470. {
  471. NamedTimer* timerp = (*it);
  472. timerp->mFrameStateIndex = index;
  473. index++;
  474. llassert_always(timerp->mFrameStateIndex < (S32)getFrameStateList().size());
  475. }
  476. // sort timers by DFS traversal order to improve cache coherency
  477. std::sort(getFrameStateList().begin(), getFrameStateList().end(), SortTimersDFS());
  478. // update pointers into framestatelist now that we've sorted it
  479. DeclareTimer::updateCachedPointers();
  480. // reset for next frame
  481. {
  482. for (instance_iter it = beginInstances(); it != endInstances(); ++it)
  483. {
  484. NamedTimer& timer = *it;
  485. FrameState& info = timer.getFrameState();
  486. info.mSelfTimeCounter = 0;
  487. info.mCalls = 0;
  488. info.mLastCaller = NULL;
  489. info.mMoveUpTree = false;
  490. // update parent pointer in timer state struct
  491. if (timer.mParent)
  492. {
  493. info.mParent = &timer.mParent->getFrameState();
  494. }
  495. }
  496. }
  497. //sTimerCycles = 0;
  498. //sTimerCalls = 0;
  499. }
  500. //static
  501. void LLFastTimer::NamedTimer::reset()
  502. {
  503. resetFrame(); // reset frame data
  504. // walk up stack of active timers and reset start times to current time
  505. // effectively zeroing out any accumulated time
  506. U32 cur_time = getCPUClockCount32();
  507. // root defined by parent pointing to self
  508. CurTimerData* cur_data = &sCurTimerData;
  509. LLFastTimer* cur_timer = cur_data->mCurTimer;
  510. while(cur_timer->mLastTimerData.mCurTimer != cur_timer)
  511. {
  512. cur_timer->mStartTime = cur_time;
  513. cur_data->mChildTime = 0;
  514. cur_data = &cur_timer->mLastTimerData;
  515. cur_timer = cur_data->mCurTimer;
  516. }
  517. // reset all history
  518. {
  519. for (instance_iter it = beginInstances(); it != endInstances(); ++it)
  520. {
  521. NamedTimer& timer = *it;
  522. if (&timer != NamedTimerFactory::instance().getRootTimer())
  523. {
  524. timer.setParent(NamedTimerFactory::instance().getRootTimer());
  525. }
  526. timer.mCountAverage = 0;
  527. timer.mCallAverage = 0;
  528. memset(timer.mCountHistory, 0, sizeof(U32) * HISTORY_NUM);
  529. memset(timer.mCallHistory, 0, sizeof(U32) * HISTORY_NUM);
  530. }
  531. }
  532. sLastFrameIndex = 0;
  533. sCurFrameIndex = 0;
  534. }
  535. //static
  536. LLFastTimer::info_list_t& LLFastTimer::getFrameStateList()
  537. {
  538. if (!sTimerInfos)
  539. {
  540. sTimerInfos = new info_list_t();
  541. }
  542. return *sTimerInfos;
  543. }
  544. U32 LLFastTimer::NamedTimer::getHistoricalCount(S32 history_index) const
  545. {
  546. S32 history_idx = (getLastFrameIndex() + history_index) % LLFastTimer::NamedTimer::HISTORY_NUM;
  547. return mCountHistory[history_idx];
  548. }
  549. U32 LLFastTimer::NamedTimer::getHistoricalCalls(S32 history_index ) const
  550. {
  551. S32 history_idx = (getLastFrameIndex() + history_index) % LLFastTimer::NamedTimer::HISTORY_NUM;
  552. return mCallHistory[history_idx];
  553. }
  554. LLFastTimer::FrameState& LLFastTimer::NamedTimer::getFrameState() const
  555. {
  556. llassert_always(mFrameStateIndex >= 0);
  557. if (this == NamedTimerFactory::instance().getActiveRootTimer())
  558. {
  559. return NamedTimerFactory::instance().getRootFrameState();
  560. }
  561. return getFrameStateList()[mFrameStateIndex];
  562. }
  563. // static
  564. LLFastTimer::NamedTimer& LLFastTimer::NamedTimer::getRootNamedTimer()
  565. {
  566. return *NamedTimerFactory::instance().getActiveRootTimer();
  567. }
  568. std::vector<LLFastTimer::NamedTimer*>::const_iterator LLFastTimer::NamedTimer::beginChildren()
  569. {
  570. return mChildren.begin();
  571. }
  572. std::vector<LLFastTimer::NamedTimer*>::const_iterator LLFastTimer::NamedTimer::endChildren()
  573. {
  574. return mChildren.end();
  575. }
  576. std::vector<LLFastTimer::NamedTimer*>& LLFastTimer::NamedTimer::getChildren()
  577. {
  578. return mChildren;
  579. }
  580. //static
  581. void LLFastTimer::nextFrame()
  582. {
  583. countsPerSecond(); // good place to calculate clock frequency
  584. U64 frame_time = getCPUClockCount64();
  585. if ((frame_time - sLastFrameTime) >> 8 > 0xffffffff)
  586. {
  587. llinfos << "Slow frame, fast timers inaccurate" << llendl;
  588. }
  589. if (!sPauseHistory)
  590. {
  591. NamedTimer::processTimes();
  592. sLastFrameIndex = sCurFrameIndex++;
  593. }
  594. // get ready for next frame
  595. NamedTimer::resetFrame();
  596. sLastFrameTime = frame_time;
  597. }
  598. //static
  599. void LLFastTimer::dumpCurTimes()
  600. {
  601. // accumulate timings, etc.
  602. NamedTimer::processTimes();
  603. F64 clock_freq = (F64)countsPerSecond();
  604. F64 iclock_freq = 1000.0 / clock_freq; // clock_ticks -> milliseconds
  605. // walk over timers in depth order and output timings
  606. for(timer_tree_dfs_iterator_t it = begin_timer_tree(*NamedTimerFactory::instance().getRootTimer());
  607. it != end_timer_tree();
  608. ++it)
  609. {
  610. NamedTimer* timerp = (*it);
  611. F64 total_time_ms = ((F64)timerp->getHistoricalCount(0) * iclock_freq);
  612. // Don't bother with really brief times, keep output concise
  613. if (total_time_ms < 0.1) continue;
  614. std::ostringstream out_str;
  615. for (S32 i = 0; i < timerp->getDepth(); i++)
  616. {
  617. out_str << "\t";
  618. }
  619. out_str << timerp->getName() << " "
  620. << std::setprecision(3) << total_time_ms << " ms, "
  621. << timerp->getHistoricalCalls(0) << " calls";
  622. llinfos << out_str.str() << llendl;
  623. }
  624. }
  625. //static
  626. void LLFastTimer::reset()
  627. {
  628. NamedTimer::reset();
  629. }
  630. //static
  631. void LLFastTimer::writeLog(std::ostream& os)
  632. {
  633. while (!sLogQueue.empty())
  634. {
  635. LLSD& sd = sLogQueue.front();
  636. LLSDSerialize::toXML(sd, os);
  637. LLMutexLock lock(sLogLock);
  638. sLogQueue.pop();
  639. }
  640. }
  641. //static
  642. const LLFastTimer::NamedTimer* LLFastTimer::getTimerByName(const std::string& name)
  643. {
  644. return NamedTimerFactory::instance().getTimerByName(name);
  645. }
  646. LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state)
  647. : mFrameState(state)
  648. {
  649. U32 start_time = getCPUClockCount32();
  650. mStartTime = start_time;
  651. mFrameState->mActiveCount++;
  652. LLFastTimer::sCurTimerData.mCurTimer = this;
  653. LLFastTimer::sCurTimerData.mFrameState = mFrameState;
  654. LLFastTimer::sCurTimerData.mChildTime = 0;
  655. mLastTimerData = LLFastTimer::sCurTimerData;
  656. }
  657. //////////////////////////////////////////////////////////////////////////////
  658. //
  659. // Important note: These implementations must be FAST!
  660. //
  661. #if LL_WINDOWS
  662. //
  663. // Windows implementation of CPU clock
  664. //
  665. //
  666. // NOTE: put back in when we aren't using platform sdk anymore
  667. //
  668. // because MS has different signatures for these functions in winnt.h
  669. // need to rename them to avoid conflicts
  670. //#define _interlockedbittestandset _renamed_interlockedbittestandset
  671. //#define _interlockedbittestandreset _renamed_interlockedbittestandreset
  672. //#include <intrin.h>
  673. //#undef _interlockedbittestandset
  674. //#undef _interlockedbittestandreset
  675. //inline U32 LLFastTimer::getCPUClockCount32()
  676. //{
  677. // U64 time_stamp = __rdtsc();
  678. // return (U32)(time_stamp >> 8);
  679. //}
  680. //
  681. //// return full timer value, *not* shifted by 8 bits
  682. //inline U64 LLFastTimer::getCPUClockCount64()
  683. //{
  684. // return __rdtsc();
  685. //}
  686. // shift off lower 8 bits for lower resolution but longer term timing
  687. // on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
  688. #if USE_RDTSC
  689. U32 LLFastTimer::getCPUClockCount32()
  690. {
  691. U32 ret_val;
  692. __asm
  693. {
  694. _emit 0x0f
  695. _emit 0x31
  696. shr eax,8
  697. shl edx,24
  698. or eax, edx
  699. mov dword ptr [ret_val], eax
  700. }
  701. return ret_val;
  702. }
  703. // return full timer value, *not* shifted by 8 bits
  704. U64 LLFastTimer::getCPUClockCount64()
  705. {
  706. U64 ret_val;
  707. __asm
  708. {
  709. _emit 0x0f
  710. _emit 0x31
  711. mov eax,eax
  712. mov edx,edx
  713. mov dword ptr [ret_val+4], edx
  714. mov dword ptr [ret_val], eax
  715. }
  716. return ret_val;
  717. }
  718. std::string LLFastTimer::sClockType = "rdtsc";
  719. #else
  720. //LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp
  721. // These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures.
  722. U32 LLFastTimer::getCPUClockCount32()
  723. {
  724. return (U32)(get_clock_count()>>8);
  725. }
  726. U64 LLFastTimer::getCPUClockCount64()
  727. {
  728. return get_clock_count();
  729. }
  730. std::string LLFastTimer::sClockType = "QueryPerformanceCounter";
  731. #endif
  732. #endif
  #if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
  //
  // Linux and Solaris implementation of CPU clock - non-x86.
  // This is accurate but SLOW! Only use out of desperation.
  //
  // Try to use the MONOTONIC clock if available, this is a constant time counter
  // with nanosecond resolution (but not necessarily accuracy) and attempts are
  // made to synchronize this value between cores at kernel start. It should not
  // be affected by CPU frequency. If not available use the REALTIME clock, but
  // this may be affected by NTP adjustments or other user activity affecting
  // the system time.

  // Full-resolution clock: seconds scaled by sClockResolution plus the nanosecond part.
  U64 LLFastTimer::getCPUClockCount64()
  {
      struct timespec tp;

  #ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
      const bool monotonic_ok = (-1 != clock_gettime(CLOCK_MONOTONIC,&tp));
  #else
      const bool monotonic_ok = false;
  #endif
      if (!monotonic_ok)
      {
          // MONOTONIC unavailable (at build time or at runtime): fall back to REALTIME
          clock_gettime(CLOCK_REALTIME,&tp);
      }

      return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;
  }

  // 32-bit clock: the 64-bit value shifted right by 8 bits.
  U32 LLFastTimer::getCPUClockCount32()
  {
      const U64 full_count = LLFastTimer::getCPUClockCount64();
      return (U32)(full_count >> 8);
  }

  std::string LLFastTimer::sClockType = "clock_gettime";
  #endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
  #if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
  //
  // Mac+Linux+Solaris FAST x86 implementation of CPU clock
  //

  // Reads the time stamp counter with RDTSC (opcode 0x0f 0x31).
  // RDTSC returns the low 32 bits in EAX and the high 32 bits in EDX. The two
  // halves are requested through separate "=a"/"=d" outputs and combined by
  // hand: the "=A" constraint only denotes the EDX:EAX pair on 32-bit x86,
  // whereas on amd64 it selects a single register, which yields a wrong value
  // and leaves the other register clobbered without the compiler knowing.
  static inline U64 ll_read_tsc()
  {
      U32 lo;
      U32 hi;
      __asm__ volatile (".byte 0x0f, 0x31" : "=a"(lo), "=d"(hi));
      return ((U64)hi << 32) | (U64)lo;
  }

  // 32-bit clock: the time stamp counter shifted right by 8 bits.
  U32 LLFastTimer::getCPUClockCount32()
  {
      return (U32)(ll_read_tsc() >> 8);
  }

  // Full, unshifted 64-bit time stamp counter.
  U64 LLFastTimer::getCPUClockCount64()
  {
      return ll_read_tsc();
  }

  std::string LLFastTimer::sClockType = "rdtsc";
  #endif