PageRenderTime 90ms CodeModel.GetById 10ms app.highlight 72ms RepoModel.GetById 1ms app.codeStats 1ms

/indra/llcommon/llfasttimer_class.cpp

https://bitbucket.org/lindenlab/viewer-beta/
C++ | 921 lines | 637 code | 146 blank | 138 comment | 82 complexity | 3575a057ad377336f2d3f1a669921d71 MD5 | raw file
  1/** 
  2 * @file llfasttimer_class.cpp
  3 * @brief Implementation of the fast timer.
  4 *
  5 * $LicenseInfo:firstyear=2004&license=viewerlgpl$
  6 * Second Life Viewer Source Code
  7 * Copyright (C) 2010, Linden Research, Inc.
  8 * 
  9 * This library is free software; you can redistribute it and/or
 10 * modify it under the terms of the GNU Lesser General Public
 11 * License as published by the Free Software Foundation;
 12 * version 2.1 of the License only.
 13 * 
 14 * This library is distributed in the hope that it will be useful,
 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 17 * Lesser General Public License for more details.
 18 * 
 19 * You should have received a copy of the GNU Lesser General Public
 20 * License along with this library; if not, write to the Free Software
 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 22 * 
 23 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
 24 * $/LicenseInfo$
 25 */
 26#include "linden_common.h"
 27
 28#include "llfasttimer.h"
 29
 30#include "llmemory.h"
 31#include "llprocessor.h"
 32#include "llsingleton.h"
 33#include "lltreeiterators.h"
 34#include "llsdserialize.h"
 35
 36#include <boost/bind.hpp>
 37
 38
 39#if LL_WINDOWS
 40#include "lltimer.h"
 41#elif LL_LINUX || LL_SOLARIS
 42#include <sys/time.h>
 43#include <sched.h>
 44#include "lltimer.h"
 45#elif LL_DARWIN
 46#include <sys/time.h>
 47#include "lltimer.h"	// get_clock_count()
 48#else 
 49#error "architecture not supported"
 50#endif
 51
 52//////////////////////////////////////////////////////////////////////////////
 53// statics
 54
 55S32 LLFastTimer::sCurFrameIndex = -1;
 56S32 LLFastTimer::sLastFrameIndex = -1;
 57U64 LLFastTimer::sLastFrameTime = LLFastTimer::getCPUClockCount64();
 58bool LLFastTimer::sPauseHistory = 0;
 59bool LLFastTimer::sResetHistory = 0;
 60LLFastTimer::CurTimerData LLFastTimer::sCurTimerData;
 61BOOL LLFastTimer::sLog = FALSE;
 62std::string LLFastTimer::sLogName = "";
 63BOOL LLFastTimer::sMetricLog = FALSE;
 64LLMutex* LLFastTimer::sLogLock = NULL;
 65std::queue<LLSD> LLFastTimer::sLogQueue;
 66
 67#define USE_RDTSC 0
 68
 69#if LL_LINUX || LL_SOLARIS
 70U64 LLFastTimer::sClockResolution = 1000000000; // Nanosecond resolution
 71#else
 72U64 LLFastTimer::sClockResolution = 1000000; // Microsecond resolution
 73#endif
 74
 75std::vector<LLFastTimer::FrameState>* LLFastTimer::sTimerInfos = NULL;
 76U64				LLFastTimer::sTimerCycles = 0;
 77U32				LLFastTimer::sTimerCalls = 0;
 78
 79
 80// FIXME: move these declarations to the relevant modules
 81
 82// helper functions
 83typedef LLTreeDFSPostIter<LLFastTimer::NamedTimer, LLFastTimer::NamedTimer::child_const_iter> timer_tree_bottom_up_iterator_t;
 84
 85static timer_tree_bottom_up_iterator_t begin_timer_tree_bottom_up(LLFastTimer::NamedTimer& id) 
 86{ 
 87	return timer_tree_bottom_up_iterator_t(&id, 
 88							boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::beginChildren), _1), 
 89							boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::endChildren), _1));
 90}
 91
 92static timer_tree_bottom_up_iterator_t end_timer_tree_bottom_up() 
 93{ 
 94	return timer_tree_bottom_up_iterator_t(); 
 95}
 96
 97typedef LLTreeDFSIter<LLFastTimer::NamedTimer, LLFastTimer::NamedTimer::child_const_iter> timer_tree_dfs_iterator_t;
 98
 99
100static timer_tree_dfs_iterator_t begin_timer_tree(LLFastTimer::NamedTimer& id) 
101{ 
102	return timer_tree_dfs_iterator_t(&id, 
103		boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::beginChildren), _1), 
104							boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::endChildren), _1));
105}
106
107static timer_tree_dfs_iterator_t end_timer_tree() 
108{ 
109	return timer_tree_dfs_iterator_t(); 
110}
111
112
113
114// factory class that creates NamedTimers via static DeclareTimer objects
115class NamedTimerFactory : public LLSingleton<NamedTimerFactory>
116{
117public:
118	NamedTimerFactory()
119		: mActiveTimerRoot(NULL),
120		  mTimerRoot(NULL),
121		  mAppTimer(NULL),
122		  mRootFrameState(NULL)
123	{}
124
125	/*virtual */ void initSingleton()
126	{
127		mTimerRoot = new LLFastTimer::NamedTimer("root");
128
129		mActiveTimerRoot = new LLFastTimer::NamedTimer("Frame");
130		mActiveTimerRoot->setCollapsed(false);
131
132		mRootFrameState = new LLFastTimer::FrameState(mActiveTimerRoot);
133		mRootFrameState->mParent = &mTimerRoot->getFrameState();
134		mActiveTimerRoot->setParent(mTimerRoot);
135
136		mAppTimer = new LLFastTimer(mRootFrameState);
137	}
138
139	~NamedTimerFactory()
140	{
141		std::for_each(mTimers.begin(), mTimers.end(), DeletePairedPointer());
142
143		delete mAppTimer;
144		delete mActiveTimerRoot; 
145		delete mTimerRoot;
146		delete mRootFrameState;
147	}
148
149	LLFastTimer::NamedTimer& createNamedTimer(const std::string& name)
150	{
151		timer_map_t::iterator found_it = mTimers.find(name);
152		if (found_it != mTimers.end())
153		{
154			return *found_it->second;
155		}
156
157		LLFastTimer::NamedTimer* timer = new LLFastTimer::NamedTimer(name);
158		timer->setParent(mTimerRoot);
159		mTimers.insert(std::make_pair(name, timer));
160
161		return *timer;
162	}
163
164	LLFastTimer::NamedTimer* getTimerByName(const std::string& name)
165	{
166		timer_map_t::iterator found_it = mTimers.find(name);
167		if (found_it != mTimers.end())
168		{
169			return found_it->second;
170		}
171		return NULL;
172	}
173
174	LLFastTimer::NamedTimer* getActiveRootTimer() { return mActiveTimerRoot; }
175	LLFastTimer::NamedTimer* getRootTimer() { return mTimerRoot; }
176	const LLFastTimer* getAppTimer() { return mAppTimer; }
177	LLFastTimer::FrameState& getRootFrameState() { return *mRootFrameState; }
178
179	typedef std::map<std::string, LLFastTimer::NamedTimer*> timer_map_t;
180	timer_map_t::iterator beginTimers() { return mTimers.begin(); }
181	timer_map_t::iterator endTimers() { return mTimers.end(); }
182	S32 timerCount() { return mTimers.size(); }
183
184private:
185	timer_map_t mTimers;
186
187	LLFastTimer::NamedTimer*		mActiveTimerRoot;
188	LLFastTimer::NamedTimer*		mTimerRoot;
189	LLFastTimer*						mAppTimer;
190	LLFastTimer::FrameState*		mRootFrameState;
191};
192
193void update_cached_pointers_if_changed()
194{
195	// detect when elements have moved and update cached pointers
196	static LLFastTimer::FrameState* sFirstTimerAddress = NULL;
197	if (&*(LLFastTimer::getFrameStateList().begin()) != sFirstTimerAddress)
198	{
199		LLFastTimer::DeclareTimer::updateCachedPointers();
200	}
201	sFirstTimerAddress = &*(LLFastTimer::getFrameStateList().begin());
202}
203
204LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name, bool open )
205:	mTimer(NamedTimerFactory::instance().createNamedTimer(name))
206{
207	mTimer.setCollapsed(!open);
208	mFrameState = &mTimer.getFrameState();
209	update_cached_pointers_if_changed();
210}
211
212LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name)
213:	mTimer(NamedTimerFactory::instance().createNamedTimer(name))
214{
215	mFrameState = &mTimer.getFrameState();
216	update_cached_pointers_if_changed();
217}
218
219// static
220void LLFastTimer::DeclareTimer::updateCachedPointers()
221{
222	// propagate frame state pointers to timer declarations
223	for (instance_iter it = beginInstances(); it != endInstances(); ++it)
224	{
225		// update cached pointer
226		it->mFrameState = &it->mTimer.getFrameState();
227	}
228
229	// also update frame states of timers on stack
230	LLFastTimer* cur_timerp = LLFastTimer::sCurTimerData.mCurTimer;
231	while(cur_timerp->mLastTimerData.mCurTimer != cur_timerp)	
232	{
233		cur_timerp->mFrameState = &cur_timerp->mFrameState->mTimer->getFrameState();
234		cur_timerp = cur_timerp->mLastTimerData.mCurTimer;
235	}
236}
237
238//static
239#if (LL_DARWIN || LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
240U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
241{
242	return sClockResolution >> 8;
243}
244#else // windows or x86-mac or x86-linux or x86-solaris
245U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
246{
247#if USE_RDTSC || !LL_WINDOWS
248	//getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz
249	static U64 sCPUClockFrequency = U64(LLProcessorInfo().getCPUFrequency()*1000000.0);
250
251	// we drop the low-order byte in our timers, so report a lower frequency
252#else
253	// If we're not using RDTSC, each fasttimer tick is just a performance counter tick.
254	// Not redefining the clock frequency itself (in llprocessor.cpp/calculate_cpu_frequency())
255	// since that would change displayed MHz stats for CPUs
256	static bool firstcall = true;
257	static U64 sCPUClockFrequency;
258	if (firstcall)
259	{
260		QueryPerformanceFrequency((LARGE_INTEGER*)&sCPUClockFrequency);
261		firstcall = false;
262	}
263#endif
264	return sCPUClockFrequency >> 8;
265}
266#endif
267
268LLFastTimer::FrameState::FrameState(LLFastTimer::NamedTimer* timerp)
269:	mActiveCount(0),
270	mCalls(0),
271	mSelfTimeCounter(0),
272	mParent(NULL),
273	mLastCaller(NULL),
274	mMoveUpTree(false),
275	mTimer(timerp)
276{}
277
278
279LLFastTimer::NamedTimer::NamedTimer(const std::string& name)
280:	mName(name),
281	mCollapsed(true),
282	mParent(NULL),
283	mTotalTimeCounter(0),
284	mCountAverage(0),
285	mCallAverage(0),
286	mNeedsSorting(false)
287{
288	info_list_t& frame_state_list = getFrameStateList();
289	mFrameStateIndex = frame_state_list.size();
290	getFrameStateList().push_back(FrameState(this));
291
292	mCountHistory = new U32[HISTORY_NUM];
293	memset(mCountHistory, 0, sizeof(U32) * HISTORY_NUM);
294	mCallHistory = new U32[HISTORY_NUM];
295	memset(mCallHistory, 0, sizeof(U32) * HISTORY_NUM);
296}
297
298LLFastTimer::NamedTimer::~NamedTimer()
299{
300	delete[] mCountHistory;
301	delete[] mCallHistory;
302}
303
304std::string LLFastTimer::NamedTimer::getToolTip(S32 history_idx)
305{
306	F64 ms_multiplier = 1000.0 / (F64)LLFastTimer::countsPerSecond();
307	if (history_idx < 0)
308	{
309		// by default, show average number of call
310		return llformat("%s (%d ms, %d calls)", getName().c_str(), (S32)(getCountAverage() * ms_multiplier), (S32)getCallAverage());
311	}
312	else
313	{
314		return llformat("%s (%d ms, %d calls)", getName().c_str(), (S32)(getHistoricalCount(history_idx) * ms_multiplier), (S32)getHistoricalCalls(history_idx));
315	}
316}
317
318void LLFastTimer::NamedTimer::setParent(NamedTimer* parent)
319{
320	llassert_always(parent != this);
321	llassert_always(parent != NULL);
322
323	if (mParent)
324	{
325		// subtract our accumulated from previous parent
326		for (S32 i = 0; i < HISTORY_NUM; i++)
327		{
328			mParent->mCountHistory[i] -= mCountHistory[i];
329		}
330
331		// subtract average timing from previous parent
332		mParent->mCountAverage -= mCountAverage;
333
334		std::vector<NamedTimer*>& children = mParent->getChildren();
335		std::vector<NamedTimer*>::iterator found_it = std::find(children.begin(), children.end(), this);
336		if (found_it != children.end())
337		{
338			children.erase(found_it);
339		}
340	}
341
342	mParent = parent;
343	if (parent)
344	{
345		getFrameState().mParent = &parent->getFrameState();
346		parent->getChildren().push_back(this);
347		parent->mNeedsSorting = true;
348	}
349}
350
351S32 LLFastTimer::NamedTimer::getDepth()
352{
353	S32 depth = 0;
354	NamedTimer* timerp = mParent;
355	while(timerp)
356	{
357		depth++;
358		timerp = timerp->mParent;
359	}
360	return depth;
361}
362
363// static
364void LLFastTimer::NamedTimer::processTimes()
365{
366	if (sCurFrameIndex < 0) return;
367
368	buildHierarchy();
369	accumulateTimings();
370}
371
372// sort timer info structs by depth first traversal order
373struct SortTimersDFS
374{
375	bool operator()(const LLFastTimer::FrameState& i1, const LLFastTimer::FrameState& i2)
376	{
377		return i1.mTimer->getFrameStateIndex() < i2.mTimer->getFrameStateIndex();
378	}
379};
380
381// sort child timers by name
382struct SortTimerByName
383{
384	bool operator()(const LLFastTimer::NamedTimer* i1, const LLFastTimer::NamedTimer* i2)
385	{
386		return i1->getName() < i2->getName();
387	}
388};
389
390//static
391void LLFastTimer::NamedTimer::buildHierarchy()
392{
393	if (sCurFrameIndex < 0 ) return;
394
395	// set up initial tree
396	{
397		for (instance_iter it = beginInstances(); it != endInstances(); ++it)
398		{
399			NamedTimer& timer = *it;
400			if (&timer == NamedTimerFactory::instance().getRootTimer()) continue;
401			
402			// bootstrap tree construction by attaching to last timer to be on stack
403			// when this timer was called
404			if (timer.getFrameState().mLastCaller && timer.mParent == NamedTimerFactory::instance().getRootTimer())
405			{
406				timer.setParent(timer.getFrameState().mLastCaller->mTimer);
407				// no need to push up tree on first use, flag can be set spuriously
408				timer.getFrameState().mMoveUpTree = false;
409			}
410		}
411	}
412
413	// bump timers up tree if they've been flagged as being in the wrong place
414	// do this in a bottom up order to promote descendants first before promoting ancestors
415	// this preserves partial order derived from current frame's observations
416	for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(*NamedTimerFactory::instance().getRootTimer());
417		it != end_timer_tree_bottom_up();
418		++it)
419	{
420		NamedTimer* timerp = *it;
421		// skip root timer
422		if (timerp == NamedTimerFactory::instance().getRootTimer()) continue;
423
424		if (timerp->getFrameState().mMoveUpTree)
425		{
426			// since ancestors have already been visited, reparenting won't affect tree traversal
427			//step up tree, bringing our descendants with us
428			//llinfos << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() <<
429			//	" to child of " << timerp->getParent()->getParent()->getName() << llendl;
430			timerp->setParent(timerp->getParent()->getParent());
431			timerp->getFrameState().mMoveUpTree = false;
432
433			// don't bubble up any ancestors until descendants are done bubbling up
434			it.skipAncestors();
435		}
436	}
437
438	// sort timers by time last called, so call graph makes sense
439	for(timer_tree_dfs_iterator_t it = begin_timer_tree(*NamedTimerFactory::instance().getRootTimer());
440		it != end_timer_tree();
441		++it)
442	{
443		NamedTimer* timerp = (*it);
444		if (timerp->mNeedsSorting)
445		{
446			std::sort(timerp->getChildren().begin(), timerp->getChildren().end(), SortTimerByName());
447		}
448		timerp->mNeedsSorting = false;
449	}
450}
451
452//static
453void LLFastTimer::NamedTimer::accumulateTimings()
454{
455	U32 cur_time = getCPUClockCount32();
456
457	// walk up stack of active timers and accumulate current time while leaving timing structures active
458	LLFastTimer* cur_timer = sCurTimerData.mCurTimer;
459	// root defined by parent pointing to self
460	CurTimerData* cur_data = &sCurTimerData;
461	while(cur_timer->mLastTimerData.mCurTimer != cur_timer)
462	{
463		U32 cumulative_time_delta = cur_time - cur_timer->mStartTime;
464		U32 self_time_delta = cumulative_time_delta - cur_data->mChildTime;
465		cur_data->mChildTime = 0;
466		cur_timer->mFrameState->mSelfTimeCounter += self_time_delta;
467		cur_timer->mStartTime = cur_time;
468
469		cur_data = &cur_timer->mLastTimerData;
470		cur_data->mChildTime += cumulative_time_delta;
471
472		cur_timer = cur_timer->mLastTimerData.mCurTimer;
473	}
474
475	// traverse tree in DFS post order, or bottom up
476	for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(*NamedTimerFactory::instance().getActiveRootTimer());
477		it != end_timer_tree_bottom_up();
478		++it)
479	{
480		NamedTimer* timerp = (*it);
481		timerp->mTotalTimeCounter = timerp->getFrameState().mSelfTimeCounter;
482		for (child_const_iter child_it = timerp->beginChildren(); child_it != timerp->endChildren(); ++child_it)
483		{
484			timerp->mTotalTimeCounter += (*child_it)->mTotalTimeCounter;
485		}
486
487		S32 cur_frame = sCurFrameIndex;
488		if (cur_frame >= 0)
489		{
490			// update timer history
491			int hidx = cur_frame % HISTORY_NUM;
492
493			timerp->mCountHistory[hidx] = timerp->mTotalTimeCounter;
494			timerp->mCountAverage = ((U64)timerp->mCountAverage * cur_frame + timerp->mTotalTimeCounter) / (cur_frame+1);
495			timerp->mCallHistory[hidx] = timerp->getFrameState().mCalls;
496			timerp->mCallAverage = ((U64)timerp->mCallAverage * cur_frame + timerp->getFrameState().mCalls) / (cur_frame+1);
497		}
498	}
499}
500
501// static
502void LLFastTimer::NamedTimer::resetFrame()
503{
504	if (sLog)
505	{ //output current frame counts to performance log
506
507		static S32 call_count = 0;
508		if (call_count % 100 == 0)
509		{
510			llinfos << "countsPerSecond (32 bit): " << countsPerSecond() << llendl;
511			llinfos << "get_clock_count (64 bit): " << get_clock_count() << llendl;
512			llinfos << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << llendl;
513			llinfos << "getCPUClockCount32() " << getCPUClockCount32() << llendl;
514			llinfos << "getCPUClockCount64() " << getCPUClockCount64() << llendl;
515			llinfos << "elapsed sec " << ((F64)getCPUClockCount64())/((F64)LLProcessorInfo().getCPUFrequency()*1000000.0) << llendl;
516		}
517		call_count++;
518
519		F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency
520
521		F64 total_time = 0;
522		LLSD sd;
523
524		{
525			for (instance_iter it = beginInstances(); it != endInstances(); ++it)
526			{
527				NamedTimer& timer = *it;
528				FrameState& info = timer.getFrameState();
529				sd[timer.getName()]["Time"] = (LLSD::Real) (info.mSelfTimeCounter*iclock_freq);	
530				sd[timer.getName()]["Calls"] = (LLSD::Integer) info.mCalls;
531				
532				// computing total time here because getting the root timer's getCountHistory
533				// doesn't work correctly on the first frame
534				total_time = total_time + info.mSelfTimeCounter * iclock_freq;
535			}
536		}
537
538		sd["Total"]["Time"] = (LLSD::Real) total_time;
539		sd["Total"]["Calls"] = (LLSD::Integer) 1;
540
541		{		
542			LLMutexLock lock(sLogLock);
543			sLogQueue.push(sd);
544		}
545	}
546
547
548	// tag timers by position in depth first traversal of tree
549	S32 index = 0;
550	for(timer_tree_dfs_iterator_t it = begin_timer_tree(*NamedTimerFactory::instance().getRootTimer());
551		it != end_timer_tree();
552		++it)
553	{
554		NamedTimer* timerp = (*it);
555		
556		timerp->mFrameStateIndex = index;
557		index++;
558
559		llassert_always(timerp->mFrameStateIndex < (S32)getFrameStateList().size());
560	}
561
562	// sort timers by DFS traversal order to improve cache coherency
563	std::sort(getFrameStateList().begin(), getFrameStateList().end(), SortTimersDFS());
564
565	// update pointers into framestatelist now that we've sorted it
566	DeclareTimer::updateCachedPointers();
567
568	// reset for next frame
569	{
570		for (instance_iter it = beginInstances(); it != endInstances(); ++it)
571		{
572			NamedTimer& timer = *it;
573			
574			FrameState& info = timer.getFrameState();
575			info.mSelfTimeCounter = 0;
576			info.mCalls = 0;
577			info.mLastCaller = NULL;
578			info.mMoveUpTree = false;
579			// update parent pointer in timer state struct
580			if (timer.mParent)
581			{
582				info.mParent = &timer.mParent->getFrameState();
583			}
584		}
585	}
586
587	//sTimerCycles = 0;
588	//sTimerCalls = 0;
589}
590
591//static
592void LLFastTimer::NamedTimer::reset()
593{
594	resetFrame(); // reset frame data
595
596	// walk up stack of active timers and reset start times to current time
597	// effectively zeroing out any accumulated time
598	U32 cur_time = getCPUClockCount32();
599
600	// root defined by parent pointing to self
601	CurTimerData* cur_data = &sCurTimerData;
602	LLFastTimer* cur_timer = cur_data->mCurTimer;
603	while(cur_timer->mLastTimerData.mCurTimer != cur_timer)
604	{
605		cur_timer->mStartTime = cur_time;
606		cur_data->mChildTime = 0;
607
608		cur_data = &cur_timer->mLastTimerData;
609		cur_timer = cur_data->mCurTimer;
610	}
611
612	// reset all history
613	{
614		for (instance_iter it = beginInstances(); it != endInstances(); ++it)
615		{
616			NamedTimer& timer = *it;
617			if (&timer != NamedTimerFactory::instance().getRootTimer()) 
618			{
619				timer.setParent(NamedTimerFactory::instance().getRootTimer());
620			}
621			
622			timer.mCountAverage = 0;
623			timer.mCallAverage = 0;
624			memset(timer.mCountHistory, 0, sizeof(U32) * HISTORY_NUM);
625			memset(timer.mCallHistory, 0, sizeof(U32) * HISTORY_NUM);
626		}
627	}
628
629	sLastFrameIndex = 0;
630	sCurFrameIndex = 0;
631}
632
633//static 
634LLFastTimer::info_list_t& LLFastTimer::getFrameStateList() 
635{ 
636	if (!sTimerInfos) 
637	{ 
638		sTimerInfos = new info_list_t(); 
639	} 
640	return *sTimerInfos; 
641}
642
643
644U32 LLFastTimer::NamedTimer::getHistoricalCount(S32 history_index) const
645{
646	S32 history_idx = (getLastFrameIndex() + history_index) % LLFastTimer::NamedTimer::HISTORY_NUM;
647	return mCountHistory[history_idx];
648}
649
650U32 LLFastTimer::NamedTimer::getHistoricalCalls(S32 history_index ) const
651{
652	S32 history_idx = (getLastFrameIndex() + history_index) % LLFastTimer::NamedTimer::HISTORY_NUM;
653	return mCallHistory[history_idx];
654}
655
656LLFastTimer::FrameState& LLFastTimer::NamedTimer::getFrameState() const
657{
658	llassert_always(mFrameStateIndex >= 0);
659	if (this == NamedTimerFactory::instance().getActiveRootTimer()) 
660	{
661		return NamedTimerFactory::instance().getRootFrameState();
662	}
663	return getFrameStateList()[mFrameStateIndex];
664}
665
666// static
667LLFastTimer::NamedTimer& LLFastTimer::NamedTimer::getRootNamedTimer()
668{ 
669	return *NamedTimerFactory::instance().getActiveRootTimer(); 
670}
671
672std::vector<LLFastTimer::NamedTimer*>::const_iterator LLFastTimer::NamedTimer::beginChildren()
673{ 
674	return mChildren.begin(); 
675}
676
677std::vector<LLFastTimer::NamedTimer*>::const_iterator LLFastTimer::NamedTimer::endChildren()
678{
679	return mChildren.end();
680}
681
682std::vector<LLFastTimer::NamedTimer*>& LLFastTimer::NamedTimer::getChildren()
683{
684	return mChildren;
685}
686
687//static
688void LLFastTimer::nextFrame()
689{
690	countsPerSecond(); // good place to calculate clock frequency
691	U64 frame_time = getCPUClockCount64();
692	if ((frame_time - sLastFrameTime) >> 8 > 0xffffffff)
693	{
694		llinfos << "Slow frame, fast timers inaccurate" << llendl;
695	}
696
697	if (!sPauseHistory)
698	{
699		NamedTimer::processTimes();
700		sLastFrameIndex = sCurFrameIndex++;
701	}
702	
703	// get ready for next frame
704	NamedTimer::resetFrame();
705	sLastFrameTime = frame_time;
706}
707
708//static
709void LLFastTimer::dumpCurTimes()
710{
711	// accumulate timings, etc.
712	NamedTimer::processTimes();
713	
714	F64 clock_freq = (F64)countsPerSecond();
715	F64 iclock_freq = 1000.0 / clock_freq; // clock_ticks -> milliseconds
716
717	// walk over timers in depth order and output timings
718	for(timer_tree_dfs_iterator_t it = begin_timer_tree(*NamedTimerFactory::instance().getRootTimer());
719		it != end_timer_tree();
720		++it)
721	{
722		NamedTimer* timerp = (*it);
723		F64 total_time_ms = ((F64)timerp->getHistoricalCount(0) * iclock_freq);
724		// Don't bother with really brief times, keep output concise
725		if (total_time_ms < 0.1) continue;
726
727		std::ostringstream out_str;
728		for (S32 i = 0; i < timerp->getDepth(); i++)
729		{
730			out_str << "\t";
731		}
732
733
734		out_str << timerp->getName() << " " 
735			<< std::setprecision(3) << total_time_ms << " ms, "
736			<< timerp->getHistoricalCalls(0) << " calls";
737
738		llinfos << out_str.str() << llendl;
739	}
740}
741
742//static 
743void LLFastTimer::reset()
744{
745	NamedTimer::reset();
746}
747
748
749//static
750void LLFastTimer::writeLog(std::ostream& os)
751{
752	while (!sLogQueue.empty())
753	{
754		LLSD& sd = sLogQueue.front();
755		LLSDSerialize::toXML(sd, os);
756		LLMutexLock lock(sLogLock);
757		sLogQueue.pop();
758	}
759}
760
761//static
762const LLFastTimer::NamedTimer* LLFastTimer::getTimerByName(const std::string& name)
763{
764	return NamedTimerFactory::instance().getTimerByName(name);
765}
766
767LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state)
768:	mFrameState(state)
769{
770	U32 start_time = getCPUClockCount32();
771	mStartTime = start_time;
772	mFrameState->mActiveCount++;
773	LLFastTimer::sCurTimerData.mCurTimer = this;
774	LLFastTimer::sCurTimerData.mFrameState = mFrameState;
775	LLFastTimer::sCurTimerData.mChildTime = 0;
776	mLastTimerData = LLFastTimer::sCurTimerData;
777}
778
779
780//////////////////////////////////////////////////////////////////////////////
781//
782// Important note: These implementations must be FAST!
783//
784
785
786#if LL_WINDOWS
787//
788// Windows implementation of CPU clock
789//
790
791//
792// NOTE: put back in when we aren't using platform sdk anymore
793//
794// because MS has different signatures for these functions in winnt.h
795// need to rename them to avoid conflicts
796//#define _interlockedbittestandset _renamed_interlockedbittestandset
797//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
798//#include <intrin.h>
799//#undef _interlockedbittestandset
800//#undef _interlockedbittestandreset
801
802//inline U32 LLFastTimer::getCPUClockCount32()
803//{
804//	U64 time_stamp = __rdtsc();
805//	return (U32)(time_stamp >> 8);
806//}
807//
808//// return full timer value, *not* shifted by 8 bits
809//inline U64 LLFastTimer::getCPUClockCount64()
810//{
811//	return __rdtsc();
812//}
813
814// shift off lower 8 bits for lower resolution but longer term timing
815// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
816#if USE_RDTSC
817U32 LLFastTimer::getCPUClockCount32()
818{
819	U32 ret_val;
820	__asm
821	{
822        _emit   0x0f
823        _emit   0x31
824		shr eax,8
825		shl edx,24
826		or eax, edx
827		mov dword ptr [ret_val], eax
828	}
829    return ret_val;
830}
831
832// return full timer value, *not* shifted by 8 bits
833U64 LLFastTimer::getCPUClockCount64()
834{
835	U64 ret_val;
836	__asm
837	{
838        _emit   0x0f
839        _emit   0x31
840		mov eax,eax
841		mov edx,edx
842		mov dword ptr [ret_val+4], edx
843		mov dword ptr [ret_val], eax
844	}
845    return ret_val;
846}
847
848std::string LLFastTimer::sClockType = "rdtsc";
849
850#else
851//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp
852// These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures.
853U32 LLFastTimer::getCPUClockCount32()
854{
855	return (U32)(get_clock_count()>>8);
856}
857
858U64 LLFastTimer::getCPUClockCount64()
859{
860	return get_clock_count();
861}
862
863std::string LLFastTimer::sClockType = "QueryPerformanceCounter";
864#endif
865
866#endif
867
868
869#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
870//
871// Linux and Solaris implementation of CPU clock - non-x86.
872// This is accurate but SLOW!  Only use out of desperation.
873//
874// Try to use the MONOTONIC clock if available, this is a constant time counter
875// with nanosecond resolution (but not necessarily accuracy) and attempts are
876// made to synchronize this value between cores at kernel start. It should not
877// be affected by CPU frequency. If not available use the REALTIME clock, but
878// this may be affected by NTP adjustments or other user activity affecting
879// the system time.
880U64 LLFastTimer::getCPUClockCount64()
881{
882	struct timespec tp;
883	
884#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
885	if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
886#endif
887		clock_gettime(CLOCK_REALTIME,&tp);
888
889	return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;        
890}
891
892U32 LLFastTimer::getCPUClockCount32()
893{
894	return (U32)(LLFastTimer::getCPUClockCount64() >> 8);
895}
896
897std::string LLFastTimer::sClockType = "clock_gettime";
898
899#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
900
901
902#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
903//
904// Mac+Linux+Solaris FAST x86 implementation of CPU clock
905U32 LLFastTimer::getCPUClockCount32()
906{
907	U64 x;
908	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
909	return (U32)(x >> 8);
910}
911
912U64 LLFastTimer::getCPUClockCount64()
913{
914	U64 x;
915	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
916	return x;
917}
918
919std::string LLFastTimer::sClockType = "rdtsc";
920#endif
921