PageRenderTime 40ms CodeModel.GetById 1ms app.highlight 33ms RepoModel.GetById 1ms app.codeStats 0ms

/indra/llmath/llmath.h

https://bitbucket.org/lindenlab/viewer-beta/
C++ Header | 549 lines | 331 code | 84 blank | 134 comment | 24 complexity | c3098b4216bf2b1b9a3180ea5592e022 MD5 | raw file
  1/** 
  2 * @file llmath.h
  3 * @brief Useful math constants and macros.
  4 *
  5 * $LicenseInfo:firstyear=2000&license=viewerlgpl$
  6 * Second Life Viewer Source Code
  7 * Copyright (C) 2010, Linden Research, Inc.
  8 * 
  9 * This library is free software; you can redistribute it and/or
 10 * modify it under the terms of the GNU Lesser General Public
 11 * License as published by the Free Software Foundation;
 12 * version 2.1 of the License only.
 13 * 
 14 * This library is distributed in the hope that it will be useful,
 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 17 * Lesser General Public License for more details.
 18 * 
 19 * You should have received a copy of the GNU Lesser General Public
 20 * License along with this library; if not, write to the Free Software
 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 22 * 
 23 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
 24 * $/LicenseInfo$
 25 */
 26
 27#ifndef LLMATH_H
 28#define LLMATH_H
 29
 30#include <cmath>
 31#include <cstdlib>
 32#include <vector>
 33#include "lldefs.h"
 34//#include "llstl.h" // *TODO: Remove when LLString is gone
 35//#include "llstring.h" // *TODO: Remove when LLString is gone
 36// lltut.h uses is_approx_equal_fraction(). This was moved to its own header
 37// file in llcommon so we can use lltut.h for llcommon tests without making
 38// llcommon depend on llmath.
 39#include "is_approx_equal_fraction.h"
 40
 41// work around for Windows & older gcc non-standard function names.
 42#if LL_WINDOWS
 43#include <float.h>
 44#define llisnan(val)	_isnan(val)
 45#define llfinite(val)	_finite(val)
 46#elif (LL_LINUX && __GNUC__ <= 2)
 47#define llisnan(val)	isnan(val)
 48#define llfinite(val)	isfinite(val)
 49#elif LL_SOLARIS
 50#define llisnan(val)    isnan(val)
 51#define llfinite(val)   (val <= std::numeric_limits<double>::max())
 52#else
 53#define llisnan(val)	std::isnan(val)
 54#define llfinite(val)	std::isfinite(val)
 55#endif
 56
 57// Single Precision Floating Point Routines
 58// (There used to be more defined here, but they appeared to be redundant and 
 59// were breaking some other includes. Removed by Falcon, reviewed by Andrew, 11/25/09)
 60/*#ifndef tanf
 61#define tanf(x)		((F32)tan((F64)(x)))
 62#endif*/
 63
 64const F32	GRAVITY			= -9.8f;
 65
 66// mathematical constants
 67const F32	F_PI		= 3.1415926535897932384626433832795f;
 68const F32	F_TWO_PI	= 6.283185307179586476925286766559f;
 69const F32	F_PI_BY_TWO	= 1.5707963267948966192313216916398f;
 70const F32	F_SQRT_TWO_PI = 2.506628274631000502415765284811f;
 71const F32	F_E			= 2.71828182845904523536f;
 72const F32	F_SQRT2		= 1.4142135623730950488016887242097f;
 73const F32	F_SQRT3		= 1.73205080756888288657986402541f;
 74const F32	OO_SQRT2	= 0.7071067811865475244008443621049f;
 75const F32	DEG_TO_RAD	= 0.017453292519943295769236907684886f;
 76const F32	RAD_TO_DEG	= 57.295779513082320876798154814105f;
 77const F32	F_APPROXIMATELY_ZERO = 0.00001f;
 78const F32	F_LN2		= 0.69314718056f;
 79const F32	OO_LN2		= 1.4426950408889634073599246810019f;
 80
 81const F32	F_ALMOST_ZERO	= 0.0001f;
 82const F32	F_ALMOST_ONE	= 1.0f - F_ALMOST_ZERO;
 83
 84// BUG: Eliminate in favor of F_APPROXIMATELY_ZERO above?
 85const F32 FP_MAG_THRESHOLD = 0.0000001f;
 86
 87// TODO: Replace with logic like is_approx_equal
 88inline BOOL is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < F_APPROXIMATELY_ZERO); }
 89
 90// These functions work by interpreting sign+exp+mantissa as an unsigned
 91// integer.
 92// For example:
 93// x = <sign>1 <exponent>00000010 <mantissa>00000000000000000000000
 94// y = <sign>1 <exponent>00000001 <mantissa>11111111111111111111111
 95//
 96// interpreted as ints = 
 97// x = 10000001000000000000000000000000
 98// y = 10000000111111111111111111111111
 99// which is clearly a different of 1 in the least significant bit
100// Values with the same exponent can be trivially shown to work.
101//
102// WARNING: Denormals of opposite sign do not work
103// x = <sign>1 <exponent>00000000 <mantissa>00000000000000000000001
104// y = <sign>0 <exponent>00000000 <mantissa>00000000000000000000001
105// Although these values differ by 2 in the LSB, the sign bit makes
106// the int comparison fail.
107//
108// WARNING: NaNs can compare equal
109// There is no special treatment of exceptional values like NaNs
110//
111// WARNING: Infinity is comparable with F32_MAX and negative 
112// infinity is comparable with F32_MIN
113
114inline BOOL is_approx_equal(F32 x, F32 y)
115{
116	const S32 COMPARE_MANTISSA_UP_TO_BIT = 0x02;
117	return (std::abs((S32) ((U32&)x - (U32&)y) ) < COMPARE_MANTISSA_UP_TO_BIT);
118}
119
120inline BOOL is_approx_equal(F64 x, F64 y)
121{
122	const S64 COMPARE_MANTISSA_UP_TO_BIT = 0x02;
123	return (std::abs((S32) ((U64&)x - (U64&)y) ) < COMPARE_MANTISSA_UP_TO_BIT);
124}
125
126inline S32 llabs(const S32 a)
127{
128	return S32(std::labs(a));
129}
130
131inline F32 llabs(const F32 a)
132{
133	return F32(std::fabs(a));
134}
135
136inline F64 llabs(const F64 a)
137{
138	return F64(std::fabs(a));
139}
140
141inline S32 lltrunc( F32 f )
142{
143#if LL_WINDOWS && !defined( __INTEL_COMPILER )
144		// Avoids changing the floating point control word.
145		// Add or subtract 0.5 - epsilon and then round
146		const static U32 zpfp[] = { 0xBEFFFFFF, 0x3EFFFFFF };
147		S32 result;
148		__asm {
149			fld		f
150			mov		eax,	f
151			shr		eax,	29
152			and		eax,	4
153			fadd	dword ptr [zpfp + eax]
154			fistp	result
155		}
156		return result;
157#else
158		return (S32)f;
159#endif
160}
161
162inline S32 lltrunc( F64 f )
163{
164	return (S32)f;
165}
166
167inline S32 llfloor( F32 f )
168{
169#if LL_WINDOWS && !defined( __INTEL_COMPILER )
170		// Avoids changing the floating point control word.
171		// Accurate (unlike Stereopsis version) for all values between S32_MIN and S32_MAX and slightly faster than Stereopsis version.
172		// Add -(0.5 - epsilon) and then round
173		const U32 zpfp = 0xBEFFFFFF;
174		S32 result;
175		__asm { 
176			fld		f
177			fadd	dword ptr [zpfp]
178			fistp	result
179		}
180		return result;
181#else
182		return (S32)floor(f);
183#endif
184}
185
186
187inline S32 llceil( F32 f )
188{
189	// This could probably be optimized, but this works.
190	return (S32)ceil(f);
191}
192
193
194#ifndef BOGUS_ROUND
195// Use this round.  Does an arithmetic round (0.5 always rounds up)
196inline S32 llround(const F32 val)
197{
198	return llfloor(val + 0.5f);
199}
200
201#else // BOGUS_ROUND
202// Old llround implementation - does banker's round (toward nearest even in the case of a 0.5.
203// Not using this because we don't have a consistent implementation on both platforms, use
204// llfloor(val + 0.5f), which is consistent on all platforms.
205inline S32 llround(const F32 val)
206{
207	#if LL_WINDOWS
208		// Note: assumes that the floating point control word is set to rounding mode (the default)
209		S32 ret_val;
210		_asm fld	val
211		_asm fistp	ret_val;
212		return ret_val;
213	#elif LL_LINUX
214		// Note: assumes that the floating point control word is set
215		// to rounding mode (the default)
216		S32 ret_val;
217		__asm__ __volatile__( "flds %1    \n\t"
218							  "fistpl %0  \n\t"
219							  : "=m" (ret_val)
220							  : "m" (val) );
221		return ret_val;
222	#else
223		return llfloor(val + 0.5f);
224	#endif
225}
226
227// A fast arithmentic round on intel, from Laurent de Soras http://ldesoras.free.fr
228inline int round_int(double x)
229{
230	const float round_to_nearest = 0.5f;
231	int i;
232	__asm
233	{
234		fld x
235		fadd st, st (0)
236		fadd round_to_nearest
237		fistp i
238		sar i, 1
239	}
240	return (i);
241}
242#endif // BOGUS_ROUND
243
244inline F32 llround( F32 val, F32 nearest )
245{
246	return F32(floor(val * (1.0f / nearest) + 0.5f)) * nearest;
247}
248
249inline F64 llround( F64 val, F64 nearest )
250{
251	return F64(floor(val * (1.0 / nearest) + 0.5)) * nearest;
252}
253
254// these provide minimum peak error
255//
256// avg  error = -0.013049 
257// peak error = -31.4 dB
258// RMS  error = -28.1 dB
259
260const F32 FAST_MAG_ALPHA = 0.960433870103f;
261const F32 FAST_MAG_BETA = 0.397824734759f;
262
263// these provide minimum RMS error
264//
265// avg  error = 0.000003 
266// peak error = -32.6 dB
267// RMS  error = -25.7 dB
268//
269//const F32 FAST_MAG_ALPHA = 0.948059448969f;
270//const F32 FAST_MAG_BETA = 0.392699081699f;
271
272inline F32 fastMagnitude(F32 a, F32 b)
273{ 
274	a = (a > 0) ? a : -a;
275	b = (b > 0) ? b : -b;
276	return(FAST_MAG_ALPHA * llmax(a,b) + FAST_MAG_BETA * llmin(a,b));
277}
278
279
280
281////////////////////
282//
283// Fast F32/S32 conversions
284//
285// Culled from www.stereopsis.com/FPU.html
286
287const F64 LL_DOUBLE_TO_FIX_MAGIC	= 68719476736.0*1.5;     //2^36 * 1.5,  (52-_shiftamt=36) uses limited precisicion to floor
288const S32 LL_SHIFT_AMOUNT			= 16;                    //16.16 fixed point representation,
289
290// Endian dependent code
291#ifdef LL_LITTLE_ENDIAN
292	#define LL_EXP_INDEX				1
293	#define LL_MAN_INDEX				0
294#else
295	#define LL_EXP_INDEX				0
296	#define LL_MAN_INDEX				1
297#endif
298
299/* Deprecated: use llround(), lltrunc(), or llfloor() instead
300// ================================================================================================
301// Real2Int
302// ================================================================================================
303inline S32 F64toS32(F64 val)
304{
305	val		= val + LL_DOUBLE_TO_FIX_MAGIC;
306	return ((S32*)&val)[LL_MAN_INDEX] >> LL_SHIFT_AMOUNT; 
307}
308
309// ================================================================================================
310// Real2Int
311// ================================================================================================
312inline S32 F32toS32(F32 val)
313{
314	return F64toS32 ((F64)val);
315}
316*/
317
318////////////////////////////////////////////////
319//
320// Fast exp and log
321//
322
323// Implementation of fast exp() approximation (from a paper by Nicol N. Schraudolph
324// http://www.inf.ethz.ch/~schraudo/pubs/exp.pdf
325static union
326{
327	double d;
328	struct
329	{
330#ifdef LL_LITTLE_ENDIAN
331		S32 j, i;
332#else
333		S32 i, j;
334#endif
335	} n;
336} LLECO; // not sure what the name means
337
338#define LL_EXP_A (1048576 * OO_LN2) // use 1512775 for integer
339#define LL_EXP_C (60801)			// this value of C good for -4 < y < 4
340
341#define LL_FAST_EXP(y) (LLECO.n.i = llround(F32(LL_EXP_A*(y))) + (1072693248 - LL_EXP_C), LLECO.d)
342
343
344
345inline F32 llfastpow(const F32 x, const F32 y)
346{
347	return (F32)(LL_FAST_EXP(y * log(x)));
348}
349
350
351inline F32 snap_to_sig_figs(F32 foo, S32 sig_figs)
352{
353	// compute the power of ten
354	F32 bar = 1.f;
355	for (S32 i = 0; i < sig_figs; i++)
356	{
357		bar *= 10.f;
358	}
359
360	//F32 new_foo = (F32)llround(foo * bar);
361	// the llround() implementation sucks.  Don't us it.
362
363	F32 sign = (foo > 0.f) ? 1.f : -1.f;
364	F32 new_foo = F32( S64(foo * bar + sign * 0.5f));
365	new_foo /= bar;
366
367	return new_foo;
368}
369
370inline F32 lerp(F32 a, F32 b, F32 u) 
371{
372	return a + ((b - a) * u);
373}
374
375inline F32 lerp2d(F32 x00, F32 x01, F32 x10, F32 x11, F32 u, F32 v)
376{
377	F32 a = x00 + (x01-x00)*u;
378	F32 b = x10 + (x11-x10)*u;
379	F32 r = a + (b-a)*v;
380	return r;
381}
382
383inline F32 ramp(F32 x, F32 a, F32 b)
384{
385	return (a == b) ? 0.0f : ((a - x) / (a - b));
386}
387
388inline F32 rescale(F32 x, F32 x1, F32 x2, F32 y1, F32 y2)
389{
390	return lerp(y1, y2, ramp(x, x1, x2));
391}
392
393inline F32 clamp_rescale(F32 x, F32 x1, F32 x2, F32 y1, F32 y2)
394{
395	if (y1 < y2)
396	{
397		return llclamp(rescale(x,x1,x2,y1,y2),y1,y2);
398	}
399	else
400	{
401		return llclamp(rescale(x,x1,x2,y1,y2),y2,y1);
402	}
403}
404
405
406inline F32 cubic_step( F32 x, F32 x0, F32 x1, F32 s0, F32 s1 )
407{
408	if (x <= x0)
409		return s0;
410
411	if (x >= x1)
412		return s1;
413
414	F32 f = (x - x0) / (x1 - x0);
415
416	return	s0 + (s1 - s0) * (f * f) * (3.0f - 2.0f * f);
417}
418
419inline F32 cubic_step( F32 x )
420{
421	x = llclampf(x);
422
423	return	(x * x) * (3.0f - 2.0f * x);
424}
425
426inline F32 quadratic_step( F32 x, F32 x0, F32 x1, F32 s0, F32 s1 )
427{
428	if (x <= x0)
429		return s0;
430
431	if (x >= x1)
432		return s1;
433
434	F32 f = (x - x0) / (x1 - x0);
435	F32 f_squared = f * f;
436
437	return	(s0 * (1.f - f_squared)) + ((s1 - s0) * f_squared);
438}
439
440inline F32 llsimple_angle(F32 angle)
441{
442	while(angle <= -F_PI)
443		angle += F_TWO_PI;
444	while(angle >  F_PI)
445		angle -= F_TWO_PI;
446	return angle;
447}
448
449//SDK - Renamed this to get_lower_power_two, since this is what this actually does.
450inline U32 get_lower_power_two(U32 val, U32 max_power_two)
451{
452	if(!max_power_two)
453	{
454		max_power_two = 1 << 31 ;
455	}
456	if(max_power_two & (max_power_two - 1))
457	{
458		return 0 ;
459	}
460
461	for(; val < max_power_two ; max_power_two >>= 1) ;
462	
463	return max_power_two ;
464}
465
466// calculate next highest power of two, limited by max_power_two
467// This is taken from a brilliant little code snipped on http://acius2.blogspot.com/2007/11/calculating-next-power-of-2.html
468// Basically we convert the binary to a solid string of 1's with the same
469// number of digits, then add one.  We subtract 1 initially to handle
470// the case where the number passed in is actually a power of two.
471// WARNING: this only works with 32 bit ints.
472inline U32 get_next_power_two(U32 val, U32 max_power_two)
473{
474	if(!max_power_two)
475	{
476		max_power_two = 1 << 31 ;
477	}
478
479	if(val >= max_power_two)
480	{
481		return max_power_two;
482	}
483
484	val--;
485	val = (val >> 1) | val;
486	val = (val >> 2) | val;
487	val = (val >> 4) | val;
488	val = (val >> 8) | val;
489	val = (val >> 16) | val;
490	val++;
491
492	return val;
493}
494
495//get the gaussian value given the linear distance from axis x and guassian value o
496inline F32 llgaussian(F32 x, F32 o)
497{
498	return 1.f/(F_SQRT_TWO_PI*o)*powf(F_E, -(x*x)/(2*o*o));
499}
500
501//helper function for removing outliers
502template <class VEC_TYPE>
503inline void ll_remove_outliers(std::vector<VEC_TYPE>& data, F32 k)
504{
505	if (data.size() < 100)
506	{ //not enough samples
507		return;
508	}
509
510	VEC_TYPE Q1 = data[data.size()/4];
511	VEC_TYPE Q3 = data[data.size()-data.size()/4-1];
512
513	if ((F32)(Q3-Q1) < 1.f)
514	{
515		// not enough variation to detect outliers
516		return;
517	}
518
519
520	VEC_TYPE min = (VEC_TYPE) ((F32) Q1-k * (F32) (Q3-Q1));
521	VEC_TYPE max = (VEC_TYPE) ((F32) Q3+k * (F32) (Q3-Q1));
522
523	U32 i = 0;
524	while (i < data.size() && data[i] < min)
525	{
526		i++;
527	}
528
529	S32 j = data.size()-1;
530	while (j > 0 && data[j] > max)
531	{
532		j--;
533	}
534
535	if (j < data.size()-1)
536	{
537		data.erase(data.begin()+j, data.end());
538	}
539
540	if (i > 0)
541	{
542		data.erase(data.begin(), data.begin()+i);
543	}
544}
545
546// Include simd math header
547#include "llsimdmath.h"
548
549#endif