PageRenderTime 29ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/indra/llmath/llmath.h

https://bitbucket.org/lindenlab/viewer-beta/
C Header | 549 lines | 331 code | 84 blank | 134 comment | 24 complexity | c3098b4216bf2b1b9a3180ea5592e022 MD5 | raw file
Possible License(s): LGPL-2.1
  1. /**
  2. * @file llmath.h
  3. * @brief Useful math constants and macros.
  4. *
  5. * $LicenseInfo:firstyear=2000&license=viewerlgpl$
  6. * Second Life Viewer Source Code
  7. * Copyright (C) 2010, Linden Research, Inc.
  8. *
  9. * This library is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation;
  12. * version 2.1 of the License only.
  13. *
  14. * This library is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with this library; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. *
  23. * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
  24. * $/LicenseInfo$
  25. */
  26. #ifndef LLMATH_H
  27. #define LLMATH_H
  28. #include <cmath>
  29. #include <cstdlib>
  30. #include <vector>
  31. #include "lldefs.h"
  32. //#include "llstl.h" // *TODO: Remove when LLString is gone
  33. //#include "llstring.h" // *TODO: Remove when LLString is gone
  34. // lltut.h uses is_approx_equal_fraction(). This was moved to its own header
  35. // file in llcommon so we can use lltut.h for llcommon tests without making
  36. // llcommon depend on llmath.
  37. #include "is_approx_equal_fraction.h"
  38. // work around for Windows & older gcc non-standard function names.
  39. #if LL_WINDOWS
  40. #include <float.h>
  41. #define llisnan(val) _isnan(val)
  42. #define llfinite(val) _finite(val)
  43. #elif (LL_LINUX && __GNUC__ <= 2)
  44. #define llisnan(val) isnan(val)
  45. #define llfinite(val) isfinite(val)
  46. #elif LL_SOLARIS
  47. #define llisnan(val) isnan(val)
  48. #define llfinite(val) (val <= std::numeric_limits<double>::max())
  49. #else
  50. #define llisnan(val) std::isnan(val)
  51. #define llfinite(val) std::isfinite(val)
  52. #endif
  53. // Single Precision Floating Point Routines
  54. // (There used to be more defined here, but they appeared to be redundant and
  55. // were breaking some other includes. Removed by Falcon, reviewed by Andrew, 11/25/09)
  56. /*#ifndef tanf
  57. #define tanf(x) ((F32)tan((F64)(x)))
  58. #endif*/
  59. const F32 GRAVITY = -9.8f;
  60. // mathematical constants
  61. const F32 F_PI = 3.1415926535897932384626433832795f;
  62. const F32 F_TWO_PI = 6.283185307179586476925286766559f;
  63. const F32 F_PI_BY_TWO = 1.5707963267948966192313216916398f;
  64. const F32 F_SQRT_TWO_PI = 2.506628274631000502415765284811f;
  65. const F32 F_E = 2.71828182845904523536f;
  66. const F32 F_SQRT2 = 1.4142135623730950488016887242097f;
  67. const F32 F_SQRT3 = 1.73205080756888288657986402541f;
  68. const F32 OO_SQRT2 = 0.7071067811865475244008443621049f;
  69. const F32 DEG_TO_RAD = 0.017453292519943295769236907684886f;
  70. const F32 RAD_TO_DEG = 57.295779513082320876798154814105f;
  71. const F32 F_APPROXIMATELY_ZERO = 0.00001f;
  72. const F32 F_LN2 = 0.69314718056f;
  73. const F32 OO_LN2 = 1.4426950408889634073599246810019f;
  74. const F32 F_ALMOST_ZERO = 0.0001f;
  75. const F32 F_ALMOST_ONE = 1.0f - F_ALMOST_ZERO;
  76. // BUG: Eliminate in favor of F_APPROXIMATELY_ZERO above?
  77. const F32 FP_MAG_THRESHOLD = 0.0000001f;
  78. // TODO: Replace with logic like is_approx_equal
  79. inline BOOL is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < F_APPROXIMATELY_ZERO); }
  80. // These functions work by interpreting sign+exp+mantissa as an unsigned
  81. // integer.
  82. // For example:
  83. // x = <sign>1 <exponent>00000010 <mantissa>00000000000000000000000
  84. // y = <sign>1 <exponent>00000001 <mantissa>11111111111111111111111
  85. //
  86. // interpreted as ints =
  87. // x = 10000001000000000000000000000000
  88. // y = 10000000111111111111111111111111
  89. // which is clearly a different of 1 in the least significant bit
  90. // Values with the same exponent can be trivially shown to work.
  91. //
  92. // WARNING: Denormals of opposite sign do not work
  93. // x = <sign>1 <exponent>00000000 <mantissa>00000000000000000000001
  94. // y = <sign>0 <exponent>00000000 <mantissa>00000000000000000000001
  95. // Although these values differ by 2 in the LSB, the sign bit makes
  96. // the int comparison fail.
  97. //
  98. // WARNING: NaNs can compare equal
  99. // There is no special treatment of exceptional values like NaNs
  100. //
  101. // WARNING: Infinity is comparable with F32_MAX and negative
  102. // infinity is comparable with F32_MIN
  103. inline BOOL is_approx_equal(F32 x, F32 y)
  104. {
  105. const S32 COMPARE_MANTISSA_UP_TO_BIT = 0x02;
  106. return (std::abs((S32) ((U32&)x - (U32&)y) ) < COMPARE_MANTISSA_UP_TO_BIT);
  107. }
  108. inline BOOL is_approx_equal(F64 x, F64 y)
  109. {
  110. const S64 COMPARE_MANTISSA_UP_TO_BIT = 0x02;
  111. return (std::abs((S32) ((U64&)x - (U64&)y) ) < COMPARE_MANTISSA_UP_TO_BIT);
  112. }
  113. inline S32 llabs(const S32 a)
  114. {
  115. return S32(std::labs(a));
  116. }
  117. inline F32 llabs(const F32 a)
  118. {
  119. return F32(std::fabs(a));
  120. }
  121. inline F64 llabs(const F64 a)
  122. {
  123. return F64(std::fabs(a));
  124. }
  125. inline S32 lltrunc( F32 f )
  126. {
  127. #if LL_WINDOWS && !defined( __INTEL_COMPILER )
  128. // Avoids changing the floating point control word.
  129. // Add or subtract 0.5 - epsilon and then round
  130. const static U32 zpfp[] = { 0xBEFFFFFF, 0x3EFFFFFF };
  131. S32 result;
  132. __asm {
  133. fld f
  134. mov eax, f
  135. shr eax, 29
  136. and eax, 4
  137. fadd dword ptr [zpfp + eax]
  138. fistp result
  139. }
  140. return result;
  141. #else
  142. return (S32)f;
  143. #endif
  144. }
  145. inline S32 lltrunc( F64 f )
  146. {
  147. return (S32)f;
  148. }
  149. inline S32 llfloor( F32 f )
  150. {
  151. #if LL_WINDOWS && !defined( __INTEL_COMPILER )
  152. // Avoids changing the floating point control word.
  153. // Accurate (unlike Stereopsis version) for all values between S32_MIN and S32_MAX and slightly faster than Stereopsis version.
  154. // Add -(0.5 - epsilon) and then round
  155. const U32 zpfp = 0xBEFFFFFF;
  156. S32 result;
  157. __asm {
  158. fld f
  159. fadd dword ptr [zpfp]
  160. fistp result
  161. }
  162. return result;
  163. #else
  164. return (S32)floor(f);
  165. #endif
  166. }
  167. inline S32 llceil( F32 f )
  168. {
  169. // This could probably be optimized, but this works.
  170. return (S32)ceil(f);
  171. }
  172. #ifndef BOGUS_ROUND
  173. // Use this round. Does an arithmetic round (0.5 always rounds up)
  174. inline S32 llround(const F32 val)
  175. {
  176. return llfloor(val + 0.5f);
  177. }
  178. #else // BOGUS_ROUND
  179. // Old llround implementation - does banker's round (toward nearest even in the case of a 0.5.
  180. // Not using this because we don't have a consistent implementation on both platforms, use
  181. // llfloor(val + 0.5f), which is consistent on all platforms.
  182. inline S32 llround(const F32 val)
  183. {
  184. #if LL_WINDOWS
  185. // Note: assumes that the floating point control word is set to rounding mode (the default)
  186. S32 ret_val;
  187. _asm fld val
  188. _asm fistp ret_val;
  189. return ret_val;
  190. #elif LL_LINUX
  191. // Note: assumes that the floating point control word is set
  192. // to rounding mode (the default)
  193. S32 ret_val;
  194. __asm__ __volatile__( "flds %1 \n\t"
  195. "fistpl %0 \n\t"
  196. : "=m" (ret_val)
  197. : "m" (val) );
  198. return ret_val;
  199. #else
  200. return llfloor(val + 0.5f);
  201. #endif
  202. }
  203. // A fast arithmentic round on intel, from Laurent de Soras http://ldesoras.free.fr
  204. inline int round_int(double x)
  205. {
  206. const float round_to_nearest = 0.5f;
  207. int i;
  208. __asm
  209. {
  210. fld x
  211. fadd st, st (0)
  212. fadd round_to_nearest
  213. fistp i
  214. sar i, 1
  215. }
  216. return (i);
  217. }
  218. #endif // BOGUS_ROUND
  219. inline F32 llround( F32 val, F32 nearest )
  220. {
  221. return F32(floor(val * (1.0f / nearest) + 0.5f)) * nearest;
  222. }
  223. inline F64 llround( F64 val, F64 nearest )
  224. {
  225. return F64(floor(val * (1.0 / nearest) + 0.5)) * nearest;
  226. }
  227. // these provide minimum peak error
  228. //
  229. // avg error = -0.013049
  230. // peak error = -31.4 dB
  231. // RMS error = -28.1 dB
  232. const F32 FAST_MAG_ALPHA = 0.960433870103f;
  233. const F32 FAST_MAG_BETA = 0.397824734759f;
  234. // these provide minimum RMS error
  235. //
  236. // avg error = 0.000003
  237. // peak error = -32.6 dB
  238. // RMS error = -25.7 dB
  239. //
  240. //const F32 FAST_MAG_ALPHA = 0.948059448969f;
  241. //const F32 FAST_MAG_BETA = 0.392699081699f;
  242. inline F32 fastMagnitude(F32 a, F32 b)
  243. {
  244. a = (a > 0) ? a : -a;
  245. b = (b > 0) ? b : -b;
  246. return(FAST_MAG_ALPHA * llmax(a,b) + FAST_MAG_BETA * llmin(a,b));
  247. }
  248. ////////////////////
  249. //
  250. // Fast F32/S32 conversions
  251. //
  252. // Culled from www.stereopsis.com/FPU.html
  253. const F64 LL_DOUBLE_TO_FIX_MAGIC = 68719476736.0*1.5; //2^36 * 1.5, (52-_shiftamt=36) uses limited precisicion to floor
  254. const S32 LL_SHIFT_AMOUNT = 16; //16.16 fixed point representation,
  255. // Endian dependent code
  256. #ifdef LL_LITTLE_ENDIAN
  257. #define LL_EXP_INDEX 1
  258. #define LL_MAN_INDEX 0
  259. #else
  260. #define LL_EXP_INDEX 0
  261. #define LL_MAN_INDEX 1
  262. #endif
  263. /* Deprecated: use llround(), lltrunc(), or llfloor() instead
  264. // ================================================================================================
  265. // Real2Int
  266. // ================================================================================================
  267. inline S32 F64toS32(F64 val)
  268. {
  269. val = val + LL_DOUBLE_TO_FIX_MAGIC;
  270. return ((S32*)&val)[LL_MAN_INDEX] >> LL_SHIFT_AMOUNT;
  271. }
  272. // ================================================================================================
  273. // Real2Int
  274. // ================================================================================================
  275. inline S32 F32toS32(F32 val)
  276. {
  277. return F64toS32 ((F64)val);
  278. }
  279. */
  280. ////////////////////////////////////////////////
  281. //
  282. // Fast exp and log
  283. //
  284. // Implementation of fast exp() approximation (from a paper by Nicol N. Schraudolph
  285. // http://www.inf.ethz.ch/~schraudo/pubs/exp.pdf
  286. static union
  287. {
  288. double d;
  289. struct
  290. {
  291. #ifdef LL_LITTLE_ENDIAN
  292. S32 j, i;
  293. #else
  294. S32 i, j;
  295. #endif
  296. } n;
  297. } LLECO; // not sure what the name means
  298. #define LL_EXP_A (1048576 * OO_LN2) // use 1512775 for integer
  299. #define LL_EXP_C (60801) // this value of C good for -4 < y < 4
  300. #define LL_FAST_EXP(y) (LLECO.n.i = llround(F32(LL_EXP_A*(y))) + (1072693248 - LL_EXP_C), LLECO.d)
  301. inline F32 llfastpow(const F32 x, const F32 y)
  302. {
  303. return (F32)(LL_FAST_EXP(y * log(x)));
  304. }
  305. inline F32 snap_to_sig_figs(F32 foo, S32 sig_figs)
  306. {
  307. // compute the power of ten
  308. F32 bar = 1.f;
  309. for (S32 i = 0; i < sig_figs; i++)
  310. {
  311. bar *= 10.f;
  312. }
  313. //F32 new_foo = (F32)llround(foo * bar);
  314. // the llround() implementation sucks. Don't us it.
  315. F32 sign = (foo > 0.f) ? 1.f : -1.f;
  316. F32 new_foo = F32( S64(foo * bar + sign * 0.5f));
  317. new_foo /= bar;
  318. return new_foo;
  319. }
  320. inline F32 lerp(F32 a, F32 b, F32 u)
  321. {
  322. return a + ((b - a) * u);
  323. }
  324. inline F32 lerp2d(F32 x00, F32 x01, F32 x10, F32 x11, F32 u, F32 v)
  325. {
  326. F32 a = x00 + (x01-x00)*u;
  327. F32 b = x10 + (x11-x10)*u;
  328. F32 r = a + (b-a)*v;
  329. return r;
  330. }
  331. inline F32 ramp(F32 x, F32 a, F32 b)
  332. {
  333. return (a == b) ? 0.0f : ((a - x) / (a - b));
  334. }
  335. inline F32 rescale(F32 x, F32 x1, F32 x2, F32 y1, F32 y2)
  336. {
  337. return lerp(y1, y2, ramp(x, x1, x2));
  338. }
  339. inline F32 clamp_rescale(F32 x, F32 x1, F32 x2, F32 y1, F32 y2)
  340. {
  341. if (y1 < y2)
  342. {
  343. return llclamp(rescale(x,x1,x2,y1,y2),y1,y2);
  344. }
  345. else
  346. {
  347. return llclamp(rescale(x,x1,x2,y1,y2),y2,y1);
  348. }
  349. }
  350. inline F32 cubic_step( F32 x, F32 x0, F32 x1, F32 s0, F32 s1 )
  351. {
  352. if (x <= x0)
  353. return s0;
  354. if (x >= x1)
  355. return s1;
  356. F32 f = (x - x0) / (x1 - x0);
  357. return s0 + (s1 - s0) * (f * f) * (3.0f - 2.0f * f);
  358. }
  359. inline F32 cubic_step( F32 x )
  360. {
  361. x = llclampf(x);
  362. return (x * x) * (3.0f - 2.0f * x);
  363. }
  364. inline F32 quadratic_step( F32 x, F32 x0, F32 x1, F32 s0, F32 s1 )
  365. {
  366. if (x <= x0)
  367. return s0;
  368. if (x >= x1)
  369. return s1;
  370. F32 f = (x - x0) / (x1 - x0);
  371. F32 f_squared = f * f;
  372. return (s0 * (1.f - f_squared)) + ((s1 - s0) * f_squared);
  373. }
  374. inline F32 llsimple_angle(F32 angle)
  375. {
  376. while(angle <= -F_PI)
  377. angle += F_TWO_PI;
  378. while(angle > F_PI)
  379. angle -= F_TWO_PI;
  380. return angle;
  381. }
  382. //SDK - Renamed this to get_lower_power_two, since this is what this actually does.
  383. inline U32 get_lower_power_two(U32 val, U32 max_power_two)
  384. {
  385. if(!max_power_two)
  386. {
  387. max_power_two = 1 << 31 ;
  388. }
  389. if(max_power_two & (max_power_two - 1))
  390. {
  391. return 0 ;
  392. }
  393. for(; val < max_power_two ; max_power_two >>= 1) ;
  394. return max_power_two ;
  395. }
  396. // calculate next highest power of two, limited by max_power_two
  397. // This is taken from a brilliant little code snipped on http://acius2.blogspot.com/2007/11/calculating-next-power-of-2.html
  398. // Basically we convert the binary to a solid string of 1's with the same
  399. // number of digits, then add one. We subtract 1 initially to handle
  400. // the case where the number passed in is actually a power of two.
  401. // WARNING: this only works with 32 bit ints.
  402. inline U32 get_next_power_two(U32 val, U32 max_power_two)
  403. {
  404. if(!max_power_two)
  405. {
  406. max_power_two = 1 << 31 ;
  407. }
  408. if(val >= max_power_two)
  409. {
  410. return max_power_two;
  411. }
  412. val--;
  413. val = (val >> 1) | val;
  414. val = (val >> 2) | val;
  415. val = (val >> 4) | val;
  416. val = (val >> 8) | val;
  417. val = (val >> 16) | val;
  418. val++;
  419. return val;
  420. }
  421. //get the gaussian value given the linear distance from axis x and guassian value o
  422. inline F32 llgaussian(F32 x, F32 o)
  423. {
  424. return 1.f/(F_SQRT_TWO_PI*o)*powf(F_E, -(x*x)/(2*o*o));
  425. }
  426. //helper function for removing outliers
  427. template <class VEC_TYPE>
  428. inline void ll_remove_outliers(std::vector<VEC_TYPE>& data, F32 k)
  429. {
  430. if (data.size() < 100)
  431. { //not enough samples
  432. return;
  433. }
  434. VEC_TYPE Q1 = data[data.size()/4];
  435. VEC_TYPE Q3 = data[data.size()-data.size()/4-1];
  436. if ((F32)(Q3-Q1) < 1.f)
  437. {
  438. // not enough variation to detect outliers
  439. return;
  440. }
  441. VEC_TYPE min = (VEC_TYPE) ((F32) Q1-k * (F32) (Q3-Q1));
  442. VEC_TYPE max = (VEC_TYPE) ((F32) Q3+k * (F32) (Q3-Q1));
  443. U32 i = 0;
  444. while (i < data.size() && data[i] < min)
  445. {
  446. i++;
  447. }
  448. S32 j = data.size()-1;
  449. while (j > 0 && data[j] > max)
  450. {
  451. j--;
  452. }
  453. if (j < data.size()-1)
  454. {
  455. data.erase(data.begin()+j, data.end());
  456. }
  457. if (i > 0)
  458. {
  459. data.erase(data.begin(), data.begin()+i);
  460. }
  461. }
  462. // Include simd math header
  463. #include "llsimdmath.h"
  464. #endif