/indra/llmath/llvector4a.h

https://bitbucket.org/lindenlab/viewer-beta/ · C++ Header · 324 lines · 117 code · 83 blank · 124 comment · 0 complexity · d7e09da02aa9521933e19d8f533439e1 MD5 · raw file

  /**
   * @file llvector4a.h
   * @brief LLVector4a class header file - memory aligned and vectorized 4 component vector
   *
   * $LicenseInfo:firstyear=2010&license=viewerlgpl$
   * Second Life Viewer Source Code
   * Copyright (C) 2010, Linden Research, Inc.
   *
   * This library is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation;
   * version 2.1 of the License only.
   *
   * This library is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
   * License along with this library; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   *
   * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
   * $/LicenseInfo$
   */
  26. #ifndef LL_LLVECTOR4A_H
  27. #define LL_LLVECTOR4A_H
  28. class LLRotation;
  29. #include <assert.h>
  30. #include "llpreprocessor.h"
  ///////////////////////////////////
  // FIRST TIME USERS PLEASE READ
  //////////////////////////////////
  // This is just the beginning of LLVector4a. There are many more useful functions
  // yet to be implemented. For example, setNeg to negate a vector, rotate() to apply
  // a matrix rotation, various functions to manipulate only the X, Y, and Z elements
  // and many others (including a whole variety of accessors). So if you don't see a
  // function here that you need, please contact Falcon or someone else with SSE
  // experience (Richard, I think, has some and davep has a little as of the time
  // of this writing, July 08, 2010) about getting it implemented before you resort to
  // LLVector3/LLVector4.
  /////////////////////////////////
  43. class LLVector4a
  44. {
  45. public:
  46. ///////////////////////////////////
  47. // STATIC METHODS
  48. ///////////////////////////////////
  49. // Call initClass() at startup to avoid 15,000+ cycle penalties from denormalized numbers
  50. static void initClass()
  51. {
  52. _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
  53. _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
  54. }
  55. // Return a vector of all zeros
  56. static inline const LLVector4a& getZero()
  57. {
  58. extern const LLVector4a LL_V4A_ZERO;
  59. return LL_V4A_ZERO;
  60. }
  61. // Return a vector of all epsilon, where epsilon is a small float suitable for approximate equality checks
  62. static inline const LLVector4a& getEpsilon()
  63. {
  64. extern const LLVector4a LL_V4A_EPSILON;
  65. return LL_V4A_EPSILON;
  66. }
  67. // Copy 16 bytes from src to dst. Source and destination must be 16-byte aligned
  68. static inline void copy4a(F32* dst, const F32* src)
  69. {
  70. _mm_store_ps(dst, _mm_load_ps(src));
  71. }
  72. // Copy words 16-byte blocks from src to dst. Source and destination must not overlap.
  73. static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes);
  74. ////////////////////////////////////
  75. // CONSTRUCTORS
  76. ////////////////////////////////////
  77. LLVector4a()
  78. { //DO NOT INITIALIZE -- The overhead is completely unnecessary
  79. }
  80. LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f)
  81. {
  82. set(x,y,z,w);
  83. }
  84. LLVector4a(F32 x)
  85. {
  86. splat(x);
  87. }
  88. LLVector4a(const LLSimdScalar& x)
  89. {
  90. splat(x);
  91. }
  92. LLVector4a(LLQuad q)
  93. {
  94. mQ = q;
  95. }
  96. ////////////////////////////////////
  97. // LOAD/STORE
  98. ////////////////////////////////////
  99. // Load from 16-byte aligned src array (preferred method of loading)
  100. inline void load4a(const F32* src);
  101. // Load from unaligned src array (NB: Significantly slower than load4a)
  102. inline void loadua(const F32* src);
  103. // Load only three floats beginning at address 'src'. Slowest method.
  104. inline void load3(const F32* src);
  105. // Store to a 16-byte aligned memory address
  106. inline void store4a(F32* dst) const;
  107. ////////////////////////////////////
  108. // BASIC GET/SET
  109. ////////////////////////////////////
  110. // Return a "this" as an F32 pointer. Do not use unless you have a very good reason. (Not sure? Ask Falcon)
  111. inline F32* getF32ptr();
  112. // Return a "this" as a const F32 pointer. Do not use unless you have a very good reason. (Not sure? Ask Falcon)
  113. inline const F32* const getF32ptr() const;
  114. // Read-only access a single float in this vector. Do not use in proximity to any function call that manipulates
  115. // the data at the whole vector level or you will incur a substantial penalty. Consider using the splat functions instead
  116. inline F32 operator[](const S32 idx) const;
  117. // Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time.
  118. inline LLSimdScalar getScalarAt(const S32 idx) const;
  119. // Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time.
  120. template <int N> LL_FORCE_INLINE LLSimdScalar getScalarAt() const;
  121. // Set to an x, y, z and optional w provided
  122. inline void set(F32 x, F32 y, F32 z, F32 w = 0.f);
  123. // Set to all zeros. This is preferred to using ::getZero()
  124. inline void clear();
  125. // Set all elements to 'x'
  126. inline void splat(const F32 x);
  127. // Set all elements to 'x'
  128. inline void splat(const LLSimdScalar& x);
  129. // Set all 4 elements to element N of src, with N known at compile time
  130. template <int N> void splat(const LLVector4a& src);
  131. // Set all 4 elements to element i of v, with i NOT known at compile time
  132. inline void splat(const LLVector4a& v, U32 i);
  133. // Select bits from sourceIfTrue and sourceIfFalse according to bits in mask
  134. inline void setSelectWithMask( const LLVector4Logical& mask, const LLVector4a& sourceIfTrue, const LLVector4a& sourceIfFalse );
  135. ////////////////////////////////////
  136. // ALGEBRAIC
  137. ////////////////////////////////////
  138. // Set this to the element-wise (a + b)
  139. inline void setAdd(const LLVector4a& a, const LLVector4a& b);
  140. // Set this to element-wise (a - b)
  141. inline void setSub(const LLVector4a& a, const LLVector4a& b);
  142. // Set this to element-wise multiply (a * b)
  143. inline void setMul(const LLVector4a& a, const LLVector4a& b);
  144. // Set this to element-wise quotient (a / b)
  145. inline void setDiv(const LLVector4a& a, const LLVector4a& b);
  146. // Set this to the element-wise absolute value of src
  147. inline void setAbs(const LLVector4a& src);
  148. // Add to each component in this vector the corresponding component in rhs
  149. inline void add(const LLVector4a& rhs);
  150. // Subtract from each component in this vector the corresponding component in rhs
  151. inline void sub(const LLVector4a& rhs);
  152. // Multiply each component in this vector by the corresponding component in rhs
  153. inline void mul(const LLVector4a& rhs);
  154. // Divide each component in this vector by the corresponding component in rhs
  155. inline void div(const LLVector4a& rhs);
  156. // Multiply this vector by x in a scalar fashion
  157. inline void mul(const F32 x);
  158. // Set this to (a x b) (geometric cross-product)
  159. inline void setCross3(const LLVector4a& a, const LLVector4a& b);
  160. // Set all elements to the dot product of the x, y, and z elements in a and b
  161. inline void setAllDot3(const LLVector4a& a, const LLVector4a& b);
  162. // Set all elements to the dot product of the x, y, z, and w elements in a and b
  163. inline void setAllDot4(const LLVector4a& a, const LLVector4a& b);
  164. // Return the 3D dot product of this vector and b
  165. inline LLSimdScalar dot3(const LLVector4a& b) const;
  166. // Return the 4D dot product of this vector and b
  167. inline LLSimdScalar dot4(const LLVector4a& b) const;
  168. // Normalize this vector with respect to the x, y, and z components only. Accurate to 22 bites of precision. W component is destroyed
  169. // Note that this does not consider zero length vectors!
  170. inline void normalize3();
  171. // Same as normalize3() but with respect to all 4 components
  172. inline void normalize4();
  173. // Same as normalize3(), but returns length as a SIMD scalar
  174. inline LLSimdScalar normalize3withLength();
  175. // Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed
  176. // Note that this does not consider zero length vectors!
  177. inline void normalize3fast();
  178. // Return true if this vector is normalized with respect to x,y,z up to tolerance
  179. inline LLBool32 isNormalized3( F32 tolerance = 1e-3 ) const;
  180. // Return true if this vector is normalized with respect to all components up to tolerance
  181. inline LLBool32 isNormalized4( F32 tolerance = 1e-3 ) const;
  182. // Set all elements to the length of vector 'v'
  183. inline void setAllLength3( const LLVector4a& v );
  184. // Get this vector's length
  185. inline LLSimdScalar getLength3() const;
  186. // Set the components of this vector to the minimum of the corresponding components of lhs and rhs
  187. inline void setMin(const LLVector4a& lhs, const LLVector4a& rhs);
  188. // Set the components of this vector to the maximum of the corresponding components of lhs and rhs
  189. inline void setMax(const LLVector4a& lhs, const LLVector4a& rhs);
  190. // Clamps this vector to be within the component-wise range low to high (inclusive)
  191. inline void clamp( const LLVector4a& low, const LLVector4a& high );
  192. // Set this to (c * lhs) + rhs * ( 1 - c)
  193. inline void setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c);
  194. // Return true (nonzero) if x, y, z (and w for Finite4) are all finite floats
  195. inline LLBool32 isFinite3() const;
  196. inline LLBool32 isFinite4() const;
  197. // Set this vector to 'vec' rotated by the LLRotation or LLQuaternion2 provided
  198. void setRotated( const LLRotation& rot, const LLVector4a& vec );
  199. void setRotated( const class LLQuaternion2& quat, const LLVector4a& vec );
  200. // Set this vector to 'vec' rotated by the INVERSE of the LLRotation or LLQuaternion2 provided
  201. inline void setRotatedInv( const LLRotation& rot, const LLVector4a& vec );
  202. inline void setRotatedInv( const class LLQuaternion2& quat, const LLVector4a& vec );
  203. // Quantize this vector to 8 or 16 bit precision
  204. void quantize8( const LLVector4a& low, const LLVector4a& high );
  205. void quantize16( const LLVector4a& low, const LLVector4a& high );
  206. ////////////////////////////////////
  207. // LOGICAL
  208. ////////////////////////////////////
  209. // The functions in this section will compare the elements in this vector
  210. // to those in rhs and return an LLVector4Logical with all bits set in elements
  211. // where the comparison was true and all bits unset in elements where the comparison
  212. // was false. See llvector4logica.h
  213. ////////////////////////////////////
  214. // WARNING: Other than equals3 and equals4, these functions do NOT account
  215. // for floating point tolerance. You should include the appropriate tolerance
  216. // in the inputs.
  217. ////////////////////////////////////
  218. inline LLVector4Logical greaterThan(const LLVector4a& rhs) const;
  219. inline LLVector4Logical lessThan(const LLVector4a& rhs) const;
  220. inline LLVector4Logical greaterEqual(const LLVector4a& rhs) const;
  221. inline LLVector4Logical lessEqual(const LLVector4a& rhs) const;
  222. inline LLVector4Logical equal(const LLVector4a& rhs) const;
  223. // Returns true if this and rhs are componentwise equal up to the specified absolute tolerance
  224. inline bool equals4(const LLVector4a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const;
  225. inline bool equals3(const LLVector4a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const;
  226. ////////////////////////////////////
  227. // OPERATORS
  228. ////////////////////////////////////
  229. // Do NOT add aditional operators without consulting someone with SSE experience
  230. inline const LLVector4a& operator= ( const LLVector4a& rhs );
  231. inline const LLVector4a& operator= ( const LLQuad& rhs );
  232. inline operator LLQuad() const;
  233. private:
  234. LLQuad mQ;
  235. };
  236. inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p)
  237. {
  238. min.setMin(min, p);
  239. max.setMax(max, p);
  240. }
  241. #endif