PageRenderTime 28ms CodeModel.GetById 14ms app.highlight 11ms RepoModel.GetById 1ms app.codeStats 0ms

/indra/llmath/llvector4a.h

https://bitbucket.org/lindenlab/viewer-beta/
C++ Header | 324 lines | 117 code | 83 blank | 124 comment | 0 complexity | d7e09da02aa9521933e19d8f533439e1 MD5 | raw file
  1/** 
  2 * @file llvector4a.h
  3 * @brief LLVector4a class header file - memory aligned and vectorized 4 component vector
  4 *
  5 * $LicenseInfo:firstyear=2010&license=viewerlgpl$
  6 * Second Life Viewer Source Code
  7 * Copyright (C) 2010, Linden Research, Inc.
  8 * 
  9 * This library is free software; you can redistribute it and/or
 10 * modify it under the terms of the GNU Lesser General Public
 11 * License as published by the Free Software Foundation;
 12 * version 2.1 of the License only.
 13 * 
 14 * This library is distributed in the hope that it will be useful,
 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 17 * Lesser General Public License for more details.
 18 * 
 19 * You should have received a copy of the GNU Lesser General Public
 20 * License along with this library; if not, write to the Free Software
 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 22 * 
 23 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
 24 * $/LicenseInfo$
 25 */
 26
 27#ifndef	LL_LLVECTOR4A_H
 28#define	LL_LLVECTOR4A_H
 29
 30
 31class LLRotation;
 32
 33#include <assert.h>
 34#include "llpreprocessor.h"
 35
 36///////////////////////////////////
 37// FIRST TIME USERS PLEASE READ
 38//////////////////////////////////
 39// This is just the beginning of LLVector4a. There are many more useful functions
 40// yet to be implemented. For example, setNeg to negate a vector, rotate() to apply
 41// a matrix rotation, various functions to manipulate only the X, Y, and Z elements
 42// and many others (including a whole variety of accessors). So if you don't see a 
 43// function here that you need, please contact Falcon or someone else with SSE 
 44// experience (Richard, I think, has some and davep has a little as of the time 
 45// of this writing, July 08, 2010) about getting it implemented before you resort to
 46// LLVector3/LLVector4. 
 47/////////////////////////////////
 48
 49class LLVector4a
 50{
 51public:
 52
 53	///////////////////////////////////
 54	// STATIC METHODS
 55	///////////////////////////////////
 56	
 57	// Call initClass() at startup to avoid 15,000+ cycle penalties from denormalized numbers
 58	static void initClass()
 59	{
 60		_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
 61		_MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
 62	}
 63
 64	// Return a vector of all zeros
 65	static inline const LLVector4a& getZero()
 66	{
 67		extern const LLVector4a LL_V4A_ZERO;
 68		return LL_V4A_ZERO;
 69	}
 70	
 71	// Return a vector of all epsilon, where epsilon is a small float suitable for approximate equality checks
 72	static inline const LLVector4a& getEpsilon()
 73	{
 74		extern const LLVector4a LL_V4A_EPSILON;
 75		return LL_V4A_EPSILON;
 76	}
 77
 78	// Copy 16 bytes from src to dst. Source and destination must be 16-byte aligned
 79	static inline void copy4a(F32* dst, const F32* src)
 80	{
 81		_mm_store_ps(dst, _mm_load_ps(src));
 82	}
 83
 84	// Copy words 16-byte blocks from src to dst. Source and destination must not overlap. 
 85	static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes);
 86
 87	////////////////////////////////////
 88	// CONSTRUCTORS 
 89	////////////////////////////////////
 90	
 91	LLVector4a()
 92	{ //DO NOT INITIALIZE -- The overhead is completely unnecessary
 93	}
 94	
 95	LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f)
 96	{
 97		set(x,y,z,w);
 98	}
 99	
100	LLVector4a(F32 x)
101	{
102		splat(x);
103	}
104	
105	LLVector4a(const LLSimdScalar& x)
106	{
107		splat(x);
108	}
109
110	LLVector4a(LLQuad q)
111	{
112		mQ = q;
113	}
114
115	////////////////////////////////////
116	// LOAD/STORE
117	////////////////////////////////////
118	
119	// Load from 16-byte aligned src array (preferred method of loading)
120	inline void load4a(const F32* src);
121	
122	// Load from unaligned src array (NB: Significantly slower than load4a)
123	inline void loadua(const F32* src);
124	
125	// Load only three floats beginning at address 'src'. Slowest method.
126	inline void load3(const F32* src);
127	
128	// Store to a 16-byte aligned memory address
129	inline void store4a(F32* dst) const;
130	
131	////////////////////////////////////
132	// BASIC GET/SET 
133	////////////////////////////////////
134	
135	// Return a "this" as an F32 pointer. Do not use unless you have a very good reason.  (Not sure? Ask Falcon)
136	inline F32* getF32ptr();
137	
138	// Return a "this" as a const F32 pointer. Do not use unless you have a very good reason.  (Not sure? Ask Falcon)
139	inline const F32* const getF32ptr() const;
140	
141	// Read-only access a single float in this vector. Do not use in proximity to any function call that manipulates
142	// the data at the whole vector level or you will incur a substantial penalty. Consider using the splat functions instead
143	inline F32 operator[](const S32 idx) const;
144
145	// Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time.
146	inline LLSimdScalar getScalarAt(const S32 idx) const;
147
148	// Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time.
149	template <int N> LL_FORCE_INLINE LLSimdScalar getScalarAt() const;
150
151	// Set to an x, y, z and optional w provided
152	inline void set(F32 x, F32 y, F32 z, F32 w = 0.f);
153	
154	// Set to all zeros. This is preferred to using ::getZero()
155	inline void clear();
156	
157	// Set all elements to 'x'
158	inline void splat(const F32 x);
159
160	// Set all elements to 'x'
161	inline void splat(const LLSimdScalar& x);
162	
163	// Set all 4 elements to element N of src, with N known at compile time
164	template <int N> void splat(const LLVector4a& src);
165	
166	// Set all 4 elements to element i of v, with i NOT known at compile time
167	inline void splat(const LLVector4a& v, U32 i);
168	
169	// Select bits from sourceIfTrue and sourceIfFalse according to bits in mask
170	inline void setSelectWithMask( const LLVector4Logical& mask, const LLVector4a& sourceIfTrue, const LLVector4a& sourceIfFalse );
171	
172	////////////////////////////////////
173	// ALGEBRAIC
174	////////////////////////////////////
175	
176	// Set this to the element-wise (a + b)
177	inline void setAdd(const LLVector4a& a, const LLVector4a& b);
178	
179	// Set this to element-wise (a - b)
180	inline void setSub(const LLVector4a& a, const LLVector4a& b);
181	
182	// Set this to element-wise multiply (a * b)
183	inline void setMul(const LLVector4a& a, const LLVector4a& b);
184	
185	// Set this to element-wise quotient (a / b)
186	inline void setDiv(const LLVector4a& a, const LLVector4a& b);
187	
188	// Set this to the element-wise absolute value of src
189	inline void setAbs(const LLVector4a& src);
190	
191	// Add to each component in this vector the corresponding component in rhs
192	inline void add(const LLVector4a& rhs);
193	
194	// Subtract from each component in this vector the corresponding component in rhs
195	inline void sub(const LLVector4a& rhs);
196	
197	// Multiply each component in this vector by the corresponding component in rhs
198	inline void mul(const LLVector4a& rhs);
199	
200	// Divide each component in this vector by the corresponding component in rhs
201	inline void div(const LLVector4a& rhs);
202	
203	// Multiply this vector by x in a scalar fashion
204	inline void mul(const F32 x);
205
206	// Set this to (a x b) (geometric cross-product)
207	inline void setCross3(const LLVector4a& a, const LLVector4a& b);
208	
209	// Set all elements to the dot product of the x, y, and z elements in a and b
210	inline void setAllDot3(const LLVector4a& a, const LLVector4a& b);
211
212	// Set all elements to the dot product of the x, y, z, and w elements in a and b
213	inline void setAllDot4(const LLVector4a& a, const LLVector4a& b);
214
215	// Return the 3D dot product of this vector and b
216	inline LLSimdScalar dot3(const LLVector4a& b) const;
217
218	// Return the 4D dot product of this vector and b
219	inline LLSimdScalar dot4(const LLVector4a& b) const;
220
221	// Normalize this vector with respect to the x, y, and z components only. Accurate to 22 bites of precision. W component is destroyed
222	// Note that this does not consider zero length vectors!
223	inline void normalize3();
224
225	// Same as normalize3() but with respect to all 4 components
226	inline void normalize4();
227
228	// Same as normalize3(), but returns length as a SIMD scalar
229	inline LLSimdScalar normalize3withLength();
230
231	// Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed
232	// Note that this does not consider zero length vectors!
233	inline void normalize3fast();
234
235	// Return true if this vector is normalized with respect to x,y,z up to tolerance
236	inline LLBool32 isNormalized3( F32 tolerance = 1e-3 ) const;
237
238	// Return true if this vector is normalized with respect to all components up to tolerance
239	inline LLBool32 isNormalized4( F32 tolerance = 1e-3 ) const;
240
241	// Set all elements to the length of vector 'v' 
242	inline void setAllLength3( const LLVector4a& v );
243
244	// Get this vector's length
245	inline LLSimdScalar getLength3() const;
246	
247	// Set the components of this vector to the minimum of the corresponding components of lhs and rhs
248	inline void setMin(const LLVector4a& lhs, const LLVector4a& rhs);
249	
250	// Set the components of this vector to the maximum of the corresponding components of lhs and rhs
251	inline void setMax(const LLVector4a& lhs, const LLVector4a& rhs);
252	
253	// Clamps this vector to be within the component-wise range low to high (inclusive)
254	inline void clamp( const LLVector4a& low, const LLVector4a& high );
255
256	// Set this to  (c * lhs) + rhs * ( 1 - c)
257	inline void setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c);
258	
259	// Return true (nonzero) if x, y, z (and w for Finite4) are all finite floats
260	inline LLBool32 isFinite3() const;	
261	inline LLBool32 isFinite4() const;
262
263	// Set this vector to 'vec' rotated by the LLRotation or LLQuaternion2 provided
264	void setRotated( const LLRotation& rot, const LLVector4a& vec );
265	void setRotated( const class LLQuaternion2& quat, const LLVector4a& vec );
266
267	// Set this vector to 'vec' rotated by the INVERSE of the LLRotation or LLQuaternion2 provided
268	inline void setRotatedInv( const LLRotation& rot, const LLVector4a& vec );
269	inline void setRotatedInv( const class LLQuaternion2& quat, const LLVector4a& vec );
270
271	// Quantize this vector to 8 or 16 bit precision
272	void quantize8( const LLVector4a& low, const LLVector4a& high );
273	void quantize16( const LLVector4a& low, const LLVector4a& high );
274
275	////////////////////////////////////
276	// LOGICAL
277	////////////////////////////////////	
278	// The functions in this section will compare the elements in this vector
279	// to those in rhs and return an LLVector4Logical with all bits set in elements
280	// where the comparison was true and all bits unset in elements where the comparison
281	// was false. See llvector4logica.h
282	////////////////////////////////////
283	// WARNING: Other than equals3 and equals4, these functions do NOT account
284	// for floating point tolerance. You should include the appropriate tolerance
285	// in the inputs.
286	////////////////////////////////////
287	
288	inline LLVector4Logical greaterThan(const LLVector4a& rhs) const;
289
290	inline LLVector4Logical lessThan(const LLVector4a& rhs) const;
291	
292	inline LLVector4Logical greaterEqual(const LLVector4a& rhs) const;
293
294	inline LLVector4Logical lessEqual(const LLVector4a& rhs) const;
295	
296	inline LLVector4Logical equal(const LLVector4a& rhs) const;
297
298	// Returns true if this and rhs are componentwise equal up to the specified absolute tolerance
299	inline bool equals4(const LLVector4a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const;
300
301	inline bool equals3(const LLVector4a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const;
302
303	////////////////////////////////////
304	// OPERATORS
305	////////////////////////////////////	
306	
307	// Do NOT add aditional operators without consulting someone with SSE experience
308	inline const LLVector4a& operator= ( const LLVector4a& rhs );
309	
310	inline const LLVector4a& operator= ( const LLQuad& rhs );
311
312	inline operator LLQuad() const;	
313
314private:
315	LLQuad mQ;
316};
317
318inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p)
319{
320	min.setMin(min, p);
321	max.setMax(max, p);
322}
323
324#endif