PageRenderTime 70ms CodeModel.GetById 18ms app.highlight 46ms RepoModel.GetById 2ms app.codeStats 0ms

/indra/llmath/llv4matrix3.h

https://bitbucket.org/lindenlab/viewer-beta/
C Header | 220 lines | 135 code | 40 blank | 45 comment | 1 complexity | 3e9ae5e7f75f8843a2a53865e6d4559b MD5 | raw file
  1/** 
  2 * @file llv4matrix3.h
  3 * @brief LLV4* class header file - vector processor enabled math
  4 *
  5 * $LicenseInfo:firstyear=2007&license=viewerlgpl$
  6 * Second Life Viewer Source Code
  7 * Copyright (C) 2010, Linden Research, Inc.
  8 * 
  9 * This library is free software; you can redistribute it and/or
 10 * modify it under the terms of the GNU Lesser General Public
 11 * License as published by the Free Software Foundation;
 12 * version 2.1 of the License only.
 13 * 
 14 * This library is distributed in the hope that it will be useful,
 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 17 * Lesser General Public License for more details.
 18 * 
 19 * You should have received a copy of the GNU Lesser General Public
 20 * License along with this library; if not, write to the Free Software
 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 22 * 
 23 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
 24 * $/LicenseInfo$
 25 */
 26
 27#ifndef LL_LLV4MATRIX3_H
 28#define LL_LLV4MATRIX3_H
 29
 30#include "llv4math.h"
 31#include "llv4vector3.h"
 32#include "m3math.h"			// for operator LLMatrix3()
 33
 34//-----------------------------------------------------------------------------
 35//-----------------------------------------------------------------------------
 36// LLV4Matrix3
 37//-----------------------------------------------------------------------------
 38//-----------------------------------------------------------------------------
 39
 40LL_LLV4MATH_ALIGN_PREFIX
 41
 42class LLV4Matrix3
 43{
 44public:
 45	union {
 46		F32		mMatrix[LLV4_NUM_AXIS][LLV4_NUM_AXIS];
 47		V4F32	mV[LLV4_NUM_AXIS];
 48	};
 49
 50	void				lerp(const LLV4Matrix3 &a, const LLV4Matrix3 &b, const F32 &w);
 51	void				multiply(const LLVector3 &a, LLVector3& out) const;
 52	void				multiply(const LLVector4 &a, LLV4Vector3& out) const;
 53	void				multiply(const LLVector3 &a, LLV4Vector3& out) const;
 54
 55	const LLV4Matrix3&	transpose();
 56	const LLV4Matrix3&	operator=(const LLMatrix3& a);
 57
 58	operator			LLMatrix3()	const { return (reinterpret_cast<const LLMatrix4*>(const_cast<const F32*>(&mMatrix[0][0])))->getMat3(); }
 59
 60	friend LLVector3	operator*(const LLVector3& a, const LLV4Matrix3& b);
 61}
 62
 63LL_LLV4MATH_ALIGN_POSTFIX;
 64
 65
 66
 67//-----------------------------------------------------------------------------
 68//-----------------------------------------------------------------------------
 69// LLV4Matrix3 - SSE
 70//-----------------------------------------------------------------------------
 71//-----------------------------------------------------------------------------
 72
 73#if LL_VECTORIZE
 74
 75inline void LLV4Matrix3::lerp(const LLV4Matrix3 &a, const LLV4Matrix3 &b, const F32 &w)
 76{
 77	__m128 vw = _mm_set1_ps(w);
 78	mV[VX] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VX], a.mV[VX]), vw), a.mV[VX]); // ( b - a ) * w + a
 79	mV[VY] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VY], a.mV[VY]), vw), a.mV[VY]);
 80	mV[VZ] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VZ], a.mV[VZ]), vw), a.mV[VZ]);
 81}
 82
 83inline void LLV4Matrix3::multiply(const LLVector3 &a, LLVector3& o) const
 84{
 85	LLV4Vector3 j;
 86	j.v = 				 	_mm_mul_ps(_mm_set1_ps(a.mV[VX]), mV[VX]); // ( ax * vx ) + ...
 87	j.v = _mm_add_ps(j.v  , _mm_mul_ps(_mm_set1_ps(a.mV[VY]), mV[VY]));
 88	j.v = _mm_add_ps(j.v  , _mm_mul_ps(_mm_set1_ps(a.mV[VZ]), mV[VZ]));
 89	o.setVec(j.mV);
 90}
 91
 92inline void LLV4Matrix3::multiply(const LLVector4 &a, LLV4Vector3& o) const
 93{
 94	o.v =					_mm_mul_ps(_mm_set1_ps(a.mV[VX]), mV[VX]); // ( ax * vx ) + ...
 95	o.v = _mm_add_ps(o.v  , _mm_mul_ps(_mm_set1_ps(a.mV[VY]), mV[VY]));
 96	o.v = _mm_add_ps(o.v  , _mm_mul_ps(_mm_set1_ps(a.mV[VZ]), mV[VZ]));
 97}
 98
 99inline void LLV4Matrix3::multiply(const LLVector3 &a, LLV4Vector3& o) const
100{
101	o.v =					_mm_mul_ps(_mm_set1_ps(a.mV[VX]), mV[VX]); // ( ax * vx ) + ...
102	o.v = _mm_add_ps(o.v  , _mm_mul_ps(_mm_set1_ps(a.mV[VY]), mV[VY]));
103	o.v = _mm_add_ps(o.v  , _mm_mul_ps(_mm_set1_ps(a.mV[VZ]), mV[VZ]));
104}
105
106//-----------------------------------------------------------------------------
107//-----------------------------------------------------------------------------
108// LLV4Matrix3
109//-----------------------------------------------------------------------------
110//-----------------------------------------------------------------------------
111
112#else
113
114inline void LLV4Matrix3::lerp(const LLV4Matrix3 &a, const LLV4Matrix3 &b, const F32 &w)
115{
116	mMatrix[VX][VX] = llv4lerp(a.mMatrix[VX][VX], b.mMatrix[VX][VX], w);
117	mMatrix[VX][VY] = llv4lerp(a.mMatrix[VX][VY], b.mMatrix[VX][VY], w);
118	mMatrix[VX][VZ] = llv4lerp(a.mMatrix[VX][VZ], b.mMatrix[VX][VZ], w);
119
120	mMatrix[VY][VX] = llv4lerp(a.mMatrix[VY][VX], b.mMatrix[VY][VX], w);
121	mMatrix[VY][VY] = llv4lerp(a.mMatrix[VY][VY], b.mMatrix[VY][VY], w);
122	mMatrix[VY][VZ] = llv4lerp(a.mMatrix[VY][VZ], b.mMatrix[VY][VZ], w);
123
124	mMatrix[VZ][VX] = llv4lerp(a.mMatrix[VZ][VX], b.mMatrix[VZ][VX], w);
125	mMatrix[VZ][VY] = llv4lerp(a.mMatrix[VZ][VY], b.mMatrix[VZ][VY], w);
126	mMatrix[VZ][VZ] = llv4lerp(a.mMatrix[VZ][VZ], b.mMatrix[VZ][VZ], w);
127}
128
129inline void LLV4Matrix3::multiply(const LLVector3 &a, LLVector3& o) const
130{
131	o.setVec(		a.mV[VX] * mMatrix[VX][VX] + 
132					a.mV[VY] * mMatrix[VY][VX] + 
133					a.mV[VZ] * mMatrix[VZ][VX],
134					 
135					a.mV[VX] * mMatrix[VX][VY] + 
136					a.mV[VY] * mMatrix[VY][VY] + 
137					a.mV[VZ] * mMatrix[VZ][VY],
138					 
139					a.mV[VX] * mMatrix[VX][VZ] + 
140					a.mV[VY] * mMatrix[VY][VZ] + 
141					a.mV[VZ] * mMatrix[VZ][VZ]);
142}
143
144inline void LLV4Matrix3::multiply(const LLVector4 &a, LLV4Vector3& o) const
145{
146	o.setVec(		a.mV[VX] * mMatrix[VX][VX] + 
147					a.mV[VY] * mMatrix[VY][VX] + 
148					a.mV[VZ] * mMatrix[VZ][VX],
149					 
150					a.mV[VX] * mMatrix[VX][VY] + 
151					a.mV[VY] * mMatrix[VY][VY] + 
152					a.mV[VZ] * mMatrix[VZ][VY],
153					 
154					a.mV[VX] * mMatrix[VX][VZ] + 
155					a.mV[VY] * mMatrix[VY][VZ] + 
156					a.mV[VZ] * mMatrix[VZ][VZ]);
157}
158
159inline void LLV4Matrix3::multiply(const LLVector3 &a, LLV4Vector3& o) const
160{
161	o.setVec(		a.mV[VX] * mMatrix[VX][VX] + 
162					a.mV[VY] * mMatrix[VY][VX] + 
163					a.mV[VZ] * mMatrix[VZ][VX],
164					 
165					a.mV[VX] * mMatrix[VX][VY] + 
166					a.mV[VY] * mMatrix[VY][VY] + 
167					a.mV[VZ] * mMatrix[VZ][VY],
168					 
169					a.mV[VX] * mMatrix[VX][VZ] + 
170					a.mV[VY] * mMatrix[VY][VZ] + 
171					a.mV[VZ] * mMatrix[VZ][VZ]);
172}
173
174//-----------------------------------------------------------------------------
175//-----------------------------------------------------------------------------
176// LLV4Matrix3
177//-----------------------------------------------------------------------------
178//-----------------------------------------------------------------------------
179
180#endif
181
182inline const LLV4Matrix3&	LLV4Matrix3::transpose()
183{
184#if LL_VECTORIZE && defined(_MM_TRANSPOSE4_PS)
185	_MM_TRANSPOSE4_PS(mV[VX], mV[VY], mV[VZ], mV[VW]);
186	return *this;
187#else
188	F32 temp;
189	temp = mMatrix[VX][VY]; mMatrix[VX][VY] = mMatrix[VY][VX]; mMatrix[VY][VX] = temp;
190	temp = mMatrix[VX][VZ]; mMatrix[VX][VZ] = mMatrix[VZ][VX]; mMatrix[VZ][VX] = temp;
191	temp = mMatrix[VY][VZ]; mMatrix[VY][VZ] = mMatrix[VZ][VY]; mMatrix[VZ][VY] = temp;
192#endif
193	return *this;
194}
195
196inline const LLV4Matrix3& LLV4Matrix3::operator=(const LLMatrix3& a)
197{
198	memcpy(mMatrix[VX], a.mMatrix[VX], sizeof(F32) * 3 );
199	memcpy(mMatrix[VY], a.mMatrix[VY], sizeof(F32) * 3 );
200	memcpy(mMatrix[VZ], a.mMatrix[VZ], sizeof(F32) * 3 );
201	return *this;
202}
203
204inline LLVector3 operator*(const LLVector3& a, const LLV4Matrix3& b)
205{
206	return LLVector3(
207				a.mV[VX] * b.mMatrix[VX][VX] + 
208				a.mV[VY] * b.mMatrix[VY][VX] + 
209				a.mV[VZ] * b.mMatrix[VZ][VX],
210	
211				a.mV[VX] * b.mMatrix[VX][VY] + 
212				a.mV[VY] * b.mMatrix[VY][VY] + 
213				a.mV[VZ] * b.mMatrix[VZ][VY],
214	
215				a.mV[VX] * b.mMatrix[VX][VZ] + 
216				a.mV[VY] * b.mMatrix[VY][VZ] + 
217				a.mV[VZ] * b.mMatrix[VZ][VZ] );
218}
219
220#endif