--- /dev/null
+/************************************************************************
+* *
+* xnamathmisc.inl -- SIMD C++ Math library for Windows and Xbox 360 *
+* Quaternion, plane, and color functions *
+* *
+* Copyright (c) Microsoft Corp. All rights reserved. *
+* *
+************************************************************************/
+
+#if defined(_MSC_VER) && (_MSC_VER > 1000)
+#pragma once
+#endif
+
+#ifndef __XNAMATHMISC_INL__
+#define __XNAMATHMISC_INL__
+
+/****************************************************************************
+ *
+ * Quaternion
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMQuaternionEqual
+(
+ FXMVECTOR Q1,
+ FXMVECTOR Q2
+)
+{
+ return XMVector4Equal(Q1, Q2);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMQuaternionNotEqual
+(
+ FXMVECTOR Q1,
+ FXMVECTOR Q2
+)
+{
+ return XMVector4NotEqual(Q1, Q2);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMQuaternionIsNaN
+(
+ FXMVECTOR Q
+)
+{
+ return XMVector4IsNaN(Q);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMQuaternionIsInfinite
+(
+ FXMVECTOR Q
+)
+{
+ return XMVector4IsInfinite(Q);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMQuaternionIsIdentity
+(
+ FXMVECTOR Q
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ return XMVector4Equal(Q, g_XMIdentityR3.v);
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp = _mm_cmpeq_ps(Q,g_XMIdentityR3);
+ return (_mm_movemask_ps(vTemp)==0x0f);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionDot
+(
+ FXMVECTOR Q1,
+ FXMVECTOR Q2
+)
+{
+ return XMVector4Dot(Q1, Q2);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionMultiply
+(
+ FXMVECTOR Q1,
+ FXMVECTOR Q2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR NegativeQ1;
+ XMVECTOR Q2X;
+ XMVECTOR Q2Y;
+ XMVECTOR Q2Z;
+ XMVECTOR Q2W;
+ XMVECTOR Q1WZYX;
+ XMVECTOR Q1ZWXY;
+ XMVECTOR Q1YXWZ;
+ XMVECTOR Result;
+ CONST XMVECTORU32 ControlWZYX = {XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1X};
+ CONST XMVECTORU32 ControlZWXY = {XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_1Y};
+ CONST XMVECTORU32 ControlYXWZ = {XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z};
+
+ NegativeQ1 = XMVectorNegate(Q1);
+
+ Q2W = XMVectorSplatW(Q2);
+ Q2X = XMVectorSplatX(Q2);
+ Q2Y = XMVectorSplatY(Q2);
+ Q2Z = XMVectorSplatZ(Q2);
+
+ Q1WZYX = XMVectorPermute(Q1, NegativeQ1, ControlWZYX.v);
+ Q1ZWXY = XMVectorPermute(Q1, NegativeQ1, ControlZWXY.v);
+ Q1YXWZ = XMVectorPermute(Q1, NegativeQ1, ControlYXWZ.v);
+
+ Result = XMVectorMultiply(Q1, Q2W);
+ Result = XMVectorMultiplyAdd(Q1WZYX, Q2X, Result);
+ Result = XMVectorMultiplyAdd(Q1ZWXY, Q2Y, Result);
+ Result = XMVectorMultiplyAdd(Q1YXWZ, Q2Z, Result);
+
+ return Result;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ static CONST XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f};
+ static CONST XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f};
+ static CONST XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f};
+ // Copy to SSE registers and use as few as possible for x86
+ XMVECTOR Q2X = Q2;
+ XMVECTOR Q2Y = Q2;
+ XMVECTOR Q2Z = Q2;
+ XMVECTOR vResult = Q2;
+ // Splat with one instruction
+ vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
+ Q2X = _mm_shuffle_ps(Q2X,Q2X,_MM_SHUFFLE(0,0,0,0));
+ Q2Y = _mm_shuffle_ps(Q2Y,Q2Y,_MM_SHUFFLE(1,1,1,1));
+ Q2Z = _mm_shuffle_ps(Q2Z,Q2Z,_MM_SHUFFLE(2,2,2,2));
+ // Retire Q1 and perform Q1*Q2W
+ vResult = _mm_mul_ps(vResult,Q1);
+ XMVECTOR Q1Shuffle = Q1;
+ // Shuffle the copies of Q1
+ Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
+ // Mul by Q1WZYX
+ Q2X = _mm_mul_ps(Q2X,Q1Shuffle);
+ Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(2,3,0,1));
+ // Flip the signs on y and z
+ Q2X = _mm_mul_ps(Q2X,ControlWZYX);
+ // Mul by Q1ZWXY
+ Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle);
+ Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
+ // Flip the signs on z and w
+ Q2Y = _mm_mul_ps(Q2Y,ControlZWXY);
+ // Mul by Q1YXWZ
+ Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle);
+ vResult = _mm_add_ps(vResult,Q2X);
+ // Flip the signs on x and w
+ Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ);
+ Q2Y = _mm_add_ps(Q2Y,Q2Z);
+ vResult = _mm_add_ps(vResult,Q2Y);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionLengthSq
+(
+ FXMVECTOR Q
+)
+{
+ return XMVector4LengthSq(Q);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionReciprocalLength
+(
+ FXMVECTOR Q
+)
+{
+ return XMVector4ReciprocalLength(Q);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionLength
+(
+ FXMVECTOR Q
+)
+{
+ return XMVector4Length(Q);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionNormalizeEst
+(
+ FXMVECTOR Q
+)
+{
+ return XMVector4NormalizeEst(Q);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionNormalize
+(
+ FXMVECTOR Q
+)
+{
+ return XMVector4Normalize(Q);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionConjugate
+(
+ FXMVECTOR Q
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result = {
+ -Q.vector4_f32[0],
+ -Q.vector4_f32[1],
+ -Q.vector4_f32[2],
+ Q.vector4_f32[3]
+ };
+ return Result;
+#elif defined(_XM_SSE_INTRINSICS_)
+ static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f};
+ XMVECTOR Result = _mm_mul_ps(Q,NegativeOne3);
+ return Result;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionInverse
+(
+ FXMVECTOR Q
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Conjugate;
+ XMVECTOR L;
+ XMVECTOR Control;
+ XMVECTOR Result;
+ CONST XMVECTOR Zero = XMVectorZero();
+
+ L = XMVector4LengthSq(Q);
+ Conjugate = XMQuaternionConjugate(Q);
+
+ Control = XMVectorLessOrEqual(L, g_XMEpsilon.v);
+
+ L = XMVectorReciprocal(L);
+ Result = XMVectorMultiply(Conjugate, L);
+
+ Result = XMVectorSelect(Result, Zero, Control);
+
+ return Result;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR Conjugate;
+ XMVECTOR L;
+ XMVECTOR Control;
+ XMVECTOR Result;
+ XMVECTOR Zero = XMVectorZero();
+
+ L = XMVector4LengthSq(Q);
+ Conjugate = XMQuaternionConjugate(Q);
+ Control = XMVectorLessOrEqual(L, g_XMEpsilon);
+ Result = _mm_div_ps(Conjugate,L);
+ Result = XMVectorSelect(Result, Zero, Control);
+ return Result;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionLn
+(
+ FXMVECTOR Q
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Q0;
+ XMVECTOR QW;
+ XMVECTOR Theta;
+ XMVECTOR SinTheta;
+ XMVECTOR S;
+ XMVECTOR ControlW;
+ XMVECTOR Result;
+ static CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
+
+ QW = XMVectorSplatW(Q);
+ Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);
+
+ ControlW = XMVectorInBounds(QW, OneMinusEpsilon);
+
+ Theta = XMVectorACos(QW);
+ SinTheta = XMVectorSin(Theta);
+
+ S = XMVectorReciprocal(SinTheta);
+ S = XMVectorMultiply(Theta, S);
+
+ Result = XMVectorMultiply(Q0, S);
+
+ Result = XMVectorSelect(Q0, Result, ControlW);
+
+ return Result;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ static CONST XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
+ static CONST XMVECTORF32 NegOneMinusEpsilon = {-(1.0f - 0.00001f), -(1.0f - 0.00001f),-(1.0f - 0.00001f),-(1.0f - 0.00001f)};
+ // Get W only
+ XMVECTOR QW = _mm_shuffle_ps(Q,Q,_MM_SHUFFLE(3,3,3,3));
+ // W = 0
+ XMVECTOR Q0 = _mm_and_ps(Q,g_XMMask3);
+ // Use W if within bounds
+ XMVECTOR ControlW = _mm_cmple_ps(QW,OneMinusEpsilon);
+ XMVECTOR vTemp2 = _mm_cmpge_ps(QW,NegOneMinusEpsilon);
+ ControlW = _mm_and_ps(ControlW,vTemp2);
+ // Get theta
+ XMVECTOR vTheta = XMVectorACos(QW);
+ // Get Sine of theta
+ vTemp2 = XMVectorSin(vTheta);
+ // theta/sine of theta
+ vTheta = _mm_div_ps(vTheta,vTemp2);
+ // Here's the answer
+ vTheta = _mm_mul_ps(vTheta,Q0);
+ // Was W in bounds? If not, return input as is
+ vTheta = XMVectorSelect(Q0,vTheta,ControlW);
+ return vTheta;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionExp
+(
+ FXMVECTOR Q
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Theta;
+ XMVECTOR SinTheta;
+ XMVECTOR CosTheta;
+ XMVECTOR S;
+ XMVECTOR Control;
+ XMVECTOR Zero;
+ XMVECTOR Result;
+
+ Theta = XMVector3Length(Q);
+ XMVectorSinCos(&SinTheta, &CosTheta, Theta);
+
+ S = XMVectorReciprocal(Theta);
+ S = XMVectorMultiply(SinTheta, S);
+
+ Result = XMVectorMultiply(Q, S);
+
+ Zero = XMVectorZero();
+ Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v);
+ Result = XMVectorSelect(Result, Q, Control);
+
+ Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v);
+
+ return Result;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR Theta;
+ XMVECTOR SinTheta;
+ XMVECTOR CosTheta;
+ XMVECTOR S;
+ XMVECTOR Control;
+ XMVECTOR Zero;
+ XMVECTOR Result;
+ Theta = XMVector3Length(Q);
+ XMVectorSinCos(&SinTheta, &CosTheta, Theta);
+ S = _mm_div_ps(SinTheta,Theta);
+ Result = _mm_mul_ps(Q, S);
+ Zero = XMVectorZero();
+ Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon);
+ Result = XMVectorSelect(Result,Q,Control);
+ Result = _mm_and_ps(Result,g_XMMask3);
+ CosTheta = _mm_and_ps(CosTheta,g_XMMaskW);
+ Result = _mm_or_ps(Result,CosTheta);
+ return Result;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMINLINE XMVECTOR XMQuaternionSlerp
+(
+ FXMVECTOR Q0,
+ FXMVECTOR Q1,
+ FLOAT t
+)
+{
+ XMVECTOR T = XMVectorReplicate(t);
+ return XMQuaternionSlerpV(Q0, Q1, T);
+}
+
+//------------------------------------------------------------------------------
+
+XMINLINE XMVECTOR XMQuaternionSlerpV
+(
+ FXMVECTOR Q0,
+ FXMVECTOR Q1,
+ FXMVECTOR T
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
+ XMVECTOR Omega;
+ XMVECTOR CosOmega;
+ XMVECTOR SinOmega;
+ XMVECTOR InvSinOmega;
+ XMVECTOR V01;
+ XMVECTOR C1000;
+ XMVECTOR SignMask;
+ XMVECTOR S0;
+ XMVECTOR S1;
+ XMVECTOR Sign;
+ XMVECTOR Control;
+ XMVECTOR Result;
+ XMVECTOR Zero;
+ CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
+
+ XMASSERT((T.vector4_f32[1] == T.vector4_f32[0]) && (T.vector4_f32[2] == T.vector4_f32[0]) && (T.vector4_f32[3] == T.vector4_f32[0]));
+
+ CosOmega = XMQuaternionDot(Q0, Q1);
+
+ Zero = XMVectorZero();
+ Control = XMVectorLess(CosOmega, Zero);
+ Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control);
+
+ CosOmega = XMVectorMultiply(CosOmega, Sign);
+
+ Control = XMVectorLess(CosOmega, OneMinusEpsilon);
+
+ SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v);
+ SinOmega = XMVectorSqrt(SinOmega);
+
+ Omega = XMVectorATan2(SinOmega, CosOmega);
+
+ SignMask = XMVectorSplatSignMask();
+ C1000 = XMVectorSetBinaryConstant(1, 0, 0, 0);
+ V01 = XMVectorShiftLeft(T, Zero, 2);
+ SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
+ V01 = XMVectorXorInt(V01, SignMask);
+ V01 = XMVectorAdd(C1000, V01);
+
+ InvSinOmega = XMVectorReciprocal(SinOmega);
+
+ S0 = XMVectorMultiply(V01, Omega);
+ S0 = XMVectorSin(S0);
+ S0 = XMVectorMultiply(S0, InvSinOmega);
+
+ S0 = XMVectorSelect(V01, S0, Control);
+
+ S1 = XMVectorSplatY(S0);
+ S0 = XMVectorSplatX(S0);
+
+ S1 = XMVectorMultiply(S1, Sign);
+
+ Result = XMVectorMultiply(Q0, S0);
+ Result = XMVectorMultiplyAdd(Q1, S1, Result);
+
+ return Result;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
+ XMVECTOR Omega;
+ XMVECTOR CosOmega;
+ XMVECTOR SinOmega;
+ XMVECTOR V01;
+ XMVECTOR S0;
+ XMVECTOR S1;
+ XMVECTOR Sign;
+ XMVECTOR Control;
+ XMVECTOR Result;
+ XMVECTOR Zero;
+ static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
+ static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000};
+ static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000};
+
+ XMASSERT((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));
+
+ CosOmega = XMQuaternionDot(Q0, Q1);
+
+ Zero = XMVectorZero();
+ Control = XMVectorLess(CosOmega, Zero);
+ Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control);
+
+ CosOmega = _mm_mul_ps(CosOmega, Sign);
+
+ Control = XMVectorLess(CosOmega, OneMinusEpsilon);
+
+ SinOmega = _mm_mul_ps(CosOmega,CosOmega);
+ SinOmega = _mm_sub_ps(g_XMOne,SinOmega);
+ SinOmega = _mm_sqrt_ps(SinOmega);
+
+ Omega = XMVectorATan2(SinOmega, CosOmega);
+
+ V01 = _mm_shuffle_ps(T,T,_MM_SHUFFLE(2,3,0,1));
+ V01 = _mm_and_ps(V01,MaskXY);
+ V01 = _mm_xor_ps(V01,SignMask2);
+ V01 = _mm_add_ps(g_XMIdentityR0, V01);
+
+ S0 = _mm_mul_ps(V01, Omega);
+ S0 = XMVectorSin(S0);
+ S0 = _mm_div_ps(S0, SinOmega);
+
+ S0 = XMVectorSelect(V01, S0, Control);
+
+ S1 = XMVectorSplatY(S0);
+ S0 = XMVectorSplatX(S0);
+
+ S1 = _mm_mul_ps(S1, Sign);
+ Result = _mm_mul_ps(Q0, S0);
+ S1 = _mm_mul_ps(S1, Q1);
+ Result = _mm_add_ps(Result,S1);
+ return Result;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionSquad
+(
+ FXMVECTOR Q0,
+ FXMVECTOR Q1,
+ FXMVECTOR Q2,
+ CXMVECTOR Q3,
+ FLOAT t
+)
+{
+ XMVECTOR T = XMVectorReplicate(t);
+ return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionSquadV
+(
+ FXMVECTOR Q0,
+ FXMVECTOR Q1,
+ FXMVECTOR Q2,
+ CXMVECTOR Q3,
+ CXMVECTOR T
+)
+{
+ XMVECTOR Q03;
+ XMVECTOR Q12;
+ XMVECTOR TP;
+ XMVECTOR Two;
+ XMVECTOR Result;
+
+ XMASSERT( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) );
+
+ TP = T;
+ Two = XMVectorSplatConstant(2, 0);
+
+ Q03 = XMQuaternionSlerpV(Q0, Q3, T);
+ Q12 = XMQuaternionSlerpV(Q1, Q2, T);
+
+ TP = XMVectorNegativeMultiplySubtract(TP, TP, TP);
+ TP = XMVectorMultiply(TP, Two);
+
+ Result = XMQuaternionSlerpV(Q03, Q12, TP);
+
+ return Result;
+
+}
+
+//------------------------------------------------------------------------------
+
+XMINLINE VOID XMQuaternionSquadSetup
+(
+ XMVECTOR* pA,
+ XMVECTOR* pB,
+ XMVECTOR* pC,
+ FXMVECTOR Q0,
+ FXMVECTOR Q1,
+ FXMVECTOR Q2,
+ CXMVECTOR Q3
+)
+{
+ XMVECTOR SQ0, SQ2, SQ3;
+ XMVECTOR InvQ1, InvQ2;
+ XMVECTOR LnQ0, LnQ1, LnQ2, LnQ3;
+ XMVECTOR ExpQ02, ExpQ13;
+ XMVECTOR LS01, LS12, LS23;
+ XMVECTOR LD01, LD12, LD23;
+ XMVECTOR Control0, Control1, Control2;
+ XMVECTOR NegativeOneQuarter;
+
+ XMASSERT(pA);
+ XMASSERT(pB);
+ XMASSERT(pC);
+
+ LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
+ LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
+ SQ2 = XMVectorNegate(Q2);
+
+ Control1 = XMVectorLess(LS12, LD12);
+ SQ2 = XMVectorSelect(Q2, SQ2, Control1);
+
+ LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
+ LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
+ SQ0 = XMVectorNegate(Q0);
+
+ LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3));
+ LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3));
+ SQ3 = XMVectorNegate(Q3);
+
+ Control0 = XMVectorLess(LS01, LD01);
+ Control2 = XMVectorLess(LS23, LD23);
+
+ SQ0 = XMVectorSelect(Q0, SQ0, Control0);
+ SQ3 = XMVectorSelect(Q3, SQ3, Control2);
+
+ InvQ1 = XMQuaternionInverse(Q1);
+ InvQ2 = XMQuaternionInverse(SQ2);
+
+ LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0));
+ LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2));
+ LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1));
+ LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3));
+
+ NegativeOneQuarter = XMVectorSplatConstant(-1, 2);
+
+ ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter);
+ ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter);
+ ExpQ02 = XMQuaternionExp(ExpQ02);
+ ExpQ13 = XMQuaternionExp(ExpQ13);
+
+ *pA = XMQuaternionMultiply(Q1, ExpQ02);
+ *pB = XMQuaternionMultiply(SQ2, ExpQ13);
+ *pC = SQ2;
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionBaryCentric
+(
+ FXMVECTOR Q0,
+ FXMVECTOR Q1,
+ FXMVECTOR Q2,
+ FLOAT f,
+ FLOAT g
+)
+{
+ XMVECTOR Q01;
+ XMVECTOR Q02;
+ FLOAT s;
+ XMVECTOR Result;
+
+ s = f + g;
+
+ if ((s < 0.00001f) && (s > -0.00001f))
+ {
+ Result = Q0;
+ }
+ else
+ {
+ Q01 = XMQuaternionSlerp(Q0, Q1, s);
+ Q02 = XMQuaternionSlerp(Q0, Q2, s);
+
+ Result = XMQuaternionSlerp(Q01, Q02, g / s);
+ }
+
+ return Result;
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionBaryCentricV
+(
+ FXMVECTOR Q0,
+ FXMVECTOR Q1,
+ FXMVECTOR Q2,
+ CXMVECTOR F,
+ CXMVECTOR G
+)
+{
+ XMVECTOR Q01;
+ XMVECTOR Q02;
+ XMVECTOR S, GS;
+ XMVECTOR Epsilon;
+ XMVECTOR Result;
+
+ XMASSERT( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) );
+ XMASSERT( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) );
+
+ Epsilon = XMVectorSplatConstant(1, 16);
+
+ S = XMVectorAdd(F, G);
+
+ if (XMVector4InBounds(S, Epsilon))
+ {
+ Result = Q0;
+ }
+ else
+ {
+ Q01 = XMQuaternionSlerpV(Q0, Q1, S);
+ Q02 = XMQuaternionSlerpV(Q0, Q2, S);
+ GS = XMVectorReciprocal(S);
+ GS = XMVectorMultiply(G, GS);
+
+ Result = XMQuaternionSlerpV(Q01, Q02, GS);
+ }
+
+ return Result;
+}
+
+//------------------------------------------------------------------------------
+// Transformation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionIdentity()
+{
+#if defined(_XM_NO_INTRINSICS_)
+ return g_XMIdentityR3.v;
+#elif defined(_XM_SSE_INTRINSICS_)
+ return g_XMIdentityR3;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYaw
+(
+ FLOAT Pitch,
+ FLOAT Yaw,
+ FLOAT Roll
+)
+{
+ XMVECTOR Angles;
+ XMVECTOR Q;
+
+ Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
+ Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
+
+ return Q;
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYawFromVector
+(
+ FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Q, Q0, Q1;
+ XMVECTOR P0, P1, Y0, Y1, R0, R1;
+ XMVECTOR HalfAngles;
+ XMVECTOR SinAngles, CosAngles;
+ static CONST XMVECTORU32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
+ static CONST XMVECTORU32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
+ static CONST XMVECTORU32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
+ static CONST XMVECTOR Sign = {1.0f, -1.0f, -1.0f, 1.0f};
+
+ HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);
+ XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
+
+ P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch.v);
+ Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw.v);
+ R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll.v);
+ P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch.v);
+ Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw.v);
+ R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll.v);
+
+ Q1 = XMVectorMultiply(P1, Sign);
+ Q0 = XMVectorMultiply(P0, Y0);
+ Q1 = XMVectorMultiply(Q1, Y1);
+ Q0 = XMVectorMultiply(Q0, R0);
+ Q = XMVectorMultiplyAdd(Q1, R1, Q0);
+
+ return Q;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR Q, Q0, Q1;
+ XMVECTOR P0, P1, Y0, Y1, R0, R1;
+ XMVECTOR HalfAngles;
+ XMVECTOR SinAngles, CosAngles;
+ static CONST XMVECTORI32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
+ static CONST XMVECTORI32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
+ static CONST XMVECTORI32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
+ static CONST XMVECTORF32 Sign = {1.0f, -1.0f, -1.0f, 1.0f};
+
+ HalfAngles = _mm_mul_ps(Angles, g_XMOneHalf);
+ XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
+
+ P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch);
+ Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw);
+ R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll);
+ P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch);
+ Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw);
+ R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll);
+
+ Q1 = _mm_mul_ps(P1, Sign);
+ Q0 = _mm_mul_ps(P0, Y0);
+ Q1 = _mm_mul_ps(Q1, Y1);
+ Q0 = _mm_mul_ps(Q0, R0);
+ Q = _mm_mul_ps(Q1, R1);
+ Q = _mm_add_ps(Q,Q0);
+ return Q;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionRotationNormal
+(
+ FXMVECTOR NormalAxis,
+ FLOAT Angle
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Q;
+ XMVECTOR N;
+ XMVECTOR Scale;
+
+ N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v);
+
+ XMScalarSinCos(&Scale.vector4_f32[2], &Scale.vector4_f32[3], 0.5f * Angle);
+
+ Scale.vector4_f32[0] = Scale.vector4_f32[1] = Scale.vector4_f32[2];
+
+ Q = XMVectorMultiply(N, Scale);
+
+ return Q;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3);
+ N = _mm_or_ps(N,g_XMIdentityR3);
+ XMVECTOR Scale = _mm_set_ps1(0.5f * Angle);
+ XMVECTOR vSine;
+ XMVECTOR vCosine;
+ XMVectorSinCos(&vSine,&vCosine,Scale);
+ Scale = _mm_and_ps(vSine,g_XMMask3);
+ vCosine = _mm_and_ps(vCosine,g_XMMaskW);
+ Scale = _mm_or_ps(Scale,vCosine);
+ N = _mm_mul_ps(N,Scale);
+ return N;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMQuaternionRotationAxis
+(
+ FXMVECTOR Axis,
+ FLOAT Angle
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Normal;
+ XMVECTOR Q;
+
+ XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
+ XMASSERT(!XMVector3IsInfinite(Axis));
+
+ Normal = XMVector3Normalize(Axis);
+ Q = XMQuaternionRotationNormal(Normal, Angle);
+
+ return Q;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR Normal;
+ XMVECTOR Q;
+
+ XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
+ XMASSERT(!XMVector3IsInfinite(Axis));
+
+ Normal = XMVector3Normalize(Axis);
+ Q = XMQuaternionRotationNormal(Normal, Angle);
+ return Q;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMINLINE XMVECTOR XMQuaternionRotationMatrix
+(
+ CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)
+
+ XMVECTOR Q0, Q1, Q2;
+ XMVECTOR M00, M11, M22;
+ XMVECTOR CQ0, CQ1, C;
+ XMVECTOR CX, CY, CZ, CW;
+ XMVECTOR SQ1, Scale;
+ XMVECTOR Rsq, Sqrt, VEqualsNaN;
+ XMVECTOR A, B, P;
+ XMVECTOR PermuteSplat, PermuteSplatT;
+ XMVECTOR SignB, SignBT;
+ XMVECTOR PermuteControl, PermuteControlT;
+ XMVECTOR Result;
+ static CONST XMVECTORF32 OneQuarter = {0.25f, 0.25f, 0.25f, 0.25f};
+ static CONST XMVECTORF32 SignPNNP = {1.0f, -1.0f, -1.0f, 1.0f};
+ static CONST XMVECTORF32 SignNPNP = {-1.0f, 1.0f, -1.0f, 1.0f};
+ static CONST XMVECTORF32 SignNNPP = {-1.0f, -1.0f, 1.0f, 1.0f};
+ static CONST XMVECTORF32 SignPNPP = {1.0f, -1.0f, 1.0f, 1.0f};
+ static CONST XMVECTORF32 SignPPNP = {1.0f, 1.0f, -1.0f, 1.0f};
+ static CONST XMVECTORF32 SignNPPP = {-1.0f, 1.0f, 1.0f, 1.0f};
+ static CONST XMVECTORU32 Permute0X0X0Y0W = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W};
+ static CONST XMVECTORU32 Permute0Y0Z0Z1W = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_1W};
+ static CONST XMVECTORU32 SplatX = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
+ static CONST XMVECTORU32 SplatY = {XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
+ static CONST XMVECTORU32 SplatZ = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z};
+ static CONST XMVECTORU32 SplatW = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W};
+ static CONST XMVECTORU32 PermuteC = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Y};
+ static CONST XMVECTORU32 PermuteA = {XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0W};
+ static CONST XMVECTORU32 PermuteB = {XM_PERMUTE_1X, XM_PERMUTE_1W, XM_PERMUTE_0Z, XM_PERMUTE_0W};
+ static CONST XMVECTORU32 Permute0 = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Z, XM_PERMUTE_1Y};
+ static CONST XMVECTORU32 Permute1 = {XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z};
+ static CONST XMVECTORU32 Permute2 = {XM_PERMUTE_1Z, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1X};
+ static CONST XMVECTORU32 Permute3 = {XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_1X, XM_PERMUTE_0W};
+
+ M00 = XMVectorSplatX(M.r[0]);
+ M11 = XMVectorSplatY(M.r[1]);
+ M22 = XMVectorSplatZ(M.r[2]);
+
+ Q0 = XMVectorMultiply(SignPNNP.v, M00);
+ Q0 = XMVectorMultiplyAdd(SignNPNP.v, M11, Q0);
+ Q0 = XMVectorMultiplyAdd(SignNNPP.v, M22, Q0);
+
+ Q1 = XMVectorAdd(Q0, g_XMOne.v);
+
+ Rsq = XMVectorReciprocalSqrt(Q1);
+ VEqualsNaN = XMVectorIsNaN(Rsq);
+ Sqrt = XMVectorMultiply(Q1, Rsq);
+ Q1 = XMVectorSelect(Sqrt, Q1, VEqualsNaN);
+
+ Q1 = XMVectorMultiply(Q1, g_XMOneHalf.v);
+
+ SQ1 = XMVectorMultiply(Rsq, g_XMOneHalf.v);
+
+ CQ0 = XMVectorPermute(Q0, Q0, Permute0X0X0Y0W.v);
+ CQ1 = XMVectorPermute(Q0, g_XMEpsilon.v, Permute0Y0Z0Z1W.v);
+ C = XMVectorGreaterOrEqual(CQ0, CQ1);
+
+ CX = XMVectorSplatX(C);
+ CY = XMVectorSplatY(C);
+ CZ = XMVectorSplatZ(C);
+ CW = XMVectorSplatW(C);
+
+ PermuteSplat = XMVectorSelect(SplatZ.v, SplatY.v, CZ);
+ SignB = XMVectorSelect(SignNPPP.v, SignPPNP.v, CZ);
+ PermuteControl = XMVectorSelect(Permute2.v, Permute1.v, CZ);
+
+ PermuteSplat = XMVectorSelect(PermuteSplat, SplatZ.v, CX);
+ SignB = XMVectorSelect(SignB, SignNPPP.v, CX);
+ PermuteControl = XMVectorSelect(PermuteControl, Permute2.v, CX);
+
+ PermuteSplatT = XMVectorSelect(PermuteSplat,SplatX.v, CY);
+ SignBT = XMVectorSelect(SignB, SignPNPP.v, CY);
+ PermuteControlT = XMVectorSelect(PermuteControl,Permute0.v, CY);
+
+ PermuteSplat = XMVectorSelect(PermuteSplat, PermuteSplatT, CX);
+ SignB = XMVectorSelect(SignB, SignBT, CX);
+ PermuteControl = XMVectorSelect(PermuteControl, PermuteControlT, CX);
+
+ PermuteSplat = XMVectorSelect(PermuteSplat,SplatW.v, CW);
+ SignB = XMVectorSelect(SignB, g_XMNegativeOne.v, CW);
+ PermuteControl = XMVectorSelect(PermuteControl,Permute3.v, CW);
+
+ Scale = XMVectorPermute(SQ1, SQ1, PermuteSplat);
+
+ P = XMVectorPermute(M.r[1], M.r[2],PermuteC.v); // {M10, M12, M20, M21}
+ A = XMVectorPermute(M.r[0], P, PermuteA.v); // {M01, M12, M20, M03}
+ B = XMVectorPermute(M.r[0], P, PermuteB.v); // {M10, M21, M02, M03}
+
+ Q2 = XMVectorMultiplyAdd(SignB, B, A);
+ Q2 = XMVectorMultiply(Q2, Scale);
+
+ Result = XMVectorPermute(Q1, Q2, PermuteControl);
+
+ return Result;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Conversion operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+XMFINLINE VOID XMQuaternionToAxisAngle
+(
+ XMVECTOR* pAxis,
+ FLOAT* pAngle,
+ FXMVECTOR Q
+)
+{
+ XMASSERT(pAxis);
+ XMASSERT(pAngle);
+
+ *pAxis = Q;
+
+#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+ *pAngle = 2.0f * acosf(XMVectorGetW(Q));
+#else
+ *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q));
+#endif
+}
+
+/****************************************************************************
+ *
+ * Plane
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMPlaneEqual
+(
+ FXMVECTOR P1,
+ FXMVECTOR P2
+)
+{
+ return XMVector4Equal(P1, P2);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMPlaneNearEqual
+(
+ FXMVECTOR P1,
+ FXMVECTOR P2,
+ FXMVECTOR Epsilon
+)
+{
+ XMVECTOR NP1 = XMPlaneNormalize(P1);
+ XMVECTOR NP2 = XMPlaneNormalize(P2);
+ return XMVector4NearEqual(NP1, NP2, Epsilon);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMPlaneNotEqual
+(
+ FXMVECTOR P1,
+ FXMVECTOR P2
+)
+{
+ return XMVector4NotEqual(P1, P2);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMPlaneIsNaN
+(
+ FXMVECTOR P
+)
+{
+ return XMVector4IsNaN(P);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMPlaneIsInfinite
+(
+ FXMVECTOR P
+)
+{
+ return XMVector4IsInfinite(P);
+}
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMPlaneDot
+(
+ FXMVECTOR P,
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ return XMVector4Dot(P, V);
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ __m128 vTemp2 = V;
+ __m128 vTemp = _mm_mul_ps(P,vTemp2);
+ vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
+ vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W;
+ vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
+ vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together
+ return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMPlaneDotCoord
+(
+ FXMVECTOR P,
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR V3;
+ XMVECTOR Result;
+
+ // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]
+ V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
+ Result = XMVector4Dot(P, V3);
+
+ return Result;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp2 = _mm_and_ps(V,g_XMMask3);
+ vTemp2 = _mm_or_ps(vTemp2,g_XMIdentityR3);
+ XMVECTOR vTemp = _mm_mul_ps(P,vTemp2);
+ vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
+ vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W;
+ vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
+ vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together
+ return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMPlaneDotNormal
+(
+ FXMVECTOR P,
+ FXMVECTOR V
+)
+{
+ return XMVector3Dot(P, V);
+}
+
+//------------------------------------------------------------------------------
+// XMPlaneNormalizeEst uses a reciprocal estimate and
+// returns QNaN on zero and infinite vectors.
+
+XMFINLINE XMVECTOR XMPlaneNormalizeEst
+(
+ FXMVECTOR P
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result = XMVector3ReciprocalLength(P);
+ Result = XMVectorMultiply(P, Result);
+ return Result;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Perform the dot product
+ XMVECTOR vDot = _mm_mul_ps(P,P);
+ // x=Dot.y, y=Dot.z
+ XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
+ // Result.x = x+y
+ vDot = _mm_add_ss(vDot,vTemp);
+ // x=Dot.z
+ vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
+ // Result.x = (x+y)+z
+ vDot = _mm_add_ss(vDot,vTemp);
+ // Splat x
+ vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
+ // Get the reciprocal
+ vDot = _mm_rsqrt_ps(vDot);
+ // Get the reciprocal
+ vDot = _mm_mul_ps(vDot,P);
+ return vDot;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMPlaneNormalize
+(
+ FXMVECTOR P
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ FLOAT fLengthSq = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2]));
+ // Prevent divide by zero
+ if (fLengthSq) {
+ fLengthSq = 1.0f/fLengthSq;
+ }
+ {
+ XMVECTOR vResult = {
+ P.vector4_f32[0]*fLengthSq,
+ P.vector4_f32[1]*fLengthSq,
+ P.vector4_f32[2]*fLengthSq,
+ P.vector4_f32[3]*fLengthSq
+ };
+ return vResult;
+ }
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Perform the dot product on x,y and z only
+ XMVECTOR vLengthSq = _mm_mul_ps(P,P);
+ XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
+ vLengthSq = _mm_add_ss(vLengthSq,vTemp);
+ vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
+ vLengthSq = _mm_add_ss(vLengthSq,vTemp);
+ vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
+ // Prepare for the division
+ XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
+ // Failsafe on zero (Or epsilon) length planes
+ // If the length is infinity, set the elements to zero
+ vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
+ // Reciprocal mul to perform the normalization
+ vResult = _mm_div_ps(P,vResult);
+ // Any that are infinity, set to zero
+ vResult = _mm_and_ps(vResult,vLengthSq);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMPlaneIntersectLine
+(
+ FXMVECTOR P,
+ FXMVECTOR LinePoint1,
+ FXMVECTOR LinePoint2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR V1;
+ XMVECTOR V2;
+ XMVECTOR D;
+ XMVECTOR ReciprocalD;
+ XMVECTOR VT;
+ XMVECTOR Point;
+ XMVECTOR Zero;
+ XMVECTOR Control;
+ XMVECTOR Result;
+
+ V1 = XMVector3Dot(P, LinePoint1);
+ V2 = XMVector3Dot(P, LinePoint2);
+ D = XMVectorSubtract(V1, V2);
+
+ ReciprocalD = XMVectorReciprocal(D);
+ VT = XMPlaneDotCoord(P, LinePoint1);
+ VT = XMVectorMultiply(VT, ReciprocalD);
+
+ Point = XMVectorSubtract(LinePoint2, LinePoint1);
+ Point = XMVectorMultiplyAdd(Point, VT, LinePoint1);
+
+ Zero = XMVectorZero();
+ Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);
+
+ Result = XMVectorSelect(Point, g_XMQNaN.v, Control);
+
+ return Result;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR V1;
+ XMVECTOR V2;
+ XMVECTOR D;
+ XMVECTOR VT;
+ XMVECTOR Point;
+ XMVECTOR Zero;
+ XMVECTOR Control;
+ XMVECTOR Result;
+
+ V1 = XMVector3Dot(P, LinePoint1);
+ V2 = XMVector3Dot(P, LinePoint2);
+ D = _mm_sub_ps(V1, V2);
+
+ VT = XMPlaneDotCoord(P, LinePoint1);
+ VT = _mm_div_ps(VT, D);
+
+ Point = _mm_sub_ps(LinePoint2, LinePoint1);
+ Point = _mm_mul_ps(Point,VT);
+ Point = _mm_add_ps(Point,LinePoint1);
+ Zero = XMVectorZero();
+ Control = XMVectorNearEqual(D, Zero, g_XMEpsilon);
+ Result = XMVectorSelect(Point, g_XMQNaN, Control);
+ return Result;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMINLINE VOID XMPlaneIntersectPlane
+(
+ XMVECTOR* pLinePoint1,
+ XMVECTOR* pLinePoint2,
+ FXMVECTOR P1,
+ FXMVECTOR P2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR V1;
+ XMVECTOR V2;
+ XMVECTOR V3;
+ XMVECTOR LengthSq;
+ XMVECTOR RcpLengthSq;
+ XMVECTOR Point;
+ XMVECTOR P1W;
+ XMVECTOR P2W;
+ XMVECTOR Control;
+ XMVECTOR LinePoint1;
+ XMVECTOR LinePoint2;
+
+ XMASSERT(pLinePoint1);
+ XMASSERT(pLinePoint2);
+
+ V1 = XMVector3Cross(P2, P1);
+
+ LengthSq = XMVector3LengthSq(V1);
+
+ V2 = XMVector3Cross(P2, V1);
+
+ P1W = XMVectorSplatW(P1);
+ Point = XMVectorMultiply(V2, P1W);
+
+ V3 = XMVector3Cross(V1, P1);
+
+ P2W = XMVectorSplatW(P2);
+ Point = XMVectorMultiplyAdd(V3, P2W, Point);
+
+ RcpLengthSq = XMVectorReciprocal(LengthSq);
+ LinePoint1 = XMVectorMultiply(Point, RcpLengthSq);
+
+ LinePoint2 = XMVectorAdd(LinePoint1, V1);
+
+ Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
+ *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control);
+ *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control);
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMASSERT(pLinePoint1);
+ XMASSERT(pLinePoint2);
+ XMVECTOR V1;
+ XMVECTOR V2;
+ XMVECTOR V3;
+ XMVECTOR LengthSq;
+ XMVECTOR Point;
+ XMVECTOR P1W;
+ XMVECTOR P2W;
+ XMVECTOR Control;
+ XMVECTOR LinePoint1;
+ XMVECTOR LinePoint2;
+
+ V1 = XMVector3Cross(P2, P1);
+
+ LengthSq = XMVector3LengthSq(V1);
+
+ V2 = XMVector3Cross(P2, V1);
+
+ P1W = _mm_shuffle_ps(P1,P1,_MM_SHUFFLE(3,3,3,3));
+ Point = _mm_mul_ps(V2, P1W);
+
+ V3 = XMVector3Cross(V1, P1);
+
+ P2W = _mm_shuffle_ps(P2,P2,_MM_SHUFFLE(3,3,3,3));
+ V3 = _mm_mul_ps(V3,P2W);
+ Point = _mm_add_ps(Point,V3);
+ LinePoint1 = _mm_div_ps(Point,LengthSq);
+
+ LinePoint2 = _mm_add_ps(LinePoint1, V1);
+
+ Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon);
+ *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN, Control);
+ *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN, Control);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMPlaneTransform
+(
+ FXMVECTOR P,
+ CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR X;
+ XMVECTOR Y;
+ XMVECTOR Z;
+ XMVECTOR W;
+ XMVECTOR Result;
+
+ W = XMVectorSplatW(P);
+ Z = XMVectorSplatZ(P);
+ Y = XMVectorSplatY(P);
+ X = XMVectorSplatX(P);
+
+ Result = XMVectorMultiply(W, M.r[3]);
+ Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
+ Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
+ Result = XMVectorMultiplyAdd(X, M.r[0], Result);
+
+ return Result;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR X = _mm_shuffle_ps(P,P,_MM_SHUFFLE(0,0,0,0));
+ XMVECTOR Y = _mm_shuffle_ps(P,P,_MM_SHUFFLE(1,1,1,1));
+ XMVECTOR Z = _mm_shuffle_ps(P,P,_MM_SHUFFLE(2,2,2,2));
+ XMVECTOR W = _mm_shuffle_ps(P,P,_MM_SHUFFLE(3,3,3,3));
+ X = _mm_mul_ps(X, M.r[0]);
+ Y = _mm_mul_ps(Y, M.r[1]);
+ Z = _mm_mul_ps(Z, M.r[2]);
+ W = _mm_mul_ps(W, M.r[3]);
+ X = _mm_add_ps(X,Z);
+ Y = _mm_add_ps(Y,W);
+ X = _mm_add_ps(X,Y);
+ return X;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMFLOAT4* XMPlaneTransformStream
+(
+ XMFLOAT4* pOutputStream,
+ size_t OutputStride,
+ CONST XMFLOAT4* pInputStream,
+ size_t InputStride,
+ size_t PlaneCount,
+ CXMMATRIX M
+)
+{
+ return XMVector4TransformStream(pOutputStream,
+ OutputStride,
+ pInputStream,
+ InputStride,
+ PlaneCount,
+ M);
+}
+
+//------------------------------------------------------------------------------
+// Conversion operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMPlaneFromPointNormal
+(
+ FXMVECTOR Point,
+ FXMVECTOR Normal
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR W;
+ XMVECTOR Result;
+
+ W = XMVector3Dot(Point, Normal);
+ W = XMVectorNegate(W);
+ Result = XMVectorSelect(W, Normal, g_XMSelect1110.v);
+
+ return Result;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR W;
+ XMVECTOR Result;
+ W = XMVector3Dot(Point,Normal);
+ W = _mm_mul_ps(W,g_XMNegativeOne);
+ Result = _mm_and_ps(Normal,g_XMMask3);
+ W = _mm_and_ps(W,g_XMMaskW);
+ Result = _mm_or_ps(Result,W);
+ return Result;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMPlaneFromPoints
+(
+ FXMVECTOR Point1,
+ FXMVECTOR Point2,
+ FXMVECTOR Point3
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR N;
+ XMVECTOR D;
+ XMVECTOR V21;
+ XMVECTOR V31;
+ XMVECTOR Result;
+
+ V21 = XMVectorSubtract(Point1, Point2);
+ V31 = XMVectorSubtract(Point1, Point3);
+
+ N = XMVector3Cross(V21, V31);
+ N = XMVector3Normalize(N);
+
+ D = XMPlaneDotNormal(N, Point1);
+ D = XMVectorNegate(D);
+
+ Result = XMVectorSelect(D, N, g_XMSelect1110.v);
+
+ return Result;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR N;
+ XMVECTOR D;
+ XMVECTOR V21;
+ XMVECTOR V31;
+ XMVECTOR Result;
+
+ V21 = _mm_sub_ps(Point1, Point2);
+ V31 = _mm_sub_ps(Point1, Point3);
+
+ N = XMVector3Cross(V21, V31);
+ N = XMVector3Normalize(N);
+
+ D = XMPlaneDotNormal(N, Point1);
+ D = _mm_mul_ps(D,g_XMNegativeOne);
+ N = _mm_and_ps(N,g_XMMask3);
+ D = _mm_and_ps(D,g_XMMaskW);
+ Result = _mm_or_ps(D,N);
+ return Result;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+/****************************************************************************
+ *
+ * Color
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMColorEqual
+(
+ FXMVECTOR C1,
+ FXMVECTOR C2
+)
+{
+ return XMVector4Equal(C1, C2);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMColorNotEqual
+(
+ FXMVECTOR C1,
+ FXMVECTOR C2
+)
+{
+ return XMVector4NotEqual(C1, C2);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMColorGreater
+(
+ FXMVECTOR C1,
+ FXMVECTOR C2
+)
+{
+ return XMVector4Greater(C1, C2);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMColorGreaterOrEqual
+(
+ FXMVECTOR C1,
+ FXMVECTOR C2
+)
+{
+ return XMVector4GreaterOrEqual(C1, C2);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMColorLess
+(
+ FXMVECTOR C1,
+ FXMVECTOR C2
+)
+{
+ return XMVector4Less(C1, C2);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMColorLessOrEqual
+(
+ FXMVECTOR C1,
+ FXMVECTOR C2
+)
+{
+ return XMVector4LessOrEqual(C1, C2);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMColorIsNaN
+(
+ FXMVECTOR C
+)
+{
+ return XMVector4IsNaN(C);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMColorIsInfinite
+(
+ FXMVECTOR C
+)
+{
+ return XMVector4IsInfinite(C);
+}
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMColorNegative
+(
+ FXMVECTOR vColor
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+// XMASSERT(XMVector4GreaterOrEqual(C, XMVectorReplicate(0.0f)));
+// XMASSERT(XMVector4LessOrEqual(C, XMVectorReplicate(1.0f)));
+ XMVECTOR vResult = {
+ 1.0f - vColor.vector4_f32[0],
+ 1.0f - vColor.vector4_f32[1],
+ 1.0f - vColor.vector4_f32[2],
+ vColor.vector4_f32[3]
+ };
+ return vResult;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Negate only x,y and z.
+ XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3);
+ // Add 1,1,1,0 to -x,-y,-z,w
+ return _mm_add_ps(vTemp,g_XMOne3);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMColorModulate
+(
+ FXMVECTOR C1,
+ FXMVECTOR C2
+)
+{
+ return XMVectorMultiply(C1, C2);
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMColorAdjustSaturation
+(
+ FXMVECTOR vColor,
+ FLOAT fSaturation
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ CONST XMVECTOR gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
+
+ // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2];
+ // Result = (C - Luminance) * Saturation + Luminance;
+
+ FLOAT fLuminance = (vColor.vector4_f32[0]*gvLuminance.vector4_f32[0])+(vColor.vector4_f32[1]*gvLuminance.vector4_f32[1])+(vColor.vector4_f32[2]*gvLuminance.vector4_f32[2]);
+ XMVECTOR vResult = {
+ ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance,
+ ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance,
+ ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance,
+ vColor.vector4_f32[3]};
+ return vResult;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
+// Mul RGB by intensity constants
+ XMVECTOR vLuminance = _mm_mul_ps(vColor,gvLuminance);
+// vResult.x = vLuminance.y, vResult.y = vLuminance.y,
+// vResult.z = vLuminance.z, vResult.w = vLuminance.z
+ XMVECTOR vResult = vLuminance;
+ vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,1,1));
+// vLuminance.x += vLuminance.y
+ vLuminance = _mm_add_ss(vLuminance,vResult);
+// Splat vLuminance.z
+ vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,2,2));
+// vLuminance.x += vLuminance.z (Dot product)
+ vLuminance = _mm_add_ss(vLuminance,vResult);
+// Splat vLuminance
+ vLuminance = _mm_shuffle_ps(vLuminance,vLuminance,_MM_SHUFFLE(0,0,0,0));
+// Splat fSaturation
+ XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
+// vResult = ((vColor-vLuminance)*vSaturation)+vLuminance;
+ vResult = _mm_sub_ps(vColor,vLuminance);
+ vResult = _mm_mul_ps(vResult,vSaturation);
+ vResult = _mm_add_ps(vResult,vLuminance);
+// Retain w from the source color
+ vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
+ vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
+ return vResult;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMColorAdjustContrast
+(
+ FXMVECTOR vColor,
+ FLOAT fContrast
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ // Result = (vColor - 0.5f) * fContrast + 0.5f;
+ XMVECTOR vResult = {
+ ((vColor.vector4_f32[0]-0.5f) * fContrast) + 0.5f,
+ ((vColor.vector4_f32[1]-0.5f) * fContrast) + 0.5f,
+ ((vColor.vector4_f32[2]-0.5f) * fContrast) + 0.5f,
+ vColor.vector4_f32[3] // Leave W untouched
+ };
+ return vResult;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale
+ XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf); // Subtract 0.5f from the source (Saving source)
+ vResult = _mm_mul_ps(vResult,vScale); // Mul by scale
+ vResult = _mm_add_ps(vResult,g_XMOneHalf); // Add 0.5f
+// Retain w from the source color
+ vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
+ vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
+ return vResult;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+/****************************************************************************
+ *
+ * Miscellaneous
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+XMINLINE BOOL XMVerifyCPUSupport()
+{
+#if defined(_XM_NO_INTRINSICS_) || !defined(_XM_SSE_INTRINSICS_)
+ return TRUE;
+#else // _XM_SSE_INTRINSICS_
+ // Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail
+ // Detecting SSE2 on older versions of Windows would require using cpuid directly
+ return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) );
+#endif
+}
+
+
+//------------------------------------------------------------------------------
+
+#define XMASSERT_LINE_STRING_SIZE 16
+
+XMINLINE VOID XMAssert
+(
+ CONST CHAR* pExpression,
+ CONST CHAR* pFileName,
+ UINT LineNumber
+)
+{
+ CHAR aLineString[XMASSERT_LINE_STRING_SIZE];
+ CHAR* pLineString;
+ UINT Line;
+
+ aLineString[XMASSERT_LINE_STRING_SIZE - 2] = '0';
+ aLineString[XMASSERT_LINE_STRING_SIZE - 1] = '\0';
+ for (Line = LineNumber, pLineString = aLineString + XMASSERT_LINE_STRING_SIZE - 2;
+ Line != 0 && pLineString >= aLineString;
+ Line /= 10, pLineString--)
+ {
+ *pLineString = (CHAR)('0' + (Line % 10));
+ }
+
+#ifndef NO_OUTPUT_DEBUG_STRING
+ OutputDebugStringA("Assertion failed: ");
+ OutputDebugStringA(pExpression);
+ OutputDebugStringA(", file ");
+ OutputDebugStringA(pFileName);
+ OutputDebugStringA(", line ");
+ OutputDebugStringA(pLineString + 1);
+ OutputDebugStringA("\r\n");
+#else
+ DbgPrint("Assertion failed: %s, file %s, line %d\r\n", pExpression, pFileName, LineNumber);
+#endif
+
+ __debugbreak();
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE XMVECTOR XMFresnelTerm
+(
+ FXMVECTOR CosIncidentAngle,
+ FXMVECTOR RefractionIndex
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR G;
+ XMVECTOR D, S;
+ XMVECTOR V0, V1, V2, V3;
+ XMVECTOR Result;
+
+ // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
+ // c = CosIncidentAngle
+ // g = sqrt(c^2 + RefractionIndex^2 - 1)
+
+ XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));
+
+ G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
+ G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G);
+ G = XMVectorAbs(G);
+ G = XMVectorSqrt(G);
+
+ S = XMVectorAdd(G, CosIncidentAngle);
+ D = XMVectorSubtract(G, CosIncidentAngle);
+
+ V0 = XMVectorMultiply(D, D);
+ V1 = XMVectorMultiply(S, S);
+ V1 = XMVectorReciprocal(V1);
+ V0 = XMVectorMultiply(g_XMOneHalf.v, V0);
+ V0 = XMVectorMultiply(V0, V1);
+
+ V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v);
+ V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v);
+ V2 = XMVectorMultiply(V2, V2);
+ V3 = XMVectorMultiply(V3, V3);
+ V3 = XMVectorReciprocal(V3);
+ V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v);
+
+ Result = XMVectorMultiply(V0, V2);
+
+ Result = XMVectorSaturate(Result);
+
+ return Result;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
+ // c = CosIncidentAngle
+ // g = sqrt(c^2 + RefractionIndex^2 - 1)
+
+ XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));
+
+ // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
+ XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex);
+ XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle);
+ G = _mm_sub_ps(G,g_XMOne);
+ vTemp = _mm_add_ps(vTemp,G);
+ // max((0-vTemp),vTemp) == abs(vTemp)
+ // The abs is needed to deal with refraction and cosine being zero
+ G = _mm_setzero_ps();
+ G = _mm_sub_ps(G,vTemp);
+ G = _mm_max_ps(G,vTemp);
+ // Last operation, the sqrt()
+ G = _mm_sqrt_ps(G);
+
+ // Calc G-C and G+C
+ XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle);
+ XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle);
+ // Perform the term (0.5f *(g - c)^2) / (g + c)^2
+ XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC);
+ vTemp = _mm_mul_ps(GAddC,GAddC);
+ vResult = _mm_mul_ps(vResult,g_XMOneHalf);
+ vResult = _mm_div_ps(vResult,vTemp);
+ // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
+ GAddC = _mm_mul_ps(GAddC,CosIncidentAngle);
+ GSubC = _mm_mul_ps(GSubC,CosIncidentAngle);
+ GAddC = _mm_sub_ps(GAddC,g_XMOne);
+ GSubC = _mm_add_ps(GSubC,g_XMOne);
+ GAddC = _mm_mul_ps(GAddC,GAddC);
+ GSubC = _mm_mul_ps(GSubC,GSubC);
+ GAddC = _mm_div_ps(GAddC,GSubC);
+ GAddC = _mm_add_ps(GAddC,g_XMOne);
+ // Multiply the two term parts
+ vResult = _mm_mul_ps(vResult,GAddC);
+ // Clamp to 0.0 - 1.0f
+ vResult = _mm_max_ps(vResult,g_XMZero);
+ vResult = _mm_min_ps(vResult,g_XMOne);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE BOOL XMScalarNearEqual
+(
+ FLOAT S1,
+ FLOAT S2,
+ FLOAT Epsilon
+)
+{
+ FLOAT Delta = S1 - S2;
+#if defined(_XM_NO_INTRINSICS_)
+ UINT AbsDelta = *(const UINT*)&Delta & 0x7FFFFFFF;
+ return (*(FLOAT*)&AbsDelta <= Epsilon);
+#elif defined(_XM_SSE_INTRINSICS_)
+ return (fabsf(Delta) <= Epsilon);
+#else
+ return (__fabs(Delta) <= Epsilon);
+#endif
+}
+
+//------------------------------------------------------------------------------
+// Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI
+XMFINLINE FLOAT XMScalarModAngle
+(
+ FLOAT Angle
+)
+{
+ // Note: The modulo is performed with unsigned math only to work
+ // around a precision error on numbers that are close to PI
+ float fTemp;
+#if defined(_XM_NO_INTRINSICS_) || !defined(_XM_VMX128_INTRINSICS_)
+ // Normalize the range from 0.0f to XM_2PI
+ Angle = Angle + XM_PI;
+ // Perform the modulo, unsigned
+ fTemp = fabsf(Angle);
+ fTemp = fTemp - (XM_2PI * (FLOAT)((INT)(fTemp/XM_2PI)));
+ // Restore the number to the range of -XM_PI to XM_PI-epsilon
+ fTemp = fTemp - XM_PI;
+ // If the modulo'd value was negative, restore negation
+ if (Angle<0.0f) {
+ fTemp = -fTemp;
+ }
+ return fTemp;
+#else
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+XMINLINE FLOAT XMScalarSin
+(
+ FLOAT Value
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ FLOAT ValueMod;
+ FLOAT ValueSq;
+ XMVECTOR V0123, V0246, V1357, V9111315, V17192123;
+ XMVECTOR V1, V7, V8;
+ XMVECTOR R0, R1, R2;
+
+ ValueMod = XMScalarModAngle(Value);
+
+ // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! +
+ // V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
+
+ ValueSq = ValueMod * ValueMod;
+
+ V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
+ V1 = XMVectorSplatY(V0123);
+ V0246 = XMVectorMultiply(V0123, V0123);
+ V1357 = XMVectorMultiply(V0246, V1);
+ V7 = XMVectorSplatW(V1357);
+ V8 = XMVectorMultiply(V7, V1);
+ V9111315 = XMVectorMultiply(V1357, V8);
+ V17192123 = XMVectorMultiply(V9111315, V8);
+
+ R0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v);
+ R1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v);
+ R2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v);
+
+ return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0];
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ return sinf( Value );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMINLINE FLOAT XMScalarCos
+(
+ FLOAT Value
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ FLOAT ValueMod;
+ FLOAT ValueSq;
+ XMVECTOR V0123, V0246, V8101214, V16182022;
+ XMVECTOR V2, V6, V8;
+ XMVECTOR R0, R1, R2;
+
+ ValueMod = XMScalarModAngle(Value);
+
+ // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! +
+ // V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
+
+ ValueSq = ValueMod * ValueMod;
+
+ V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
+ V0246 = XMVectorMultiply(V0123, V0123);
+
+ V2 = XMVectorSplatZ(V0123);
+ V6 = XMVectorSplatW(V0246);
+ V8 = XMVectorMultiply(V6, V2);
+
+ V8101214 = XMVectorMultiply(V0246, V8);
+ V16182022 = XMVectorMultiply(V8101214, V8);
+
+ R0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v);
+ R1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v);
+ R2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v);
+
+ return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0];
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ return cosf(Value);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMINLINE VOID XMScalarSinCos
+(
+ FLOAT* pSin,
+ FLOAT* pCos,
+ FLOAT Value
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ FLOAT ValueMod;
+ FLOAT ValueSq;
+ XMVECTOR V0123, V0246, V1357, V8101214, V9111315, V16182022, V17192123;
+ XMVECTOR V1, V2, V6, V8;
+ XMVECTOR S0, S1, S2, C0, C1, C2;
+
+ XMASSERT(pSin);
+ XMASSERT(pCos);
+
+ ValueMod = XMScalarModAngle(Value);
+
+ // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! +
+ // V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
+ // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! +
+ // V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
+
+ ValueSq = ValueMod * ValueMod;
+
+ V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
+
+ V1 = XMVectorSplatY(V0123);
+ V2 = XMVectorSplatZ(V0123);
+
+ V0246 = XMVectorMultiply(V0123, V0123);
+ V1357 = XMVectorMultiply(V0246, V1);
+
+ V6 = XMVectorSplatW(V0246);
+ V8 = XMVectorMultiply(V6, V2);
+
+ V8101214 = XMVectorMultiply(V0246, V8);
+ V9111315 = XMVectorMultiply(V1357, V8);
+ V16182022 = XMVectorMultiply(V8101214, V8);
+ V17192123 = XMVectorMultiply(V9111315, V8);
+
+ C0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v);
+ S0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v);
+ C1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v);
+ S1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v);
+ C2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v);
+ S2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v);
+
+ *pCos = C0.vector4_f32[0] + C1.vector4_f32[0] + C2.vector4_f32[0];
+ *pSin = S0.vector4_f32[0] + S1.vector4_f32[0] + S2.vector4_f32[0];
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMASSERT(pSin);
+ XMASSERT(pCos);
+
+ *pSin = sinf(Value);
+ *pCos = cosf(Value);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMINLINE FLOAT XMScalarASin
+(
+ FLOAT Value
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ FLOAT AbsValue, Value2, Value3, D;
+ XMVECTOR AbsV, R0, R1, Result;
+ XMVECTOR V3;
+
+ *(UINT*)&AbsValue = *(const UINT*)&Value & 0x7FFFFFFF;
+
+ Value2 = Value * AbsValue;
+ Value3 = Value * Value2;
+ D = (Value - Value2) / sqrtf(1.00000011921f - AbsValue);
+
+ AbsV = XMVectorReplicate(AbsValue);
+
+ V3.vector4_f32[0] = Value3;
+ V3.vector4_f32[1] = 1.0f;
+ V3.vector4_f32[2] = Value3;
+ V3.vector4_f32[3] = 1.0f;
+
+ R1 = XMVectorSet(D, D, Value, Value);
+ R1 = XMVectorMultiply(R1, V3);
+
+ R0 = XMVectorMultiplyAdd(AbsV, g_XMASinCoefficients0.v, g_XMASinCoefficients1.v);
+ R0 = XMVectorMultiplyAdd(AbsV, R0, g_XMASinCoefficients2.v);
+
+ Result = XMVector4Dot(R0, R1);
+
+ return Result.vector4_f32[0];
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ return asinf(Value);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMINLINE FLOAT XMScalarACos
+(
+ FLOAT Value
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ return XM_PIDIV2 - XMScalarASin(Value);
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ return acosf(Value);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE FLOAT XMScalarSinEst
+(
+ FLOAT Value
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ FLOAT ValueSq;
+ XMVECTOR V;
+ XMVECTOR Y;
+ XMVECTOR Result;
+
+ XMASSERT(Value >= -XM_PI);
+ XMASSERT(Value < XM_PI);
+
+ // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
+
+ ValueSq = Value * Value;
+
+ V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
+ Y = XMVectorSplatY(V);
+ V = XMVectorMultiply(V, V);
+ V = XMVectorMultiply(V, Y);
+
+ Result = XMVector4Dot(V, g_XMSinEstCoefficients.v);
+
+ return Result.vector4_f32[0];
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMASSERT(Value >= -XM_PI);
+ XMASSERT(Value < XM_PI);
+ float ValueSq = Value*Value;
+ XMVECTOR vValue = _mm_set_ps1(Value);
+ XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f);
+ vTemp = _mm_mul_ps(vTemp,vTemp);
+ vTemp = _mm_mul_ps(vTemp,vValue);
+ // vTemp = Value,Value^3,Value^5,Value^7
+ vTemp = _mm_mul_ps(vTemp,g_XMSinEstCoefficients);
+ vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
+ vValue = _mm_add_ps(vValue,vTemp); // Add Z = X+Z; W = Y+W;
+ vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
+ vTemp = _mm_add_ps(vTemp,vValue); // Add Z and W together
+ vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
+#if defined(_MSC_VER) && (_MSC_VER>=1500)
+ return _mm_cvtss_f32(vTemp);
+#else
+ return vTemp.m128_f32[0];
+#endif
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE FLOAT XMScalarCosEst
+(
+ FLOAT Value
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ FLOAT ValueSq;
+ XMVECTOR V;
+ XMVECTOR Result;
+ XMASSERT(Value >= -XM_PI);
+ XMASSERT(Value < XM_PI);
+ // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
+ ValueSq = Value * Value;
+ V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
+ V = XMVectorMultiply(V, V);
+ Result = XMVector4Dot(V, g_XMCosEstCoefficients.v);
+ return Result.vector4_f32[0];
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMASSERT(Value >= -XM_PI);
+ XMASSERT(Value < XM_PI);
+ float ValueSq = Value*Value;
+ XMVECTOR vValue = _mm_setzero_ps();
+ XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f);
+ vTemp = _mm_mul_ps(vTemp,vTemp);
+ // vTemp = 1.0f,Value^2,Value^4,Value^6
+ vTemp = _mm_mul_ps(vTemp,g_XMCosEstCoefficients);
+ vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
+ vValue = _mm_add_ps(vValue,vTemp); // Add Z = X+Z; W = Y+W;
+ vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
+ vTemp = _mm_add_ps(vTemp,vValue); // Add Z and W together
+ vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
+#if defined(_MSC_VER) && (_MSC_VER>=1500)
+ return _mm_cvtss_f32(vTemp);
+#else
+ return vTemp.m128_f32[0];
+#endif
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE VOID XMScalarSinCosEst
+(
+ FLOAT* pSin,
+ FLOAT* pCos,
+ FLOAT Value
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ FLOAT ValueSq;
+ XMVECTOR V, Sin, Cos;
+ XMVECTOR Y;
+
+ XMASSERT(pSin);
+ XMASSERT(pCos);
+ XMASSERT(Value >= -XM_PI);
+ XMASSERT(Value < XM_PI);
+
+ // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
+ // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
+
+ ValueSq = Value * Value;
+ V = XMVectorSet(1.0f, Value, ValueSq, Value * ValueSq);
+ Y = XMVectorSplatY(V);
+ Cos = XMVectorMultiply(V, V);
+ Sin = XMVectorMultiply(Cos, Y);
+
+ Cos = XMVector4Dot(Cos, g_XMCosEstCoefficients.v);
+ Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients.v);
+
+ *pCos = Cos.vector4_f32[0];
+ *pSin = Sin.vector4_f32[0];
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMASSERT(pSin);
+ XMASSERT(pCos);
+ XMASSERT(Value >= -XM_PI);
+ XMASSERT(Value < XM_PI);
+ float ValueSq = Value * Value;
+ XMVECTOR Cos = _mm_set_ps(Value * ValueSq,ValueSq,Value,1.0f);
+ XMVECTOR Sin = _mm_set_ps1(Value);
+ Cos = _mm_mul_ps(Cos,Cos);
+ Sin = _mm_mul_ps(Sin,Cos);
+ // Cos = 1.0f,Value^2,Value^4,Value^6
+ Cos = XMVector4Dot(Cos,g_XMCosEstCoefficients);
+ _mm_store_ss(pCos,Cos);
+ // Sin = Value,Value^3,Value^5,Value^7
+ Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients);
+ _mm_store_ss(pSin,Sin);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE FLOAT XMScalarASinEst
+(
+ FLOAT Value
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR VR, CR, CS;
+ XMVECTOR Result;
+ FLOAT AbsV, V2, D;
+ CONST FLOAT OnePlusEps = 1.00000011921f;
+
+ *(UINT*)&AbsV = *(const UINT*)&Value & 0x7FFFFFFF;
+ V2 = Value * AbsV;
+ D = OnePlusEps - AbsV;
+
+ CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
+ VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
+ CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);
+
+ Result = XMVector4Dot(VR, CR);
+
+ return Result.vector4_f32[0];
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ CONST FLOAT OnePlusEps = 1.00000011921f;
+ FLOAT AbsV = fabsf(Value);
+ FLOAT V2 = Value * AbsV; // Square with sign retained
+ FLOAT D = OnePlusEps - AbsV;
+
+ XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
+ XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
+ Result = _mm_mul_ps(Result, g_XMASinEstCoefficients);
+ Result = XMVector4Dot(VR,Result);
+#if defined(_MSC_VER) && (_MSC_VER>=1500)
+ return _mm_cvtss_f32(Result);
+#else
+ return Result.m128_f32[0];
+#endif
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+XMFINLINE FLOAT XMScalarACosEst
+(
+ FLOAT Value
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR VR, CR, CS;
+ XMVECTOR Result;
+ FLOAT AbsV, V2, D;
+ CONST FLOAT OnePlusEps = 1.00000011921f;
+
+ // return XM_PIDIV2 - XMScalarASin(Value);
+
+ *(UINT*)&AbsV = *(const UINT*)&Value & 0x7FFFFFFF;
+ V2 = Value * AbsV;
+ D = OnePlusEps - AbsV;
+
+ CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
+ VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
+ CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);
+
+ Result = XMVector4Dot(VR, CR);
+
+ return XM_PIDIV2 - Result.vector4_f32[0];
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ CONST FLOAT OnePlusEps = 1.00000011921f;
+ FLOAT AbsV = fabsf(Value);
+ FLOAT V2 = Value * AbsV; // Value^2 retaining sign
+ FLOAT D = OnePlusEps - AbsV;
+ XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
+ XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
+ Result = _mm_mul_ps(Result,g_XMASinEstCoefficients);
+ Result = XMVector4Dot(VR,Result);
+#if defined(_MSC_VER) && (_MSC_VER>=1500)
+ return XM_PIDIV2 - _mm_cvtss_f32(Result);
+#else
+ return XM_PIDIV2 - Result.m128_f32[0];
+#endif
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+#endif // __XNAMATHMISC_INL__
+