1 /************************************************************************
3 * xnamathmisc.inl -- SIMD C++ Math library for Windows and Xbox 360 *
4 * Quaternion, plane, and color functions *
6 * Copyright (c) Microsoft Corp. All rights reserved. *
8 ************************************************************************/
10 #if defined(_MSC_VER) && (_MSC_VER > 1000)
14 #ifndef __XNAMATHMISC_INL__
15 #define __XNAMATHMISC_INL__
17 /****************************************************************************
21 ****************************************************************************/
23 //------------------------------------------------------------------------------
24 // Comparison operations
25 //------------------------------------------------------------------------------
27 //------------------------------------------------------------------------------
// Returns TRUE when the two quaternions are component-wise equal.
// Delegates to the 4D vector comparison; note q and -q encode the same
// rotation but compare unequal here.
29 XMFINLINE BOOL XMQuaternionEqual
35 return XMVector4Equal(Q1, Q2);
38 //------------------------------------------------------------------------------
// Returns TRUE when any component of the two quaternions differs.
40 XMFINLINE BOOL XMQuaternionNotEqual
46 return XMVector4NotEqual(Q1, Q2);
49 //------------------------------------------------------------------------------
// Returns TRUE when any component of Q is NaN.
51 XMFINLINE BOOL XMQuaternionIsNaN
56 return XMVector4IsNaN(Q);
59 //------------------------------------------------------------------------------
// Returns TRUE when any component of Q is +/- infinity.
61 XMFINLINE BOOL XMQuaternionIsInfinite
66 return XMVector4IsInfinite(Q);
69 //------------------------------------------------------------------------------
// Returns TRUE when Q is exactly the identity quaternion <0,0,0,1>.
71 XMFINLINE BOOL XMQuaternionIsIdentity
76 #if defined(_XM_NO_INTRINSICS_)
// g_XMIdentityR3 is row 3 of the identity matrix, i.e. <0,0,0,1>.
78 return XMVector4Equal(Q, g_XMIdentityR3.v);
80 #elif defined(_XM_SSE_INTRINSICS_)
// Lane-wise equality; a movemask of 0x0f means all four lanes matched.
81 XMVECTOR vTemp = _mm_cmpeq_ps(Q,g_XMIdentityR3);
82 return (_mm_movemask_ps(vTemp)==0x0f);
83 #else // _XM_VMX128_INTRINSICS_
84 #endif // _XM_VMX128_INTRINSICS_
87 //------------------------------------------------------------------------------
88 // Computation operations
89 //------------------------------------------------------------------------------
91 //------------------------------------------------------------------------------
// 4D dot product of two quaternions, replicated into all four lanes
// of the result (same contract as XMVector4Dot).
93 XMFINLINE XMVECTOR XMQuaternionDot
99 return XMVector4Dot(Q1, Q2);
102 //------------------------------------------------------------------------------
// Quaternion (Hamilton) product of Q1 and Q2.
// The result is built as a sum of four sign-permuted terms, one per
// component of Q2: Q1*Q2.w + perm(Q1)*Q2.x + perm(Q1)*Q2.y + perm(Q1)*Q2.z.
104 XMFINLINE XMVECTOR XMQuaternionMultiply
110 #if defined(_XM_NO_INTRINSICS_)
// Permute controls mix Q1 with -Q1 so each term carries the correct
// signs for the quaternion product expansion.
121 CONST XMVECTORU32 ControlWZYX = {XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1X};
122 CONST XMVECTORU32 ControlZWXY = {XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_1Y};
123 CONST XMVECTORU32 ControlYXWZ = {XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z};
125 NegativeQ1 = XMVectorNegate(Q1);
// Broadcast each component of Q2 across all four lanes.
127 Q2W = XMVectorSplatW(Q2);
128 Q2X = XMVectorSplatX(Q2);
129 Q2Y = XMVectorSplatY(Q2);
130 Q2Z = XMVectorSplatZ(Q2);
// Sign-adjusted shuffles of Q1 (select from Q1 / -Q1 per lane).
132 Q1WZYX = XMVectorPermute(Q1, NegativeQ1, ControlWZYX.v);
133 Q1ZWXY = XMVectorPermute(Q1, NegativeQ1, ControlZWXY.v);
134 Q1YXWZ = XMVectorPermute(Q1, NegativeQ1, ControlYXWZ.v);
// Accumulate the four product terms.
136 Result = XMVectorMultiply(Q1, Q2W);
137 Result = XMVectorMultiplyAdd(Q1WZYX, Q2X, Result);
138 Result = XMVectorMultiplyAdd(Q1ZWXY, Q2Y, Result);
139 Result = XMVectorMultiplyAdd(Q1YXWZ, Q2Z, Result);
143 #elif defined(_XM_SSE_INTRINSICS_)
// On SSE the sign flips are done with +/-1 multiplier constants instead
// of select-style permutes.
144 static CONST XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f};
145 static CONST XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f};
146 static CONST XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f};
147 // Copy to SSE registers and use as few as possible for x86
151 XMVECTOR vResult = Q2;
152 // Splat with one instruction
153 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
154 Q2X = _mm_shuffle_ps(Q2X,Q2X,_MM_SHUFFLE(0,0,0,0));
155 Q2Y = _mm_shuffle_ps(Q2Y,Q2Y,_MM_SHUFFLE(1,1,1,1));
156 Q2Z = _mm_shuffle_ps(Q2Z,Q2Z,_MM_SHUFFLE(2,2,2,2));
157 // Retire Q1 and perform Q1*Q2W
158 vResult = _mm_mul_ps(vResult,Q1);
159 XMVECTOR Q1Shuffle = Q1;
160 // Shuffle the copies of Q1
// Reverse lane order: wzyx view of Q1 for the Q2.x term.
161 Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
163 Q2X = _mm_mul_ps(Q2X,Q1Shuffle);
164 Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(2,3,0,1));
165 // Flip the signs on y and z
166 Q2X = _mm_mul_ps(Q2X,ControlWZYX);
168 Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle);
169 Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
170 // Flip the signs on z and w
171 Q2Y = _mm_mul_ps(Q2Y,ControlZWXY);
173 Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle);
174 vResult = _mm_add_ps(vResult,Q2X);
175 // Flip the signs on x and w
176 Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ);
// Sum the remaining two terms into the result.
177 Q2Y = _mm_add_ps(Q2Y,Q2Z);
178 vResult = _mm_add_ps(vResult,Q2Y);
180 #else // _XM_VMX128_INTRINSICS_
181 #endif // _XM_VMX128_INTRINSICS_
184 //------------------------------------------------------------------------------
// Squared length (norm) of the quaternion, replicated in all lanes.
186 XMFINLINE XMVECTOR XMQuaternionLengthSq
191 return XMVector4LengthSq(Q);
194 //------------------------------------------------------------------------------
// 1 / length of the quaternion, replicated in all lanes.
196 XMFINLINE XMVECTOR XMQuaternionReciprocalLength
201 return XMVector4ReciprocalLength(Q);
204 //------------------------------------------------------------------------------
// Length (norm) of the quaternion, replicated in all lanes.
206 XMFINLINE XMVECTOR XMQuaternionLength
211 return XMVector4Length(Q);
214 //------------------------------------------------------------------------------
// Fast, lower-precision normalization (uses the vector-4 estimate path).
216 XMFINLINE XMVECTOR XMQuaternionNormalizeEst
221 return XMVector4NormalizeEst(Q);
224 //------------------------------------------------------------------------------
// Full-precision normalization of the quaternion.
226 XMFINLINE XMVECTOR XMQuaternionNormalize
231 return XMVector4Normalize(Q);
234 //------------------------------------------------------------------------------
// Conjugate of Q: negate the vector part (x,y,z), keep w.
236 XMFINLINE XMVECTOR XMQuaternionConjugate
241 #if defined(_XM_NO_INTRINSICS_)
250 #elif defined(_XM_SSE_INTRINSICS_)
// Multiply by <-1,-1,-1,1> to flip the signs of x,y,z only.
251 static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f};
252 XMVECTOR Result = _mm_mul_ps(Q,NegativeOne3);
254 #else // _XM_VMX128_INTRINSICS_
255 #endif // _XM_VMX128_INTRINSICS_
258 //------------------------------------------------------------------------------
// Inverse of Q: conjugate(Q) / |Q|^2.
// Quaternions whose squared length is <= epsilon yield the zero vector
// instead of dividing by (near) zero.
260 XMFINLINE XMVECTOR XMQuaternionInverse
265 #if defined(_XM_NO_INTRINSICS_)
271 CONST XMVECTOR Zero = XMVectorZero();
273 L = XMVector4LengthSq(Q);
274 Conjugate = XMQuaternionConjugate(Q);
// Control is all-ones per lane where |Q|^2 <= epsilon.
276 Control = XMVectorLessOrEqual(L, g_XMEpsilon.v);
278 L = XMVectorReciprocal(L);
279 Result = XMVectorMultiply(Conjugate, L);
// Degenerate (near-zero) quaternions map to zero.
281 Result = XMVectorSelect(Result, Zero, Control);
285 #elif defined(_XM_SSE_INTRINSICS_)
290 XMVECTOR Zero = XMVectorZero();
292 L = XMVector4LengthSq(Q);
293 Conjugate = XMQuaternionConjugate(Q);
294 Control = XMVectorLessOrEqual(L, g_XMEpsilon);
// True divide here (no reciprocal estimate) for precision.
295 Result = _mm_div_ps(Conjugate,L);
296 Result = XMVectorSelect(Result, Zero, Control);
298 #else // _XM_VMX128_INTRINSICS_
299 #endif // _XM_VMX128_INTRINSICS_
302 //------------------------------------------------------------------------------
// Natural log of a unit quaternion: scales the vector part by
// theta/sin(theta) where theta = acos(w). If |w| is already within
// (1 - epsilon) of 1, the input's vector part is returned unscaled.
304 XMFINLINE XMVECTOR XMQuaternionLn
309 #if defined(_XM_NO_INTRINSICS_)
318 static CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
320 QW = XMVectorSplatW(Q);
// Q0 = vector part of Q with w zeroed (select via the 1110 mask).
321 Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);
// ControlW is set where |w| < 1 - epsilon, i.e. theta is well defined.
323 ControlW = XMVectorInBounds(QW, OneMinusEpsilon);
325 Theta = XMVectorACos(QW);
326 SinTheta = XMVectorSin(Theta);
// S = theta / sin(theta)
328 S = XMVectorReciprocal(SinTheta);
329 S = XMVectorMultiply(Theta, S);
331 Result = XMVectorMultiply(Q0, S);
// Fall back to the raw vector part when w was out of bounds.
333 Result = XMVectorSelect(Q0, Result, ControlW);
337 #elif defined(_XM_SSE_INTRINSICS_)
338 static CONST XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
339 static CONST XMVECTORF32 NegOneMinusEpsilon = {-(1.0f - 0.00001f), -(1.0f - 0.00001f),-(1.0f - 0.00001f),-(1.0f - 0.00001f)};
// Broadcast w to all lanes.
341 XMVECTOR QW = _mm_shuffle_ps(Q,Q,_MM_SHUFFLE(3,3,3,3));
// Zero the w lane, keeping the x,y,z vector part.
343 XMVECTOR Q0 = _mm_and_ps(Q,g_XMMask3);
344 // Use W if within bounds
345 XMVECTOR ControlW = _mm_cmple_ps(QW,OneMinusEpsilon);
346 XMVECTOR vTemp2 = _mm_cmpge_ps(QW,NegOneMinusEpsilon);
347 ControlW = _mm_and_ps(ControlW,vTemp2);
349 XMVECTOR vTheta = XMVectorACos(QW);
351 vTemp2 = XMVectorSin(vTheta);
352 // theta/sine of theta
353 vTheta = _mm_div_ps(vTheta,vTemp2);
355 vTheta = _mm_mul_ps(vTheta,Q0);
356 // Was W in bounds? If not, return input as is
357 vTheta = XMVectorSelect(Q0,vTheta,ControlW);
359 #else // _XM_VMX128_INTRINSICS_
360 #endif // _XM_VMX128_INTRINSICS_
363 //------------------------------------------------------------------------------
// Exponential of a pure quaternion (w assumed unused): with
// theta = |xyz|, result = <xyz * sin(theta)/theta, cos(theta)>.
// Near theta == 0 the sin(theta)/theta scale is skipped (limit is 1).
365 XMFINLINE XMVECTOR XMQuaternionExp
370 #if defined(_XM_NO_INTRINSICS_)
380 Theta = XMVector3Length(Q);
381 XMVectorSinCos(&SinTheta, &CosTheta, Theta);
// S = sin(theta) / theta
383 S = XMVectorReciprocal(Theta);
384 S = XMVectorMultiply(SinTheta, S);
386 Result = XMVectorMultiply(Q, S);
// Avoid 0/0 at theta ~ 0: keep Q unscaled there.
388 Zero = XMVectorZero();
389 Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v);
390 Result = XMVectorSelect(Result, Q, Control);
// Merge cos(theta) into the w lane (1110 select keeps xyz of Result).
392 Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v);
396 #elif defined(_XM_SSE_INTRINSICS_)
404 Theta = XMVector3Length(Q);
405 XMVectorSinCos(&SinTheta, &CosTheta, Theta);
406 S = _mm_div_ps(SinTheta,Theta);
407 Result = _mm_mul_ps(Q, S);
408 Zero = XMVectorZero();
409 Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon);
410 Result = XMVectorSelect(Result,Q,Control);
// Assemble <xyz, cos(theta)> with masks instead of a select.
411 Result = _mm_and_ps(Result,g_XMMask3);
412 CosTheta = _mm_and_ps(CosTheta,g_XMMaskW);
413 Result = _mm_or_ps(Result,CosTheta);
415 #else // _XM_VMX128_INTRINSICS_
416 #endif // _XM_VMX128_INTRINSICS_
419 //------------------------------------------------------------------------------
// Scalar-t convenience wrapper: replicates t into all four lanes and
// forwards to the vectorized XMQuaternionSlerpV.
421 XMINLINE XMVECTOR XMQuaternionSlerp
428 XMVECTOR T = XMVectorReplicate(t);
429 return XMQuaternionSlerpV(Q0, Q1, T);
432 //------------------------------------------------------------------------------
// Spherical linear interpolation between Q0 and Q1 with replicated
// interpolation vector T (all four lanes must hold the same t).
// Takes the shortest arc by flipping the sign of Q1's contribution when
// dot(Q0,Q1) < 0, and falls back to linear weights when the quaternions
// are nearly parallel (cosOmega >= 1 - epsilon).
434 XMINLINE XMVECTOR XMQuaternionSlerpV
441 #if defined(_XM_NO_INTRINSICS_)
443 // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
447 XMVECTOR InvSinOmega;
457 CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
// T must be a replicated scalar.
459 XMASSERT((T.vector4_f32[1] == T.vector4_f32[0]) && (T.vector4_f32[2] == T.vector4_f32[0]) && (T.vector4_f32[3] == T.vector4_f32[0]));
461 CosOmega = XMQuaternionDot(Q0, Q1);
// Sign is -1 where the dot is negative (shortest-path correction).
463 Zero = XMVectorZero();
464 Control = XMVectorLess(CosOmega, Zero);
465 Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control);
467 CosOmega = XMVectorMultiply(CosOmega, Sign);
// Control now marks the "angle large enough for the sin formula" case.
469 Control = XMVectorLess(CosOmega, OneMinusEpsilon);
471 SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v);
472 SinOmega = XMVectorSqrt(SinOmega);
474 Omega = XMVectorATan2(SinOmega, CosOmega);
// Build V01 = <1 - t, t, ...> via shifts and a sign flip.
476 SignMask = XMVectorSplatSignMask();
477 C1000 = XMVectorSetBinaryConstant(1, 0, 0, 0);
478 V01 = XMVectorShiftLeft(T, Zero, 2);
479 SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
480 V01 = XMVectorXorInt(V01, SignMask);
481 V01 = XMVectorAdd(C1000, V01);
483 InvSinOmega = XMVectorReciprocal(SinOmega);
// S0 = sin(V01 * omega) / sin(omega); lanes hold the two blend weights.
485 S0 = XMVectorMultiply(V01, Omega);
486 S0 = XMVectorSin(S0);
487 S0 = XMVectorMultiply(S0, InvSinOmega);
// Nearly-parallel case: use the linear weights <1-t, t> directly.
489 S0 = XMVectorSelect(V01, S0, Control);
491 S1 = XMVectorSplatY(S0);
492 S0 = XMVectorSplatX(S0);
// Apply the shortest-path sign to Q1's weight.
494 S1 = XMVectorMultiply(S1, Sign);
496 Result = XMVectorMultiply(Q0, S0);
497 Result = XMVectorMultiplyAdd(Q1, S1, Result);
501 #elif defined(_XM_SSE_INTRINSICS_)
502 // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
513 static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
514 static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000};
515 static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000};
517 XMASSERT((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));
519 CosOmega = XMQuaternionDot(Q0, Q1);
521 Zero = XMVectorZero();
522 Control = XMVectorLess(CosOmega, Zero);
523 Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control);
525 CosOmega = _mm_mul_ps(CosOmega, Sign);
527 Control = XMVectorLess(CosOmega, OneMinusEpsilon);
// sin(omega) = sqrt(1 - cos^2(omega))
529 SinOmega = _mm_mul_ps(CosOmega,CosOmega);
530 SinOmega = _mm_sub_ps(g_XMOne,SinOmega);
531 SinOmega = _mm_sqrt_ps(SinOmega);
533 Omega = XMVectorATan2(SinOmega, CosOmega);
// V01 = <1 - t, t, 0, 0>: shuffle t into x, negate it, add 1 in lane x.
535 V01 = _mm_shuffle_ps(T,T,_MM_SHUFFLE(2,3,0,1));
536 V01 = _mm_and_ps(V01,MaskXY);
537 V01 = _mm_xor_ps(V01,SignMask2);
538 V01 = _mm_add_ps(g_XMIdentityR0, V01);
540 S0 = _mm_mul_ps(V01, Omega);
541 S0 = XMVectorSin(S0);
542 S0 = _mm_div_ps(S0, SinOmega);
544 S0 = XMVectorSelect(V01, S0, Control);
546 S1 = XMVectorSplatY(S0);
547 S0 = XMVectorSplatX(S0);
549 S1 = _mm_mul_ps(S1, Sign);
550 Result = _mm_mul_ps(Q0, S0);
551 S1 = _mm_mul_ps(S1, Q1);
552 Result = _mm_add_ps(Result,S1);
554 #else // _XM_VMX128_INTRINSICS_
555 #endif // _XM_VMX128_INTRINSICS_
558 //------------------------------------------------------------------------------
// Scalar-t convenience wrapper for spherical quadrangle interpolation;
// replicates t and forwards to XMQuaternionSquadV.
560 XMFINLINE XMVECTOR XMQuaternionSquad
569 XMVECTOR T = XMVectorReplicate(t);
570 return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T);
573 //------------------------------------------------------------------------------
// Spherical quadrangle interpolation:
// squad(t) = slerp(slerp(Q0,Q3,t), slerp(Q1,Q2,t), 2t(1-t)).
575 XMFINLINE XMVECTOR XMQuaternionSquadV
// T must be a replicated scalar.
590 XMASSERT( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) );
593 Two = XMVectorSplatConstant(2, 0);
595 Q03 = XMQuaternionSlerpV(Q0, Q3, T);
596 Q12 = XMQuaternionSlerpV(Q1, Q2, T);
// TP = 2 * t * (1 - t), computed as (t - t*t) * 2.
598 TP = XMVectorNegativeMultiplySubtract(TP, TP, TP);
599 TP = XMVectorMultiply(TP, Two);
601 Result = XMQuaternionSlerpV(Q03, Q12, TP);
607 //------------------------------------------------------------------------------
// Computes the inner control points (pA, pB) and the sign-corrected end
// quaternion (pC) used by XMQuaternionSquad. Neighbor quaternions are
// negated when |q_i + q_j|^2 < |q_i - q_j|^2 so every slerp follows the
// shorter arc, then A = Q1*exp(-(ln(Q1^-1 SQ0)+ln(Q1^-1 SQ2))/4) and
// B = SQ2*exp(-(ln(SQ2^-1 Q1)+ln(SQ2^-1 SQ3))/4).
609 XMINLINE VOID XMQuaternionSquadSetup
620 XMVECTOR SQ0, SQ2, SQ3;
621 XMVECTOR InvQ1, InvQ2;
622 XMVECTOR LnQ0, LnQ1, LnQ2, LnQ3;
623 XMVECTOR ExpQ02, ExpQ13;
624 XMVECTOR LS01, LS12, LS23;
625 XMVECTOR LD01, LD12, LD23;
626 XMVECTOR Control0, Control1, Control2;
627 XMVECTOR NegativeOneQuarter;
// Pick the hemisphere of Q2 relative to Q1.
633 LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
634 LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
635 SQ2 = XMVectorNegate(Q2);
637 Control1 = XMVectorLess(LS12, LD12);
638 SQ2 = XMVectorSelect(Q2, SQ2, Control1);
// Same test for Q0 relative to Q1, and Q3 relative to (signed) Q2.
640 LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
641 LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
642 SQ0 = XMVectorNegate(Q0);
644 LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3));
645 LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3));
646 SQ3 = XMVectorNegate(Q3);
648 Control0 = XMVectorLess(LS01, LD01);
649 Control2 = XMVectorLess(LS23, LD23);
651 SQ0 = XMVectorSelect(Q0, SQ0, Control0);
652 SQ3 = XMVectorSelect(Q3, SQ3, Control2);
// Logs of the relative rotations around Q1 and SQ2.
654 InvQ1 = XMQuaternionInverse(Q1);
655 InvQ2 = XMQuaternionInverse(SQ2);
657 LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0));
658 LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2));
659 LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1));
660 LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3));
// SplatConstant(-1, 2) yields -1 / 2^2 = -0.25.
662 NegativeOneQuarter = XMVectorSplatConstant(-1, 2);
664 ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter);
665 ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter);
666 ExpQ02 = XMQuaternionExp(ExpQ02);
667 ExpQ13 = XMQuaternionExp(ExpQ13);
669 *pA = XMQuaternionMultiply(Q1, ExpQ02);
670 *pB = XMQuaternionMultiply(SQ2, ExpQ13);
674 //------------------------------------------------------------------------------
// Barycentric interpolation over the spherical triangle (Q0, Q1, Q2)
// via two slerps: slerp(slerp(Q0,Q1,s), slerp(Q0,Q2,s), g/s) where
// s = f + g. The near-zero-s case short-circuits (division guard).
676 XMFINLINE XMVECTOR XMQuaternionBaryCentric
692 if ((s < 0.00001f) && (s > -0.00001f))
698 Q01 = XMQuaternionSlerp(Q0, Q1, s);
699 Q02 = XMQuaternionSlerp(Q0, Q2, s);
701 Result = XMQuaternionSlerp(Q01, Q02, g / s);
707 //------------------------------------------------------------------------------
// Vectorized barycentric interpolation; F and G must be replicated
// scalars. S = F + G; when S is within the small epsilon bound the
// slerp chain is skipped (guards the reciprocal of S).
709 XMFINLINE XMVECTOR XMQuaternionBaryCentricV
724 XMASSERT( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) );
725 XMASSERT( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) );
// SplatConstant(1, 16) is a tiny positive epsilon (1 / 2^16).
727 Epsilon = XMVectorSplatConstant(1, 16);
729 S = XMVectorAdd(F, G);
731 if (XMVector4InBounds(S, Epsilon))
737 Q01 = XMQuaternionSlerpV(Q0, Q1, S);
738 Q02 = XMQuaternionSlerpV(Q0, Q2, S);
// GS = G / S, the second slerp's parameter.
739 GS = XMVectorReciprocal(S);
740 GS = XMVectorMultiply(G, GS);
742 Result = XMQuaternionSlerpV(Q01, Q02, GS);
748 //------------------------------------------------------------------------------
749 // Transformation operations
750 //------------------------------------------------------------------------------
752 //------------------------------------------------------------------------------
// Returns the identity quaternion <0,0,0,1> (row 3 of the identity matrix).
754 XMFINLINE XMVECTOR XMQuaternionIdentity()
756 #if defined(_XM_NO_INTRINSICS_)
757 return g_XMIdentityR3.v;
758 #elif defined(_XM_SSE_INTRINSICS_)
759 return g_XMIdentityR3;
760 #else // _XM_VMX128_INTRINSICS_
761 #endif // _XM_VMX128_INTRINSICS_
764 //------------------------------------------------------------------------------
// Builds a rotation quaternion from scalar pitch/yaw/roll angles by
// packing them into a vector (<Pitch, Yaw, Roll, 0>) and delegating.
766 XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYaw
776 Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
777 Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
782 //------------------------------------------------------------------------------
// Builds a rotation quaternion from an angle vector <Pitch, Yaw, Roll, 0>.
// Computes sin/cos of the half angles once, then assembles the product
// of the three axis rotations via permutes and one signed multiply-add.
784 XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYawFromVector
786 FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
789 #if defined(_XM_NO_INTRINSICS_)
792 XMVECTOR P0, P1, Y0, Y1, R0, R1;
794 XMVECTOR SinAngles, CosAngles;
// Each control picks one lane from the "0" source (sin or cos of that
// axis' half angle) and fills the rest from the "1" source.
795 static CONST XMVECTORU32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
796 static CONST XMVECTORU32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
797 static CONST XMVECTORU32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
798 static CONST XMVECTOR Sign = {1.0f, -1.0f, -1.0f, 1.0f};
// Half angles, then their sines and cosines in one call.
800 HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);
801 XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
803 P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch.v);
804 Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw.v);
805 R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll.v);
806 P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch.v);
807 Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw.v);
808 R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll.v);
// Q = P0*Y0*R0 + Sign*P1*Y1*R1
810 Q1 = XMVectorMultiply(P1, Sign);
811 Q0 = XMVectorMultiply(P0, Y0);
812 Q1 = XMVectorMultiply(Q1, Y1);
813 Q0 = XMVectorMultiply(Q0, R0);
814 Q = XMVectorMultiplyAdd(Q1, R1, Q0);
818 #elif defined(_XM_SSE_INTRINSICS_)
820 XMVECTOR P0, P1, Y0, Y1, R0, R1;
822 XMVECTOR SinAngles, CosAngles;
823 static CONST XMVECTORI32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
824 static CONST XMVECTORI32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
825 static CONST XMVECTORI32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
826 static CONST XMVECTORF32 Sign = {1.0f, -1.0f, -1.0f, 1.0f};
828 HalfAngles = _mm_mul_ps(Angles, g_XMOneHalf);
829 XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
831 P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch);
832 Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw);
833 R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll);
834 P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch);
835 Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw);
836 R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll);
// Same combination as the no-intrinsics path, spelled with raw SSE.
838 Q1 = _mm_mul_ps(P1, Sign);
839 Q0 = _mm_mul_ps(P0, Y0);
840 Q1 = _mm_mul_ps(Q1, Y1);
841 Q0 = _mm_mul_ps(Q0, R0);
842 Q = _mm_mul_ps(Q1, R1);
843 Q = _mm_add_ps(Q,Q0);
845 #else // _XM_VMX128_INTRINSICS_
846 #endif // _XM_VMX128_INTRINSICS_
849 //------------------------------------------------------------------------------
// Builds a rotation quaternion from a *normalized* axis and an angle:
// Q = <axis * sin(angle/2), cos(angle/2)>. No normalization is done
// here; callers with an arbitrary axis use XMQuaternionRotationAxis.
851 XMFINLINE XMVECTOR XMQuaternionRotationNormal
853 FXMVECTOR NormalAxis,
857 #if defined(_XM_NO_INTRINSICS_)
// N = <axis.xyz, 1>
863 N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v);
// Scale = <sin, sin, sin, cos> of the half angle.
865 XMScalarSinCos(&Scale.vector4_f32[2], &Scale.vector4_f32[3], 0.5f * Angle);
867 Scale.vector4_f32[0] = Scale.vector4_f32[1] = Scale.vector4_f32[2];
869 Q = XMVectorMultiply(N, Scale);
873 #elif defined(_XM_SSE_INTRINSICS_)
// N = <axis.xyz, 1> assembled with masks.
874 XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3);
875 N = _mm_or_ps(N,g_XMIdentityR3);
876 XMVECTOR Scale = _mm_set_ps1(0.5f * Angle);
879 XMVectorSinCos(&vSine,&vCosine,Scale);
// Combine sin into xyz and cos into w.
880 Scale = _mm_and_ps(vSine,g_XMMask3);
881 vCosine = _mm_and_ps(vCosine,g_XMMaskW);
882 Scale = _mm_or_ps(Scale,vCosine);
883 N = _mm_mul_ps(N,Scale);
885 #else // _XM_VMX128_INTRINSICS_
886 #endif // _XM_VMX128_INTRINSICS_
889 //------------------------------------------------------------------------------
// Builds a rotation quaternion from an arbitrary (non-zero, finite)
// axis: normalizes the axis, then defers to XMQuaternionRotationNormal.
891 XMFINLINE XMVECTOR XMQuaternionRotationAxis
897 #if defined(_XM_NO_INTRINSICS_)
// A zero or infinite axis cannot be normalized — debug-assert it.
902 XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
903 XMASSERT(!XMVector3IsInfinite(Axis));
905 Normal = XMVector3Normalize(Axis);
906 Q = XMQuaternionRotationNormal(Normal, Angle);
910 #elif defined(_XM_SSE_INTRINSICS_)
914 XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
915 XMASSERT(!XMVector3IsInfinite(Axis));
917 Normal = XMVector3Normalize(Axis);
918 Q = XMQuaternionRotationNormal(Normal, Angle);
920 #else // _XM_VMX128_INTRINSICS_
921 #endif // _XM_VMX128_INTRINSICS_
924 //------------------------------------------------------------------------------
// Extracts a rotation quaternion from a rotation matrix. Four candidate
// formulations exist (based on the trace / diagonal combinations in Q0);
// the comparisons in C choose the numerically largest term, and the
// matching permute/sign tables assemble the final result branch-free.
926 XMINLINE XMVECTOR XMQuaternionRotationMatrix
931 #if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)
934 XMVECTOR M00, M11, M22;
935 XMVECTOR CQ0, CQ1, C;
936 XMVECTOR CX, CY, CZ, CW;
938 XMVECTOR Rsq, Sqrt, VEqualsNaN;
940 XMVECTOR PermuteSplat, PermuteSplatT;
941 XMVECTOR SignB, SignBT;
942 XMVECTOR PermuteControl, PermuteControlT;
944 static CONST XMVECTORF32 OneQuarter = {0.25f, 0.25f, 0.25f, 0.25f};
// Sign tables for the four diagonal combinations and the off-diagonal
// sums; selected at runtime by the comparisons below.
945 static CONST XMVECTORF32 SignPNNP = {1.0f, -1.0f, -1.0f, 1.0f};
946 static CONST XMVECTORF32 SignNPNP = {-1.0f, 1.0f, -1.0f, 1.0f};
947 static CONST XMVECTORF32 SignNNPP = {-1.0f, -1.0f, 1.0f, 1.0f};
948 static CONST XMVECTORF32 SignPNPP = {1.0f, -1.0f, 1.0f, 1.0f};
949 static CONST XMVECTORF32 SignPPNP = {1.0f, 1.0f, -1.0f, 1.0f};
950 static CONST XMVECTORF32 SignNPPP = {-1.0f, 1.0f, 1.0f, 1.0f};
951 static CONST XMVECTORU32 Permute0X0X0Y0W = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W};
952 static CONST XMVECTORU32 Permute0Y0Z0Z1W = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_1W};
953 static CONST XMVECTORU32 SplatX = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
954 static CONST XMVECTORU32 SplatY = {XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
955 static CONST XMVECTORU32 SplatZ = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z};
956 static CONST XMVECTORU32 SplatW = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W};
957 static CONST XMVECTORU32 PermuteC = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Y};
958 static CONST XMVECTORU32 PermuteA = {XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0W};
959 static CONST XMVECTORU32 PermuteB = {XM_PERMUTE_1X, XM_PERMUTE_1W, XM_PERMUTE_0Z, XM_PERMUTE_0W};
960 static CONST XMVECTORU32 Permute0 = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Z, XM_PERMUTE_1Y};
961 static CONST XMVECTORU32 Permute1 = {XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z};
962 static CONST XMVECTORU32 Permute2 = {XM_PERMUTE_1Z, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1X};
963 static CONST XMVECTORU32 Permute3 = {XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_1X, XM_PERMUTE_0W};
// Broadcast the matrix diagonal.
965 M00 = XMVectorSplatX(M.r[0]);
966 M11 = XMVectorSplatY(M.r[1]);
967 M22 = XMVectorSplatZ(M.r[2]);
// Q0 lanes hold the four signed diagonal combinations
// (+M00-M11-M22, -M00+M11-M22, -M00-M11+M22, +M00+M11+M22).
969 Q0 = XMVectorMultiply(SignPNNP.v, M00);
970 Q0 = XMVectorMultiplyAdd(SignNPNP.v, M11, Q0);
971 Q0 = XMVectorMultiplyAdd(SignNNPP.v, M22, Q0);
973 Q1 = XMVectorAdd(Q0, g_XMOne.v);
// sqrt(Q1) via x * rsqrt(x), keeping Q1 where rsqrt produced NaN (x==0).
975 Rsq = XMVectorReciprocalSqrt(Q1);
976 VEqualsNaN = XMVectorIsNaN(Rsq);
977 Sqrt = XMVectorMultiply(Q1, Rsq);
978 Q1 = XMVectorSelect(Sqrt, Q1, VEqualsNaN);
980 Q1 = XMVectorMultiply(Q1, g_XMOneHalf.v);
// SQ1 = 1 / (2 * sqrt(Q1)), the scale for the off-diagonal terms.
982 SQ1 = XMVectorMultiply(Rsq, g_XMOneHalf.v);
// Compare the candidate magnitudes to pick the most stable branch.
984 CQ0 = XMVectorPermute(Q0, Q0, Permute0X0X0Y0W.v);
985 CQ1 = XMVectorPermute(Q0, g_XMEpsilon.v, Permute0Y0Z0Z1W.v);
986 C = XMVectorGreaterOrEqual(CQ0, CQ1);
988 CX = XMVectorSplatX(C);
989 CY = XMVectorSplatY(C);
990 CZ = XMVectorSplatZ(C);
991 CW = XMVectorSplatW(C);
// Cascade of selects materializes the chosen splat/sign/permute tables.
993 PermuteSplat = XMVectorSelect(SplatZ.v, SplatY.v, CZ);
994 SignB = XMVectorSelect(SignNPPP.v, SignPPNP.v, CZ);
995 PermuteControl = XMVectorSelect(Permute2.v, Permute1.v, CZ);
997 PermuteSplat = XMVectorSelect(PermuteSplat, SplatZ.v, CX);
998 SignB = XMVectorSelect(SignB, SignNPPP.v, CX);
999 PermuteControl = XMVectorSelect(PermuteControl, Permute2.v, CX);
1001 PermuteSplatT = XMVectorSelect(PermuteSplat,SplatX.v, CY);
1002 SignBT = XMVectorSelect(SignB, SignPNPP.v, CY);
1003 PermuteControlT = XMVectorSelect(PermuteControl,Permute0.v, CY);
1005 PermuteSplat = XMVectorSelect(PermuteSplat, PermuteSplatT, CX);
1006 SignB = XMVectorSelect(SignB, SignBT, CX);
1007 PermuteControl = XMVectorSelect(PermuteControl, PermuteControlT, CX);
1009 PermuteSplat = XMVectorSelect(PermuteSplat,SplatW.v, CW);
1010 SignB = XMVectorSelect(SignB, g_XMNegativeOne.v, CW);
1011 PermuteControl = XMVectorSelect(PermuteControl,Permute3.v, CW);
1013 Scale = XMVectorPermute(SQ1, SQ1, PermuteSplat);
// Gather the off-diagonal matrix terms used by every branch.
1015 P = XMVectorPermute(M.r[1], M.r[2],PermuteC.v); // {M10, M12, M20, M21}
1016 A = XMVectorPermute(M.r[0], P, PermuteA.v); // {M01, M12, M20, M03}
1017 B = XMVectorPermute(M.r[0], P, PermuteB.v); // {M10, M21, M02, M03}
// Q2 = (A + SignB*B) * Scale, then merge with the diagonal-derived Q1.
1019 Q2 = XMVectorMultiplyAdd(SignB, B, A);
1020 Q2 = XMVectorMultiply(Q2, Scale);
1022 Result = XMVectorPermute(Q1, Q2, PermuteControl);
1026 #else // _XM_VMX128_INTRINSICS_
1027 #endif // _XM_VMX128_INTRINSICS_
1030 //------------------------------------------------------------------------------
1031 // Conversion operations
1032 //------------------------------------------------------------------------------
1034 //------------------------------------------------------------------------------
// Decomposes a quaternion into rotation angle (via *pAngle): the angle
// is 2*acos(w). SSE builds use the CRT acosf; the portable path uses
// XMScalarACos.
1036 XMFINLINE VOID XMQuaternionToAxisAngle
1048 #if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
1049 *pAngle = 2.0f * acosf(XMVectorGetW(Q));
1051 *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q));
1055 /****************************************************************************
1059 ****************************************************************************/
1061 //------------------------------------------------------------------------------
1062 // Comparison operations
1063 //------------------------------------------------------------------------------
1065 //------------------------------------------------------------------------------
// Returns TRUE when the two plane coefficient vectors are exactly equal.
1067 XMFINLINE BOOL XMPlaneEqual
1073 return XMVector4Equal(P1, P2);
1076 //------------------------------------------------------------------------------
// Returns TRUE when the two planes are equal within Epsilon after both
// are normalized — so differently-scaled coefficients of the same plane
// still compare as near-equal.
1078 XMFINLINE BOOL XMPlaneNearEqual
1085 XMVECTOR NP1 = XMPlaneNormalize(P1);
1086 XMVECTOR NP2 = XMPlaneNormalize(P2);
1087 return XMVector4NearEqual(NP1, NP2, Epsilon);
1090 //------------------------------------------------------------------------------
// Returns TRUE when any plane coefficient differs.
1092 XMFINLINE BOOL XMPlaneNotEqual
1098 return XMVector4NotEqual(P1, P2);
1101 //------------------------------------------------------------------------------
// Returns TRUE when any plane coefficient is NaN.
1103 XMFINLINE BOOL XMPlaneIsNaN
1108 return XMVector4IsNaN(P);
1111 //------------------------------------------------------------------------------
// Returns TRUE when any plane coefficient is +/- infinity.
1113 XMFINLINE BOOL XMPlaneIsInfinite
1118 return XMVector4IsInfinite(P);
1121 //------------------------------------------------------------------------------
1122 // Computation operations
1123 //------------------------------------------------------------------------------
1125 //------------------------------------------------------------------------------
// 4D dot product of the plane coefficients with V, replicated in all
// lanes (equivalent to XMVector4Dot).
1127 XMFINLINE XMVECTOR XMPlaneDot
1133 #if defined(_XM_NO_INTRINSICS_)
1135 return XMVector4Dot(P, V);
1137 #elif defined(_XM_SSE_INTRINSICS_)
// NOTE(review): vTemp2 is used here with no visible declaration —
// a `XMVECTOR vTemp2 = V;` line appears to be missing; verify against
// the canonical source before building.
1139 __m128 vTemp = _mm_mul_ps(P,vTemp2);
1140 vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
1141 vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W;
1142 vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
1143 vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together
1144 return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
1145 #else // _XM_VMX128_INTRINSICS_
1146 #endif // _XM_VMX128_INTRINSICS_
1149 //------------------------------------------------------------------------------
// Dot product of the plane with a 3D *point*: forces V.w to 1 so the
// plane's d coefficient is included (signed distance from the plane,
// scaled by the plane normal's length).
1151 XMFINLINE XMVECTOR XMPlaneDotCoord
1157 #if defined(_XM_NO_INTRINSICS_)
1162 // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]
1163 V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
1164 Result = XMVector4Dot(P, V3);
1168 #elif defined(_XM_SSE_INTRINSICS_)
// vTemp2 = <V.xyz, 1>
1169 XMVECTOR vTemp2 = _mm_and_ps(V,g_XMMask3);
1170 vTemp2 = _mm_or_ps(vTemp2,g_XMIdentityR3);
// 4-way dot product via two shuffle/add passes, splat into all lanes.
1171 XMVECTOR vTemp = _mm_mul_ps(P,vTemp2);
1172 vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
1173 vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W;
1174 vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
1175 vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together
1176 return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
1177 #else // _XM_VMX128_INTRINSICS_
1178 #endif // _XM_VMX128_INTRINSICS_
1181 //------------------------------------------------------------------------------
// Dot product of the plane's normal (xyz only) with a 3D direction V;
// the d coefficient is ignored.
1183 XMFINLINE XMVECTOR XMPlaneDotNormal
1189 return XMVector3Dot(P, V);
1192 //------------------------------------------------------------------------------
1193 // XMPlaneNormalizeEst uses a reciprocal estimate and
1194 // returns QNaN on zero and infinite vectors.
// Scales all four plane coefficients by 1/|normal.xyz| using the fast
// reciprocal-sqrt estimate — lower precision than XMPlaneNormalize.
1196 XMFINLINE XMVECTOR XMPlaneNormalizeEst
1201 #if defined(_XM_NO_INTRINSICS_)
1204 Result = XMVector3ReciprocalLength(P);
1205 Result = XMVectorMultiply(P, Result);
1208 #elif defined(_XM_SSE_INTRINSICS_)
1209 // Perform the dot product
1210 XMVECTOR vDot = _mm_mul_ps(P,P);
// Horizontal add of x,y,z via shuffles (w is excluded).
1212 XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
1214 vDot = _mm_add_ss(vDot,vTemp);
1216 vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
1217 // Result.x = (x+y)+z
1218 vDot = _mm_add_ss(vDot,vTemp);
1220 vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
1221 // Get the reciprocal
// rsqrt estimate: ~12 bits of precision.
1222 vDot = _mm_rsqrt_ps(vDot);
1223 // Get the reciprocal
1224 vDot = _mm_mul_ps(vDot,P);
1226 #else // _XM_VMX128_INTRINSICS_
1227 #endif // _XM_VMX128_INTRINSICS_
1230 //------------------------------------------------------------------------------
// Full-precision plane normalization: divides all four coefficients by
// the length of the xyz normal. A zero-length normal yields zero (SSE
// path) / is guarded before the divide (scalar path).
1232 XMFINLINE XMVECTOR XMPlaneNormalize
1237 #if defined(_XM_NO_INTRINSICS_)
// Note: despite the name, fLengthSq holds the *length* (sqrt applied).
1238 FLOAT fLengthSq = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2]));
1239 // Prevent divide by zero
1241 fLengthSq = 1.0f/fLengthSq;
1244 XMVECTOR vResult = {
1245 P.vector4_f32[0]*fLengthSq,
1246 P.vector4_f32[1]*fLengthSq,
1247 P.vector4_f32[2]*fLengthSq,
1248 P.vector4_f32[3]*fLengthSq
1252 #elif defined(_XM_SSE_INTRINSICS_)
1253 // Perform the dot product on x,y and z only
1254 XMVECTOR vLengthSq = _mm_mul_ps(P,P);
1255 XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
1256 vLengthSq = _mm_add_ss(vLengthSq,vTemp);
1257 vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
1258 vLengthSq = _mm_add_ss(vLengthSq,vTemp);
1259 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
1260 // Prepare for the division
1261 XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
1262 // Failsafe on zero (Or epsilon) length planes
1263 // If the length is infinity, set the elements to zero
// vLengthSq becomes a mask: all-ones where finite, zero where infinite.
1264 vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
1265 // Reciprocal mul to perform the normalization
1266 vResult = _mm_div_ps(P,vResult);
1267 // Any that are infinity, set to zero
1268 vResult = _mm_and_ps(vResult,vLengthSq);
1270 #else // _XM_VMX128_INTRINSICS_
1271 #endif // _XM_VMX128_INTRINSICS_
1274 //------------------------------------------------------------------------------
// Returns the point where the line through LinePoint1/LinePoint2 crosses
// plane P. If the line is (near) parallel to the plane — i.e. the dot
// products of the plane normal with both points are nearly equal — the
// result is set to QNaN via the Control/select at the end.
// NOTE(review): lines are missing from this listing (first parameter,
// braces, local declarations, returns); code lines kept verbatim.
1276 XMFINLINE XMVECTOR XMPlaneIntersectLine
1279 FXMVECTOR LinePoint1,
1280 FXMVECTOR LinePoint2
1283 #if defined(_XM_NO_INTRINSICS_)
1288 XMVECTOR ReciprocalD;
// D = N.P1 - N.P2: projection of the line direction onto the plane normal.
1295 V1 = XMVector3Dot(P, LinePoint1);
1296 V2 = XMVector3Dot(P, LinePoint2);
1297 D = XMVectorSubtract(V1, V2);
// VT = signed distance of LinePoint1 from the plane, scaled by 1/D,
// giving the parametric position of the intersection along the line.
1299 ReciprocalD = XMVectorReciprocal(D);
1300 VT = XMPlaneDotCoord(P, LinePoint1);
1301 VT = XMVectorMultiply(VT, ReciprocalD);
1303 Point = XMVectorSubtract(LinePoint2, LinePoint1);
1304 Point = XMVectorMultiplyAdd(Point, VT, LinePoint1);
// Parallel-line guard: where |D| <= epsilon, replace the point with QNaN.
1306 Zero = XMVectorZero();
1307 Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);
1309 Result = XMVectorSelect(Point, g_XMQNaN.v, Control);
1313 #elif defined(_XM_SSE_INTRINSICS_)
1323 V1 = XMVector3Dot(P, LinePoint1);
1324 V2 = XMVector3Dot(P, LinePoint2);
1325 D = _mm_sub_ps(V1, V2);
// Division by D (instead of reciprocal-multiply) — NaN/Inf from a parallel
// line is masked out by the QNaN select below.
1327 VT = XMPlaneDotCoord(P, LinePoint1);
1328 VT = _mm_div_ps(VT, D);
1330 Point = _mm_sub_ps(LinePoint2, LinePoint1);
1331 Point = _mm_mul_ps(Point,VT);
1332 Point = _mm_add_ps(Point,LinePoint1);
1333 Zero = XMVectorZero();
1334 Control = XMVectorNearEqual(D, Zero, g_XMEpsilon);
1335 Result = XMVectorSelect(Point, g_XMQNaN, Control);
1337 #else // _XM_VMX128_INTRINSICS_
1338 #endif // _XM_VMX128_INTRINSICS_
1341 //------------------------------------------------------------------------------
// Computes the line of intersection of two planes P1 and P2, returned as two
// points on that line through the out-parameters. If the planes are (near)
// parallel — cross product of the normals has near-zero length — both output
// points are set to QNaN.
// NOTE(review): lines are missing from this listing (plane parameters,
// braces, several local declarations); code lines kept verbatim.
1343 XMINLINE VOID XMPlaneIntersectPlane
1345 XMVECTOR* pLinePoint1,
1346 XMVECTOR* pLinePoint2,
1351 #if defined(_XM_NO_INTRINSICS_)
1357 XMVECTOR RcpLengthSq;
1362 XMVECTOR LinePoint1;
1363 XMVECTOR LinePoint2;
1365 XMASSERT(pLinePoint1);
1366 XMASSERT(pLinePoint2);
// V1 = direction of the intersection line (perpendicular to both normals).
1368 V1 = XMVector3Cross(P2, P1);
1370 LengthSq = XMVector3LengthSq(V1);
// Point = (cross(P2,V1)*P1.w + cross(V1,P1)*P2.w) / |V1|^2 — a point
// satisfying both plane equations.
1372 V2 = XMVector3Cross(P2, V1);
1374 P1W = XMVectorSplatW(P1);
1375 Point = XMVectorMultiply(V2, P1W);
1377 V3 = XMVector3Cross(V1, P1);
1379 P2W = XMVectorSplatW(P2);
1380 Point = XMVectorMultiplyAdd(V3, P2W, Point);
1382 RcpLengthSq = XMVectorReciprocal(LengthSq);
1383 LinePoint1 = XMVectorMultiply(Point, RcpLengthSq);
// Second point: step along the line direction from the first point.
1385 LinePoint2 = XMVectorAdd(LinePoint1, V1);
// Parallel-plane guard: emit QNaN when |V1|^2 <= epsilon.
1387 Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
1388 *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control);
1389 *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control);
1391 #elif defined(_XM_SSE_INTRINSICS_)
1392 XMASSERT(pLinePoint1);
1393 XMASSERT(pLinePoint2);
1402 XMVECTOR LinePoint1;
1403 XMVECTOR LinePoint2;
1405 V1 = XMVector3Cross(P2, P1);
1407 LengthSq = XMVector3LengthSq(V1);
1409 V2 = XMVector3Cross(P2, V1);
// Splat the plane D (w) components for the weighted combination.
1411 P1W = _mm_shuffle_ps(P1,P1,_MM_SHUFFLE(3,3,3,3));
1412 Point = _mm_mul_ps(V2, P1W);
1414 V3 = XMVector3Cross(V1, P1);
1416 P2W = _mm_shuffle_ps(P2,P2,_MM_SHUFFLE(3,3,3,3));
1417 V3 = _mm_mul_ps(V3,P2W);
1418 Point = _mm_add_ps(Point,V3);
1419 LinePoint1 = _mm_div_ps(Point,LengthSq);
1421 LinePoint2 = _mm_add_ps(LinePoint1, V1);
1423 Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon);
1424 *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN, Control);
1425 *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN, Control);
1426 #else // _XM_VMX128_INTRINSICS_
1427 #endif // _XM_VMX128_INTRINSICS_
1430 //------------------------------------------------------------------------------
// Transforms plane P by matrix M: Result = P.x*M.r[0] + P.y*M.r[1] +
// P.z*M.r[2] + P.w*M.r[3] (i.e. a full 4-component vector-matrix multiply).
// NOTE(review): for a geometrically correct plane transform, callers are
// presumably expected to pass the inverse-transpose of the point transform —
// confirm against the library docs; this function itself is just a
// 4D transform.
1432 XMFINLINE XMVECTOR XMPlaneTransform
1438 #if defined(_XM_NO_INTRINSICS_)
1446 W = XMVectorSplatW(P);
1447 Z = XMVectorSplatZ(P);
1448 Y = XMVectorSplatY(P);
1449 X = XMVectorSplatX(P);
1451 Result = XMVectorMultiply(W, M.r[3]);
1452 Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
1453 Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
1454 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
1458 #elif defined(_XM_SSE_INTRINSICS_)
// Splat each component of P, scale the matching matrix row, then sum.
1459 XMVECTOR X = _mm_shuffle_ps(P,P,_MM_SHUFFLE(0,0,0,0));
1460 XMVECTOR Y = _mm_shuffle_ps(P,P,_MM_SHUFFLE(1,1,1,1));
1461 XMVECTOR Z = _mm_shuffle_ps(P,P,_MM_SHUFFLE(2,2,2,2));
1462 XMVECTOR W = _mm_shuffle_ps(P,P,_MM_SHUFFLE(3,3,3,3));
1463 X = _mm_mul_ps(X, M.r[0]);
1464 Y = _mm_mul_ps(Y, M.r[1]);
1465 Z = _mm_mul_ps(Z, M.r[2]);
1466 W = _mm_mul_ps(W, M.r[3]);
// Pairwise sums reduce dependency chains: (X+Z) + (Y+W).
1467 X = _mm_add_ps(X,Z);
1468 Y = _mm_add_ps(Y,W);
1469 X = _mm_add_ps(X,Y);
1471 #else // _XM_VMX128_INTRINSICS_
1472 #endif // _XM_VMX128_INTRINSICS_
1475 //------------------------------------------------------------------------------
// Transforms a stream of XMFLOAT4 planes; thin wrapper that forwards to
// XMVector4TransformStream (planes are transformed as 4D vectors).
// NOTE(review): the remaining parameters and forwarded arguments are
// missing from this listing.
1477 XMFINLINE XMFLOAT4* XMPlaneTransformStream
1479 XMFLOAT4* pOutputStream,
1480 size_t OutputStride,
1481 CONST XMFLOAT4* pInputStream,
1487 return XMVector4TransformStream(pOutputStream,
1495 //------------------------------------------------------------------------------
1496 // Conversion operations
1497 //------------------------------------------------------------------------------
1499 //------------------------------------------------------------------------------
// Builds a plane containing Point with the given Normal:
// (Normal.x, Normal.y, Normal.z, -dot3(Point, Normal)).
1501 XMFINLINE XMVECTOR XMPlaneFromPointNormal
1507 #if defined(_XM_NO_INTRINSICS_)
// w = -dot(Point, Normal); select keeps Normal in xyz and W in w.
1512 W = XMVector3Dot(Point, Normal);
1513 W = XMVectorNegate(W);
1514 Result = XMVectorSelect(W, Normal, g_XMSelect1110.v);
1518 #elif defined(_XM_SSE_INTRINSICS_)
1521 W = XMVector3Dot(Point,Normal);
// Negate via multiply by -1, then mask-and-or Normal.xyz with W.w.
1522 W = _mm_mul_ps(W,g_XMNegativeOne);
1523 Result = _mm_and_ps(Normal,g_XMMask3);
1524 W = _mm_and_ps(W,g_XMMaskW);
1525 Result = _mm_or_ps(Result,W);
1527 #else // _XM_VMX128_INTRINSICS_
1528 #endif // _XM_VMX128_INTRINSICS_
1531 //------------------------------------------------------------------------------
// Builds a plane through three points: the normal is the normalized cross
// product of two edge vectors, and w = -dot(normal, Point1).
// NOTE(review): the parameter list and return statements are missing from
// this listing; code lines kept verbatim.
1533 XMFINLINE XMVECTOR XMPlaneFromPoints
1540 #if defined(_XM_NO_INTRINSICS_)
1548 V21 = XMVectorSubtract(Point1, Point2);
1549 V31 = XMVectorSubtract(Point1, Point3);
1551 N = XMVector3Cross(V21, V31);
1552 N = XMVector3Normalize(N);
1554 D = XMPlaneDotNormal(N, Point1);
1555 D = XMVectorNegate(D);
// Select: xyz from N, w from D.
1557 Result = XMVectorSelect(D, N, g_XMSelect1110.v);
1561 #elif defined(_XM_SSE_INTRINSICS_)
1568 V21 = _mm_sub_ps(Point1, Point2);
1569 V31 = _mm_sub_ps(Point1, Point3);
1571 N = XMVector3Cross(V21, V31);
1572 N = XMVector3Normalize(N);
1574 D = XMPlaneDotNormal(N, Point1);
// Combine: N masked to xyz, -D masked to w, then OR'd together.
1575 D = _mm_mul_ps(D,g_XMNegativeOne);
1576 N = _mm_and_ps(N,g_XMMask3);
1577 D = _mm_and_ps(D,g_XMMaskW);
1578 Result = _mm_or_ps(D,N);
1580 #else // _XM_VMX128_INTRINSICS_
1581 #endif // _XM_VMX128_INTRINSICS_
1584 /****************************************************************************
1588 ****************************************************************************/
1590 //------------------------------------------------------------------------------
1591 // Comparison operations
1592 //------------------------------------------------------------------------------
1594 //------------------------------------------------------------------------------
// TRUE if all four components of colors C1 and C2 are equal
// (delegates to XMVector4Equal).
1596 XMFINLINE BOOL XMColorEqual
1602 return XMVector4Equal(C1, C2);
1605 //------------------------------------------------------------------------------
// TRUE if any component of C1 differs from C2 (delegates to XMVector4NotEqual).
1607 XMFINLINE BOOL XMColorNotEqual
1613 return XMVector4NotEqual(C1, C2);
1616 //------------------------------------------------------------------------------
// TRUE if every component of C1 is greater than the matching component of C2.
1618 XMFINLINE BOOL XMColorGreater
1624 return XMVector4Greater(C1, C2);
1627 //------------------------------------------------------------------------------
// TRUE if every component of C1 is >= the matching component of C2.
1629 XMFINLINE BOOL XMColorGreaterOrEqual
1635 return XMVector4GreaterOrEqual(C1, C2);
1638 //------------------------------------------------------------------------------
// TRUE if every component of C1 is less than the matching component of C2.
1640 XMFINLINE BOOL XMColorLess
1646 return XMVector4Less(C1, C2);
1649 //------------------------------------------------------------------------------
// TRUE if every component of C1 is <= the matching component of C2.
1651 XMFINLINE BOOL XMColorLessOrEqual
1657 return XMVector4LessOrEqual(C1, C2);
1660 //------------------------------------------------------------------------------
// TRUE if any component of color C is NaN (delegates to XMVector4IsNaN).
1662 XMFINLINE BOOL XMColorIsNaN
1667 return XMVector4IsNaN(C);
1670 //------------------------------------------------------------------------------
// TRUE if any component of color C is +/- infinity.
1672 XMFINLINE BOOL XMColorIsInfinite
1677 return XMVector4IsInfinite(C);
1680 //------------------------------------------------------------------------------
1681 // Computation operations
1682 //------------------------------------------------------------------------------
1684 //------------------------------------------------------------------------------
// Returns the complement of a color: (1-r, 1-g, 1-b) with alpha (w)
// passed through unchanged. Assumes components are in [0,1] (see the
// commented-out asserts below).
1686 XMFINLINE XMVECTOR XMColorNegative
1691 #if defined(_XM_NO_INTRINSICS_)
1692 // XMASSERT(XMVector4GreaterOrEqual(C, XMVectorReplicate(0.0f)));
1693 // XMASSERT(XMVector4LessOrEqual(C, XMVectorReplicate(1.0f)));
1694 XMVECTOR vResult = {
1695 1.0f - vColor.vector4_f32[0],
1696 1.0f - vColor.vector4_f32[1],
1697 1.0f - vColor.vector4_f32[2],
1698 vColor.vector4_f32[3]
1702 #elif defined(_XM_SSE_INTRINSICS_)
1703 // Negate only x,y and z.
// XOR with the sign-bit mask g_XMNegate3 flips the sign of rgb only;
// adding (1,1,1,0) then yields (1-r, 1-g, 1-b, a).
1704 XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3);
1705 // Add 1,1,1,0 to -x,-y,-z,w
1706 return _mm_add_ps(vTemp,g_XMOne3);
1707 #else // _XM_VMX128_INTRINSICS_
1708 #endif // _XM_VMX128_INTRINSICS_
1711 //------------------------------------------------------------------------------
// Component-wise product of two colors (including alpha).
1713 XMFINLINE XMVECTOR XMColorModulate
1719 return XMVectorMultiply(C1, C2);
1722 //------------------------------------------------------------------------------
// Adjusts the saturation of a color by interpolating between its luminance
// (grayscale, Rec.709-style weights 0.2125/0.7154/0.0721) and the original
// color: Result = (C - Luminance) * fSaturation + Luminance. Alpha is
// preserved from the source color. fSaturation == 0 yields grayscale,
// 1 yields the original color.
1724 XMFINLINE XMVECTOR XMColorAdjustSaturation
1730 #if defined(_XM_NO_INTRINSICS_)
1731 CONST XMVECTOR gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
1733 // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2];
1734 // Result = (C - Luminance) * Saturation + Luminance;
1736 FLOAT fLuminance = (vColor.vector4_f32[0]*gvLuminance.vector4_f32[0])+(vColor.vector4_f32[1]*gvLuminance.vector4_f32[1])+(vColor.vector4_f32[2]*gvLuminance.vector4_f32[2]);
1737 XMVECTOR vResult = {
1738 ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance,
1739 ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance,
1740 ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance,
1741 vColor.vector4_f32[3]};
1744 #elif defined(_XM_SSE_INTRINSICS_)
1745 static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
1746 // Mul RGB by intensity constants
1747 XMVECTOR vLuminance = _mm_mul_ps(vColor,gvLuminance);
1748 // vResult.x = vLuminance.y, vResult.y = vLuminance.y,
1749 // vResult.z = vLuminance.z, vResult.w = vLuminance.z
// Horizontal add of the three weighted components into lane x.
1750 XMVECTOR vResult = vLuminance;
1751 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,1,1));
1752 // vLuminance.x += vLuminance.y
1753 vLuminance = _mm_add_ss(vLuminance,vResult);
1754 // Splat vLuminance.z
1755 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,2,2));
1756 // vLuminance.x += vLuminance.z (Dot product)
1757 vLuminance = _mm_add_ss(vLuminance,vResult);
// Broadcast the scalar luminance to all four lanes.
1759 vLuminance = _mm_shuffle_ps(vLuminance,vLuminance,_MM_SHUFFLE(0,0,0,0));
1760 // Splat fSaturation
1761 XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
1762 // vResult = ((vColor-vLuminance)*vSaturation)+vLuminance;
1763 vResult = _mm_sub_ps(vColor,vLuminance);
1764 vResult = _mm_mul_ps(vResult,vSaturation);
1765 vResult = _mm_add_ps(vResult,vLuminance);
1766 // Retain w from the source color
1767 vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
1768 vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
1770 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
1771 #endif // _XM_VMX128_INTRINSICS_
1774 //------------------------------------------------------------------------------
// Adjusts the contrast of a color about middle gray:
// Result = (C - 0.5) * fContrast + 0.5 per RGB channel; alpha is preserved.
// fContrast == 0 yields flat gray, 1 yields the original color.
1776 XMFINLINE XMVECTOR XMColorAdjustContrast
1782 #if defined(_XM_NO_INTRINSICS_)
1783 // Result = (vColor - 0.5f) * fContrast + 0.5f;
1784 XMVECTOR vResult = {
1785 ((vColor.vector4_f32[0]-0.5f) * fContrast) + 0.5f,
1786 ((vColor.vector4_f32[1]-0.5f) * fContrast) + 0.5f,
1787 ((vColor.vector4_f32[2]-0.5f) * fContrast) + 0.5f,
1788 vColor.vector4_f32[3] // Leave W untouched
1792 #elif defined(_XM_SSE_INTRINSICS_)
1793 XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale
1794 XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf); // Subtract 0.5f from the source (Saving source)
1795 vResult = _mm_mul_ps(vResult,vScale); // Mul by scale
1796 vResult = _mm_add_ps(vResult,g_XMOneHalf); // Add 0.5f
1797 // Retain w from the source color
1798 vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
1799 vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
1801 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
1802 #endif // _XM_VMX128_INTRINSICS_
1805 /****************************************************************************
1809 ****************************************************************************/
1811 //------------------------------------------------------------------------------
// Runtime check that the host CPU supports the instruction set this library
// was compiled for. For the SSE build it requires both SSE (PF_XMMI) and
// SSE2 (PF_XMMI64) via the Win32 IsProcessorFeaturePresent API.
1813 XMINLINE BOOL XMVerifyCPUSupport()
1815 #if defined(_XM_NO_INTRINSICS_) || !defined(_XM_SSE_INTRINSICS_)
1817 #else // _XM_SSE_INTRINSICS_
1818 // Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail
1819 // Detecting SSE2 on older versions of Windows would require using cpuid directly
1820 return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) );
1825 //------------------------------------------------------------------------------
// Buffer size for the decimal rendering of the assert's line number
// (digits written right-to-left, NUL-terminated).
1827 #define XMASSERT_LINE_STRING_SIZE 16
// Assertion-failure reporter used by XMASSERT: formats the line number into
// aLineString (without CRT sprintf) and emits
// "Assertion failed: <expr>, file <file>, line <n>" to the debugger.
1829 XMINLINE VOID XMAssert
1831 CONST CHAR* pExpression,
1832 CONST CHAR* pFileName,
1836 CHAR aLineString[XMASSERT_LINE_STRING_SIZE];
// Seed with "0\0" so a LineNumber of 0 still prints a digit, then write
// decimal digits backwards from the end of the buffer.
1840 aLineString[XMASSERT_LINE_STRING_SIZE - 2] = '0';
1841 aLineString[XMASSERT_LINE_STRING_SIZE - 1] = '\0';
1842 for (Line = LineNumber, pLineString = aLineString + XMASSERT_LINE_STRING_SIZE - 2;
1843 Line != 0 && pLineString >= aLineString;
1844 Line /= 10, pLineString--)
1846 *pLineString = (CHAR)('0' + (Line % 10));
1849 #ifndef NO_OUTPUT_DEBUG_STRING
1850 OutputDebugStringA("Assertion failed: ");
1851 OutputDebugStringA(pExpression);
1852 OutputDebugStringA(", file ");
1853 OutputDebugStringA(pFileName);
1854 OutputDebugStringA(", line ");
// pLineString was decremented past the first digit, so +1 points at it.
1855 OutputDebugStringA(pLineString + 1);
1856 OutputDebugStringA("\r\n");
1858 DbgPrint("Assertion failed: %s, file %s, line %d\r\n", pExpression, pFileName, LineNumber);
1864 //------------------------------------------------------------------------------
// Computes the Fresnel reflectance term (Schlick-free, exact dielectric
// formula) per component, clamped to [0,1]:
//   R = 0.5*(g-c)^2/(g+c)^2 * ( ((c*(g+c)-1)^2 / (c*(g-c)+1)^2) + 1 )
// where c = CosIncidentAngle and g = sqrt(c^2 + RefractionIndex^2 - 1).
1866 XMFINLINE XMVECTOR XMFresnelTerm
1868 FXMVECTOR CosIncidentAngle,
1869 FXMVECTOR RefractionIndex
1872 #if defined(_XM_NO_INTRINSICS_)
1876 XMVECTOR V0, V1, V2, V3;
1879 // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
1880 // c = CosIncidentAngle
1881 // g = sqrt(c^2 + RefractionIndex^2 - 1)
1883 XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));
// g = sqrt(RefractionIndex^2 - 1 + c^2)
1885 G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
1886 G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G);
1888 G = XMVectorSqrt(G);
1890 S = XMVectorAdd(G, CosIncidentAngle);
1891 D = XMVectorSubtract(G, CosIncidentAngle);
// First factor: 0.5 * D^2 / S^2
1893 V0 = XMVectorMultiply(D, D);
1894 V1 = XMVectorMultiply(S, S);
1895 V1 = XMVectorReciprocal(V1);
1896 V0 = XMVectorMultiply(g_XMOneHalf.v, V0);
1897 V0 = XMVectorMultiply(V0, V1);
// Second factor: (c*S - 1)^2 / (c*D + 1)^2 + 1
1899 V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v);
1900 V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v);
1901 V2 = XMVectorMultiply(V2, V2);
1902 V3 = XMVectorMultiply(V3, V3);
1903 V3 = XMVectorReciprocal(V3);
1904 V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v);
1906 Result = XMVectorMultiply(V0, V2);
// Clamp to [0,1].
1908 Result = XMVectorSaturate(Result);
1912 #elif defined(_XM_SSE_INTRINSICS_)
1913 // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
1914 // c = CosIncidentAngle
1915 // g = sqrt(c^2 + RefractionIndex^2 - 1)
1917 XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));
1919 // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
1920 XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex);
1921 XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle);
1922 G = _mm_sub_ps(G,g_XMOne);
1923 vTemp = _mm_add_ps(vTemp,G);
1924 // max((0-vTemp),vTemp) == abs(vTemp)
1925 // The abs is needed to deal with refraction and cosine being zero
1926 G = _mm_setzero_ps();
1927 G = _mm_sub_ps(G,vTemp);
1928 G = _mm_max_ps(G,vTemp);
1929 // Last operation, the sqrt()
// NOTE(review): the _mm_sqrt_ps(G) line appears to be missing from this
// listing; G is used below as the square-rooted value.
1933 XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle);
1934 XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle);
1935 // Perform the term (0.5f *(g - c)^2) / (g + c)^2
1936 XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC);
1937 vTemp = _mm_mul_ps(GAddC,GAddC);
1938 vResult = _mm_mul_ps(vResult,g_XMOneHalf);
1939 vResult = _mm_div_ps(vResult,vTemp);
1940 // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
1941 GAddC = _mm_mul_ps(GAddC,CosIncidentAngle);
1942 GSubC = _mm_mul_ps(GSubC,CosIncidentAngle);
1943 GAddC = _mm_sub_ps(GAddC,g_XMOne);
1944 GSubC = _mm_add_ps(GSubC,g_XMOne);
1945 GAddC = _mm_mul_ps(GAddC,GAddC);
1946 GSubC = _mm_mul_ps(GSubC,GSubC);
1947 GAddC = _mm_div_ps(GAddC,GSubC);
1948 GAddC = _mm_add_ps(GAddC,g_XMOne);
1949 // Multiply the two term parts
1950 vResult = _mm_mul_ps(vResult,GAddC);
1951 // Clamp to 0.0 - 1.0f
1952 vResult = _mm_max_ps(vResult,g_XMZero);
1953 vResult = _mm_min_ps(vResult,g_XMOne);
1955 #else // _XM_VMX128_INTRINSICS_
1956 #endif // _XM_VMX128_INTRINSICS_
1959 //------------------------------------------------------------------------------
// TRUE if |S1 - S2| <= Epsilon. The no-intrinsics path computes the absolute
// value by masking the float's sign bit through a type-punned UINT.
1961 XMFINLINE BOOL XMScalarNearEqual
1968 FLOAT Delta = S1 - S2;
1969 #if defined(_XM_NO_INTRINSICS_)
// Clear the IEEE-754 sign bit (0x80000000) to get fabs without libm.
1970 UINT AbsDelta = *(const UINT*)&Delta & 0x7FFFFFFF;
1971 return (*(FLOAT*)&AbsDelta <= Epsilon);
1972 #elif defined(_XM_SSE_INTRINSICS_)
1973 return (fabsf(Delta) <= Epsilon);
1975 return (__fabs(Delta) <= Epsilon);
1979 //------------------------------------------------------------------------------
1980 // Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI
1980 //------------------------------------------------------------------------------
1981 // Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI
// Wraps an angle into [-PI, PI): shift by +PI, reduce modulo 2*PI using
// unsigned (fabsf'd) math, then shift back by -PI.
// NOTE(review): the sign-restore branch and return are missing from this
// listing (see the comment at the end of the surviving body).
1981 XMFINLINE FLOAT XMScalarModAngle
1986 // Note: The modulo is performed with unsigned math only to work
1987 // around a precision error on numbers that are close to PI
1989 #if defined(_XM_NO_INTRINSICS_) || !defined(_XM_VMX128_INTRINSICS_)
1990 // Normalize the range from 0.0f to XM_2PI
1991 Angle = Angle + XM_PI;
1992 // Perform the modulo, unsigned
1993 fTemp = fabsf(Angle);
// Truncating (INT) cast implements floor for the non-negative fTemp.
1994 fTemp = fTemp - (XM_2PI * (FLOAT)((INT)(fTemp/XM_2PI)));
1995 // Restore the number to the range of -XM_PI to XM_PI-epsilon
1996 fTemp = fTemp - XM_PI;
1997 // If the modulo'd value was negative, restore negation
2006 //------------------------------------------------------------------------------
// Scalar sine. The no-intrinsics path wraps Value into [-PI, PI) and
// evaluates an odd-power Taylor-style polynomial (through V^23) using three
// 4-term dot products against precomputed coefficient tables; the SSE path
// simply calls the CRT sinf.
2008 XMINLINE FLOAT XMScalarSin
2013 #if defined(_XM_NO_INTRINSICS_)
2017 XMVECTOR V0123, V0246, V1357, V9111315, V17192123;
2018 XMVECTOR V1, V7, V8;
2019 XMVECTOR R0, R1, R2;
2021 ValueMod = XMScalarModAngle(Value);
2023 // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! +
2024 // V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
2026 ValueSq = ValueMod * ValueMod;
// Build power ladders: V0123=(1,V,V^2,V^3), squaring/multiplying gives the
// odd powers (V,V^3,V^5,V^7), then x V^8 steps to (V^9..V^15), (V^17..V^23).
2028 V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
2029 V1 = XMVectorSplatY(V0123);
2030 V0246 = XMVectorMultiply(V0123, V0123);
2031 V1357 = XMVectorMultiply(V0246, V1);
2032 V7 = XMVectorSplatW(V1357);
2033 V8 = XMVectorMultiply(V7, V1);
2034 V9111315 = XMVectorMultiply(V1357, V8);
2035 V17192123 = XMVectorMultiply(V9111315, V8);
// Each dot product sums four polynomial terms against 1/n! coefficients.
2037 R0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v);
2038 R1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v);
2039 R2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v);
2041 return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0];
2043 #elif defined(_XM_SSE_INTRINSICS_)
2044 return sinf( Value );
2045 #else // _XM_VMX128_INTRINSICS_
2046 #endif // _XM_VMX128_INTRINSICS_
2049 //------------------------------------------------------------------------------
// Scalar cosine. The no-intrinsics path wraps Value into [-PI, PI) and sums
// an even-power polynomial (through V^22) via three 4-term dot products.
// NOTE(review): the SSE branch's "return cosf(Value);" line is missing from
// this listing (only the #elif marker survives).
2051 XMINLINE FLOAT XMScalarCos
2056 #if defined(_XM_NO_INTRINSICS_)
2060 XMVECTOR V0123, V0246, V8101214, V16182022;
2061 XMVECTOR V2, V6, V8;
2062 XMVECTOR R0, R1, R2;
2064 ValueMod = XMScalarModAngle(Value);
2066 // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! +
2067 // V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
2069 ValueSq = ValueMod * ValueMod;
// Power ladders: V0246=(1,V^2,V^4,V^6), then x V^8 gives (V^8..V^14) and
// (V^16..V^22).
2071 V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
2072 V0246 = XMVectorMultiply(V0123, V0123);
2074 V2 = XMVectorSplatZ(V0123);
2075 V6 = XMVectorSplatW(V0246);
2076 V8 = XMVectorMultiply(V6, V2);
2078 V8101214 = XMVectorMultiply(V0246, V8);
2079 V16182022 = XMVectorMultiply(V8101214, V8);
2081 R0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v);
2082 R1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v);
2083 R2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v);
2085 return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0];
2087 #elif defined(_XM_SSE_INTRINSICS_)
2089 #else // _XM_VMX128_INTRINSICS_
2090 #endif // _XM_VMX128_INTRINSICS_
2093 //------------------------------------------------------------------------------
// Computes sine and cosine of Value simultaneously, writing through pSin and
// pCos. The no-intrinsics path shares the power-ladder setup between the
// odd (sin) and even (cos) polynomial series; the SSE path calls the CRT.
2095 XMINLINE VOID XMScalarSinCos
2102 #if defined(_XM_NO_INTRINSICS_)
2106 XMVECTOR V0123, V0246, V1357, V8101214, V9111315, V16182022, V17192123;
2107 XMVECTOR V1, V2, V6, V8;
2108 XMVECTOR S0, S1, S2, C0, C1, C2;
2113 ValueMod = XMScalarModAngle(Value);
2115 // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! +
2116 // V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
2117 // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! +
2118 // V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
2120 ValueSq = ValueMod * ValueMod;
// Shared power ladders: even powers (V0246, V8101214, V16182022) feed the
// cosine series; odd powers (V1357, V9111315, V17192123) feed the sine.
2122 V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
2124 V1 = XMVectorSplatY(V0123);
2125 V2 = XMVectorSplatZ(V0123);
2127 V0246 = XMVectorMultiply(V0123, V0123);
2128 V1357 = XMVectorMultiply(V0246, V1);
2130 V6 = XMVectorSplatW(V0246);
2131 V8 = XMVectorMultiply(V6, V2);
2133 V8101214 = XMVectorMultiply(V0246, V8);
2134 V9111315 = XMVectorMultiply(V1357, V8);
2135 V16182022 = XMVectorMultiply(V8101214, V8);
2136 V17192123 = XMVectorMultiply(V9111315, V8);
2138 C0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v);
2139 S0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v);
2140 C1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v);
2141 S1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v);
2142 C2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v);
2143 S2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v);
2145 *pCos = C0.vector4_f32[0] + C1.vector4_f32[0] + C2.vector4_f32[0];
2146 *pSin = S0.vector4_f32[0] + S1.vector4_f32[0] + S2.vector4_f32[0];
2148 #elif defined(_XM_SSE_INTRINSICS_)
2152 *pSin = sinf(Value);
2153 *pCos = cosf(Value);
2154 #else // _XM_VMX128_INTRINSICS_
2155 #endif // _XM_VMX128_INTRINSICS_
2158 //------------------------------------------------------------------------------
// Scalar arcsine. The no-intrinsics path uses a rational polynomial
// approximation with a sqrt correction term (note the 1.00000011921f
// epsilon-padded constant guarding the sqrt argument); SSE calls asinf.
2160 XMINLINE FLOAT XMScalarASin
2165 #if defined(_XM_NO_INTRINSICS_)
2167 FLOAT AbsValue, Value2, Value3, D;
2168 XMVECTOR AbsV, R0, R1, Result;
// |Value| via sign-bit masking (type-punned, no libm call).
2171 *(UINT*)&AbsValue = *(const UINT*)&Value & 0x7FFFFFFF;
2173 Value2 = Value * AbsValue;
2174 Value3 = Value * Value2;
// D blends the polynomial toward the exact edge behavior near |Value|=1.
2175 D = (Value - Value2) / sqrtf(1.00000011921f - AbsValue);
2177 AbsV = XMVectorReplicate(AbsValue);
2179 V3.vector4_f32[0] = Value3;
2180 V3.vector4_f32[1] = 1.0f;
2181 V3.vector4_f32[2] = Value3;
2182 V3.vector4_f32[3] = 1.0f;
2184 R1 = XMVectorSet(D, D, Value, Value);
2185 R1 = XMVectorMultiply(R1, V3);
// Evaluate the quadratic-in-|V| coefficient vector (Horner form).
2187 R0 = XMVectorMultiplyAdd(AbsV, g_XMASinCoefficients0.v, g_XMASinCoefficients1.v);
2188 R0 = XMVectorMultiplyAdd(AbsV, R0, g_XMASinCoefficients2.v);
2190 Result = XMVector4Dot(R0, R1);
2192 return Result.vector4_f32[0];
2194 #elif defined(_XM_SSE_INTRINSICS_)
2195 return asinf(Value);
2196 #else // _XM_VMX128_INTRINSICS_
2197 #endif // _XM_VMX128_INTRINSICS_
2200 //------------------------------------------------------------------------------
// Scalar arccosine via the identity acos(x) = PI/2 - asin(x)
// (no-intrinsics path); SSE calls acosf directly.
2202 XMINLINE FLOAT XMScalarACos
2207 #if defined(_XM_NO_INTRINSICS_)
2209 return XM_PIDIV2 - XMScalarASin(Value);
2211 #elif defined(_XM_SSE_INTRINSICS_)
2212 return acosf(Value);
2213 #else // _XM_VMX128_INTRINSICS_
2214 #endif // _XM_VMX128_INTRINSICS_
2217 //------------------------------------------------------------------------------
// Fast sine estimate: a 7th-order odd polynomial (V - V^3/3! + V^5/5! -
// V^7/7!). Unlike XMScalarSin, the input is NOT range-reduced — callers
// must supply Value in [-PI, PI) (asserted).
2219 XMFINLINE FLOAT XMScalarSinEst
2224 #if defined(_XM_NO_INTRINSICS_)
2231 XMASSERT(Value >= -XM_PI);
2232 XMASSERT(Value < XM_PI);
2234 // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
2236 ValueSq = Value * Value;
// (1,V,V^2,V^3)^2 * V = (V, V^3, V^5, V^7): the four odd powers at once.
2238 V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
2239 Y = XMVectorSplatY(V);
2240 V = XMVectorMultiply(V, V);
2241 V = XMVectorMultiply(V, Y);
2243 Result = XMVector4Dot(V, g_XMSinEstCoefficients.v);
2245 return Result.vector4_f32[0];
2247 #elif defined(_XM_SSE_INTRINSICS_)
2248 XMASSERT(Value >= -XM_PI);
2249 XMASSERT(Value < XM_PI);
2250 float ValueSq = Value*Value;
2251 XMVECTOR vValue = _mm_set_ps1(Value);
2252 XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f);
2253 vTemp = _mm_mul_ps(vTemp,vTemp);
2254 vTemp = _mm_mul_ps(vTemp,vValue);
2255 // vTemp = Value,Value^3,Value^5,Value^7
// Manual 4-lane dot product: pairwise shuffles/adds collapse the weighted
// terms into lane Z, which is then splatted and returned.
2256 vTemp = _mm_mul_ps(vTemp,g_XMSinEstCoefficients);
2257 vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
2258 vValue = _mm_add_ps(vValue,vTemp); // Add Z = X+Z; W = Y+W;
2259 vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
2260 vTemp = _mm_add_ps(vTemp,vValue); // Add Z and W together
2261 vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
2262 #if defined(_MSC_VER) && (_MSC_VER>=1500)
2263 return _mm_cvtss_f32(vTemp);
2265 return vTemp.m128_f32[0];
2267 #else // _XM_VMX128_INTRINSICS_
2268 #endif // _XM_VMX128_INTRINSICS_
2271 //------------------------------------------------------------------------------
// Fast cosine estimate: a 6th-order even polynomial (1 - V^2/2! + V^4/4! -
// V^6/6!). Input is NOT range-reduced — callers must supply Value in
// [-PI, PI) (asserted).
2273 XMFINLINE FLOAT XMScalarCosEst
2278 #if defined(_XM_NO_INTRINSICS_)
2282 XMASSERT(Value >= -XM_PI);
2283 XMASSERT(Value < XM_PI);
2284 // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
2285 ValueSq = Value * Value;
// Squaring (1,V,V^2,V^3) yields the even powers (1, V^2, V^4, V^6).
2286 V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
2287 V = XMVectorMultiply(V, V);
2288 Result = XMVector4Dot(V, g_XMCosEstCoefficients.v);
2289 return Result.vector4_f32[0];
2290 #elif defined(_XM_SSE_INTRINSICS_)
2291 XMASSERT(Value >= -XM_PI);
2292 XMASSERT(Value < XM_PI);
2293 float ValueSq = Value*Value;
2294 XMVECTOR vValue = _mm_setzero_ps();
2295 XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f);
2296 vTemp = _mm_mul_ps(vTemp,vTemp);
2297 // vTemp = 1.0f,Value^2,Value^4,Value^6
// Manual 4-lane dot product via shuffles/adds; result ends up in lane Z.
2298 vTemp = _mm_mul_ps(vTemp,g_XMCosEstCoefficients);
2299 vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
2300 vValue = _mm_add_ps(vValue,vTemp); // Add Z = X+Z; W = Y+W;
2301 vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
2302 vTemp = _mm_add_ps(vTemp,vValue); // Add Z and W together
2303 vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
2304 #if defined(_MSC_VER) && (_MSC_VER>=1500)
2305 return _mm_cvtss_f32(vTemp);
2307 return vTemp.m128_f32[0];
2309 #else // _XM_VMX128_INTRINSICS_
2310 #endif // _XM_VMX128_INTRINSICS_
2313 //------------------------------------------------------------------------------
// Fast combined sine/cosine estimate written through pSin/pCos, sharing the
// even-power vector between the two series. Input is NOT range-reduced —
// Value must be in [-PI, PI) (asserted).
2315 XMFINLINE VOID XMScalarSinCosEst
2322 #if defined(_XM_NO_INTRINSICS_)
2325 XMVECTOR V, Sin, Cos;
2330 XMASSERT(Value >= -XM_PI);
2331 XMASSERT(Value < XM_PI);
2333 // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
2334 // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
2336 ValueSq = Value * Value;
// Cos = even powers (1,V^2,V^4,V^6); Sin = Cos * V = odd powers.
2337 V = XMVectorSet(1.0f, Value, ValueSq, Value * ValueSq);
2338 Y = XMVectorSplatY(V);
2339 Cos = XMVectorMultiply(V, V);
2340 Sin = XMVectorMultiply(Cos, Y);
2342 Cos = XMVector4Dot(Cos, g_XMCosEstCoefficients.v);
2343 Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients.v);
2345 *pCos = Cos.vector4_f32[0];
2346 *pSin = Sin.vector4_f32[0];
2348 #elif defined(_XM_SSE_INTRINSICS_)
2351 XMASSERT(Value >= -XM_PI);
2352 XMASSERT(Value < XM_PI);
2353 float ValueSq = Value * Value;
2354 XMVECTOR Cos = _mm_set_ps(Value * ValueSq,ValueSq,Value,1.0f);
2355 XMVECTOR Sin = _mm_set_ps1(Value);
2356 Cos = _mm_mul_ps(Cos,Cos);
2357 Sin = _mm_mul_ps(Sin,Cos);
2358 // Cos = 1.0f,Value^2,Value^4,Value^6
2359 Cos = XMVector4Dot(Cos,g_XMCosEstCoefficients);
// _mm_store_ss writes only lane 0 (the dot-product result) to the output.
2360 _mm_store_ss(pCos,Cos);
2361 // Sin = Value,Value^3,Value^5,Value^7
2362 Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients);
2363 _mm_store_ss(pSin,Sin);
2364 #else // _XM_VMX128_INTRINSICS_
2365 #endif // _XM_VMX128_INTRINSICS_
2368 //------------------------------------------------------------------------------
// Fast arcsine estimate: 4-term weighted dot product against
// g_XMASinEstCoefficients, using |Value|, a sign-preserving square V2, and
// the distance D = (1+eps) - |Value| (the epsilon keeps sqrtf(D) defined at
// |Value| == 1).
2370 XMFINLINE FLOAT XMScalarASinEst
2375 #if defined(_XM_NO_INTRINSICS_)
2377 XMVECTOR VR, CR, CS;
2380 CONST FLOAT OnePlusEps = 1.00000011921f;
// |Value| via sign-bit masking (type-punned, no libm call).
2382 *(UINT*)&AbsV = *(const UINT*)&Value & 0x7FFFFFFF;
2384 D = OnePlusEps - AbsV;
2386 CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
2387 VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
2388 CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);
2390 Result = XMVector4Dot(VR, CR);
2392 return Result.vector4_f32[0];
2394 #elif defined(_XM_SSE_INTRINSICS_)
2395 CONST FLOAT OnePlusEps = 1.00000011921f;
2396 FLOAT AbsV = fabsf(Value);
2397 FLOAT V2 = Value * AbsV; // Square with sign retained
2398 FLOAT D = OnePlusEps - AbsV;
// _mm_set_ps takes arguments w,z,y,x — lane 0 holds the last argument.
2400 XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
2401 XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
2402 Result = _mm_mul_ps(Result, g_XMASinEstCoefficients);
2403 Result = XMVector4Dot(VR,Result);
2404 #if defined(_MSC_VER) && (_MSC_VER>=1500)
2405 return _mm_cvtss_f32(Result);
2407 return Result.m128_f32[0];
2409 #else // _XM_VMX128_INTRINSICS_
2410 #endif // _XM_VMX128_INTRINSICS_
2413 //------------------------------------------------------------------------------
// Fast arccosine estimate: evaluates the same polynomial machinery as
// XMScalarASinEst and applies acos(x) = PI/2 - asin(x) at the end.
2415 XMFINLINE FLOAT XMScalarACosEst
2420 #if defined(_XM_NO_INTRINSICS_)
2422 XMVECTOR VR, CR, CS;
2425 CONST FLOAT OnePlusEps = 1.00000011921f;
2427 // return XM_PIDIV2 - XMScalarASin(Value);
// |Value| via sign-bit masking (type-punned, no libm call).
2429 *(UINT*)&AbsV = *(const UINT*)&Value & 0x7FFFFFFF;
2431 D = OnePlusEps - AbsV;
2433 CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
2434 VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
2435 CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);
2437 Result = XMVector4Dot(VR, CR);
2439 return XM_PIDIV2 - Result.vector4_f32[0];
2441 #elif defined(_XM_SSE_INTRINSICS_)
2442 CONST FLOAT OnePlusEps = 1.00000011921f;
2443 FLOAT AbsV = fabsf(Value);
2444 FLOAT V2 = Value * AbsV; // Value^2 retaining sign
2445 FLOAT D = OnePlusEps - AbsV;
2446 XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
2447 XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
2448 Result = _mm_mul_ps(Result,g_XMASinEstCoefficients);
2449 Result = XMVector4Dot(VR,Result);
2450 #if defined(_MSC_VER) && (_MSC_VER>=1500)
2451 return XM_PIDIV2 - _mm_cvtss_f32(Result);
2453 return XM_PIDIV2 - Result.m128_f32[0];
2455 #else // _XM_VMX128_INTRINSICS_
2456 #endif // _XM_VMX128_INTRINSICS_
2459 #endif // __XNAMATHMISC_INL__