git.cworth.org Git - apitrace/blob - thirdparty/directxtex/XNAMath/xnamathmisc.inl

   1 /************************************************************************
   2 *                                                                       *
   3 * xnamathmisc.inl -- SIMD C++ Math library for Windows and Xbox 360     *
   4 *                    Quaternion, plane, and color functions             *
   5 *                                                                       *
   6 * Copyright (c) Microsoft Corp. All rights reserved.                    *
   7 *                                                                       *
   8 ************************************************************************/
   9
  10 #if defined(_MSC_VER) && (_MSC_VER > 1000)
  11 #pragma once
  12 #endif
  13
  14 #ifndef __XNAMATHMISC_INL__
  15 #define __XNAMATHMISC_INL__
  16
  17 /****************************************************************************
  18  *
  19  * Quaternion
  20  *
  21  ****************************************************************************/
  22
  23 //------------------------------------------------------------------------------
  24 // Comparison operations
  25 //------------------------------------------------------------------------------
  26
  27 //------------------------------------------------------------------------------
  28
  29 XMFINLINE BOOL XMQuaternionEqual
  30 (
  31     FXMVECTOR Q1,
  32     FXMVECTOR Q2
  33 )
  34 {
  35     return XMVector4Equal(Q1, Q2);
  36 }
  37
  38 //------------------------------------------------------------------------------
  39
  40 XMFINLINE BOOL XMQuaternionNotEqual
  41 (
  42     FXMVECTOR Q1,
  43     FXMVECTOR Q2
  44 )
  45 {
  46     return XMVector4NotEqual(Q1, Q2);
  47 }
  48
  49 //------------------------------------------------------------------------------
  50
  51 XMFINLINE BOOL XMQuaternionIsNaN
  52 (
  53     FXMVECTOR Q
  54 )
  55 {
  56     return XMVector4IsNaN(Q);
  57 }
  58
  59 //------------------------------------------------------------------------------
  60
  61 XMFINLINE BOOL XMQuaternionIsInfinite
  62 (
  63     FXMVECTOR Q
  64 )
  65 {
  66     return XMVector4IsInfinite(Q);
  67 }
  68
  69 //------------------------------------------------------------------------------
  70
  71 XMFINLINE BOOL XMQuaternionIsIdentity
  72 (
  73     FXMVECTOR Q
  74 )
  75 {
  76 #if defined(_XM_NO_INTRINSICS_)
  77
  78     return XMVector4Equal(Q, g_XMIdentityR3.v);
  79
  80 #elif defined(_XM_SSE_INTRINSICS_)
  81     XMVECTOR vTemp = _mm_cmpeq_ps(Q,g_XMIdentityR3);
  82     return (_mm_movemask_ps(vTemp)==0x0f);
  83 #else // _XM_VMX128_INTRINSICS_
  84 #endif // _XM_VMX128_INTRINSICS_
  85 }
  86
  87 //------------------------------------------------------------------------------
  88 // Computation operations
  89 //------------------------------------------------------------------------------
  90
  91 //------------------------------------------------------------------------------
  92
  93 XMFINLINE XMVECTOR XMQuaternionDot
  94 (
  95     FXMVECTOR Q1,
  96     FXMVECTOR Q2
  97 )
  98 {
  99     return XMVector4Dot(Q1, Q2);
 100 }
 101
 102 //------------------------------------------------------------------------------
 103
// Multiplies two quaternions.
//
// Both code paths accumulate the same four terms:
//   Result  = Q1 * Q2.w
//   Result += ( Q1.w, -Q1.z,  Q1.y, -Q1.x) * Q2.x
//   Result += ( Q1.z,  Q1.w, -Q1.x, -Q1.y) * Q2.y
//   Result += (-Q1.y,  Q1.x,  Q1.w, -Q1.z) * Q2.z
// Written out per component, this is the Hamilton product Q2 * Q1.
//
// Q1, Q2 - operands; the order matters.
// Returns the product quaternion.
XMFINLINE XMVECTOR XMQuaternionMultiply
(
    FXMVECTOR Q1,
    FXMVECTOR Q2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR         NegativeQ1;
    XMVECTOR         Q2X;
    XMVECTOR         Q2Y;
    XMVECTOR         Q2Z;
    XMVECTOR         Q2W;
    XMVECTOR         Q1WZYX;
    XMVECTOR         Q1ZWXY;
    XMVECTOR         Q1YXWZ;
    XMVECTOR         Result;
    // Permute controls: the "0" entries pick a lane from the first permute
    // argument (Q1) and the "1" entries from the second (NegativeQ1), so each
    // control yields a reordered Q1 with selected lanes negated.
    // NOTE(review): XM_PERMUTE_* semantics are inferred from the names and
    // from the matching sign vectors in the SSE path - confirm.
    CONST XMVECTORU32 ControlWZYX = {XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1X};
    CONST XMVECTORU32 ControlZWXY = {XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_1Y};
    CONST XMVECTORU32 ControlYXWZ = {XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z};

    NegativeQ1 = XMVectorNegate(Q1);

    // Broadcast each component of Q2 across a full vector
    Q2W = XMVectorSplatW(Q2);
    Q2X = XMVectorSplatX(Q2);
    Q2Y = XMVectorSplatY(Q2);
    Q2Z = XMVectorSplatZ(Q2);

    Q1WZYX = XMVectorPermute(Q1, NegativeQ1, ControlWZYX.v);
    Q1ZWXY = XMVectorPermute(Q1, NegativeQ1, ControlZWXY.v);
    Q1YXWZ = XMVectorPermute(Q1, NegativeQ1, ControlYXWZ.v);

    // Accumulate the four terms listed in the header comment
    Result = XMVectorMultiply(Q1, Q2W);
    Result = XMVectorMultiplyAdd(Q1WZYX, Q2X, Result);
    Result = XMVectorMultiplyAdd(Q1ZWXY, Q2Y, Result);
    Result = XMVectorMultiplyAdd(Q1YXWZ, Q2Z, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Per-lane sign multipliers applied after each shuffle of Q1
    static CONST XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f};
    static CONST XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f};
    static CONST XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f};
    // Copy to SSE registers and use as few as possible for x86
    XMVECTOR Q2X = Q2;
    XMVECTOR Q2Y = Q2;
    XMVECTOR Q2Z = Q2;
    XMVECTOR vResult = Q2;
    // Splat with one instruction
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
    Q2X = _mm_shuffle_ps(Q2X,Q2X,_MM_SHUFFLE(0,0,0,0));
    Q2Y = _mm_shuffle_ps(Q2Y,Q2Y,_MM_SHUFFLE(1,1,1,1));
    Q2Z = _mm_shuffle_ps(Q2Z,Q2Z,_MM_SHUFFLE(2,2,2,2));
    // Retire Q1 and perform Q1*Q2W
    vResult = _mm_mul_ps(vResult,Q1);
    XMVECTOR Q1Shuffle = Q1;
    // Shuffle the copies of Q1
    // (each shuffle below is applied to the previous shuffle's result:
    //  XYZW -> WZYX -> ZWXY -> YXWZ)
    Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
    // Mul by Q1WZYX
    Q2X = _mm_mul_ps(Q2X,Q1Shuffle);
    Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(2,3,0,1));
    // Flip the signs on y and w
    Q2X = _mm_mul_ps(Q2X,ControlWZYX);
    // Mul by Q1ZWXY
    Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle);
    Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
    // Flip the signs on z and w
    Q2Y = _mm_mul_ps(Q2Y,ControlZWXY);
    // Mul by Q1YXWZ
    Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle);
    vResult = _mm_add_ps(vResult,Q2X);
    // Flip the signs on x and w
    Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ);
    Q2Y = _mm_add_ps(Q2Y,Q2Z);
    vResult = _mm_add_ps(vResult,Q2Y);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 183
 184 //------------------------------------------------------------------------------
 185
 186 XMFINLINE XMVECTOR XMQuaternionLengthSq
 187 (
 188     FXMVECTOR Q
 189 )
 190 {
 191     return XMVector4LengthSq(Q);
 192 }
 193
 194 //------------------------------------------------------------------------------
 195
 196 XMFINLINE XMVECTOR XMQuaternionReciprocalLength
 197 (
 198     FXMVECTOR Q
 199 )
 200 {
 201     return XMVector4ReciprocalLength(Q);
 202 }
 203
 204 //------------------------------------------------------------------------------
 205
 206 XMFINLINE XMVECTOR XMQuaternionLength
 207 (
 208     FXMVECTOR Q
 209 )
 210 {
 211     return XMVector4Length(Q);
 212 }
 213
 214 //------------------------------------------------------------------------------
 215
 216 XMFINLINE XMVECTOR XMQuaternionNormalizeEst
 217 (
 218     FXMVECTOR Q
 219 )
 220 {
 221     return XMVector4NormalizeEst(Q);
 222 }
 223
 224 //------------------------------------------------------------------------------
 225
 226 XMFINLINE XMVECTOR XMQuaternionNormalize
 227 (
 228     FXMVECTOR Q
 229 )
 230 {
 231     return XMVector4Normalize(Q);
 232 }
 233
 234 //------------------------------------------------------------------------------
 235
 236 XMFINLINE XMVECTOR XMQuaternionConjugate
 237 (
 238     FXMVECTOR Q
 239 )
 240 {
 241 #if defined(_XM_NO_INTRINSICS_)
 242
 243     XMVECTOR Result = {
 244         -Q.vector4_f32[0],
 245         -Q.vector4_f32[1],
 246         -Q.vector4_f32[2],
 247         Q.vector4_f32[3]
 248     };
 249     return Result;
 250 #elif defined(_XM_SSE_INTRINSICS_)
 251     static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f};
 252     XMVECTOR Result = _mm_mul_ps(Q,NegativeOne3);
 253     return Result;
 254 #else // _XM_VMX128_INTRINSICS_
 255 #endif // _XM_VMX128_INTRINSICS_
 256 }
 257
 258 //------------------------------------------------------------------------------
 259
 260 XMFINLINE XMVECTOR XMQuaternionInverse
 261 (
 262     FXMVECTOR Q
 263 )
 264 {
 265 #if defined(_XM_NO_INTRINSICS_)
 266
 267     XMVECTOR        Conjugate;
 268     XMVECTOR        L;
 269     XMVECTOR        Control;
 270     XMVECTOR        Result;
 271     CONST XMVECTOR  Zero = XMVectorZero();
 272
 273     L = XMVector4LengthSq(Q);
 274     Conjugate = XMQuaternionConjugate(Q);
 275
 276     Control = XMVectorLessOrEqual(L, g_XMEpsilon.v);
 277
 278     L = XMVectorReciprocal(L);
 279     Result = XMVectorMultiply(Conjugate, L);
 280
 281     Result = XMVectorSelect(Result, Zero, Control);
 282
 283     return Result;
 284
 285 #elif defined(_XM_SSE_INTRINSICS_)
 286     XMVECTOR        Conjugate;
 287     XMVECTOR        L;
 288     XMVECTOR        Control;
 289     XMVECTOR        Result;
 290     XMVECTOR  Zero = XMVectorZero();
 291
 292     L = XMVector4LengthSq(Q);
 293     Conjugate = XMQuaternionConjugate(Q);
 294     Control = XMVectorLessOrEqual(L, g_XMEpsilon);
 295     Result = _mm_div_ps(Conjugate,L);
 296     Result = XMVectorSelect(Result, Zero, Control);
 297     return Result;
 298 #else // _XM_VMX128_INTRINSICS_
 299 #endif // _XM_VMX128_INTRINSICS_
 300 }
 301
 302 //------------------------------------------------------------------------------
 303
 304 XMFINLINE XMVECTOR XMQuaternionLn
 305 (
 306     FXMVECTOR Q
 307 )
 308 {
 309 #if defined(_XM_NO_INTRINSICS_)
 310
 311     XMVECTOR Q0;
 312     XMVECTOR QW;
 313     XMVECTOR Theta;
 314     XMVECTOR SinTheta;
 315     XMVECTOR S;
 316     XMVECTOR ControlW;
 317     XMVECTOR Result;
 318     static CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
 319
 320     QW = XMVectorSplatW(Q);
 321     Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);
 322
 323     ControlW = XMVectorInBounds(QW, OneMinusEpsilon);
 324
 325     Theta = XMVectorACos(QW);
 326     SinTheta = XMVectorSin(Theta);
 327
 328     S = XMVectorReciprocal(SinTheta);
 329     S = XMVectorMultiply(Theta, S);
 330
 331     Result = XMVectorMultiply(Q0, S);
 332
 333     Result = XMVectorSelect(Q0, Result, ControlW);
 334
 335     return Result;
 336
 337 #elif defined(_XM_SSE_INTRINSICS_)
 338     static CONST XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
 339     static CONST XMVECTORF32 NegOneMinusEpsilon = {-(1.0f - 0.00001f), -(1.0f - 0.00001f),-(1.0f - 0.00001f),-(1.0f - 0.00001f)};
 340     // Get W only
 341     XMVECTOR QW = _mm_shuffle_ps(Q,Q,_MM_SHUFFLE(3,3,3,3));
 342     // W = 0
 343     XMVECTOR Q0 = _mm_and_ps(Q,g_XMMask3);
 344     // Use W if within bounds
 345     XMVECTOR ControlW = _mm_cmple_ps(QW,OneMinusEpsilon);
 346     XMVECTOR vTemp2 = _mm_cmpge_ps(QW,NegOneMinusEpsilon);
 347     ControlW = _mm_and_ps(ControlW,vTemp2);
 348     // Get theta
 349     XMVECTOR vTheta = XMVectorACos(QW);
 350     // Get Sine of theta
 351     vTemp2 = XMVectorSin(vTheta);
 352     // theta/sine of theta
 353     vTheta = _mm_div_ps(vTheta,vTemp2);
 354     // Here's the answer
 355     vTheta = _mm_mul_ps(vTheta,Q0);
 356     // Was W in bounds? If not, return input as is
 357     vTheta = XMVectorSelect(Q0,vTheta,ControlW);
 358     return vTheta;
 359 #else // _XM_VMX128_INTRINSICS_
 360 #endif // _XM_VMX128_INTRINSICS_
 361 }
 362
 363 //------------------------------------------------------------------------------
 364
 365 XMFINLINE XMVECTOR XMQuaternionExp
 366 (
 367     FXMVECTOR Q
 368 )
 369 {
 370 #if defined(_XM_NO_INTRINSICS_)
 371
 372     XMVECTOR Theta;
 373     XMVECTOR SinTheta;
 374     XMVECTOR CosTheta;
 375     XMVECTOR S;
 376     XMVECTOR Control;
 377     XMVECTOR Zero;
 378     XMVECTOR Result;
 379
 380     Theta = XMVector3Length(Q);
 381     XMVectorSinCos(&SinTheta, &CosTheta, Theta);
 382
 383     S = XMVectorReciprocal(Theta);
 384     S = XMVectorMultiply(SinTheta, S);
 385
 386     Result = XMVectorMultiply(Q, S);
 387
 388     Zero = XMVectorZero();
 389     Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v);
 390     Result = XMVectorSelect(Result, Q, Control);
 391
 392     Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v);
 393
 394     return Result;
 395
 396 #elif defined(_XM_SSE_INTRINSICS_)
 397     XMVECTOR Theta;
 398     XMVECTOR SinTheta;
 399     XMVECTOR CosTheta;
 400     XMVECTOR S;
 401     XMVECTOR Control;
 402     XMVECTOR Zero;
 403     XMVECTOR Result;
 404     Theta = XMVector3Length(Q);
 405     XMVectorSinCos(&SinTheta, &CosTheta, Theta);
 406     S = _mm_div_ps(SinTheta,Theta);
 407     Result = _mm_mul_ps(Q, S);
 408     Zero = XMVectorZero();
 409     Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon);
 410     Result = XMVectorSelect(Result,Q,Control);
 411     Result = _mm_and_ps(Result,g_XMMask3);
 412     CosTheta = _mm_and_ps(CosTheta,g_XMMaskW);
 413     Result = _mm_or_ps(Result,CosTheta);
 414     return Result;
 415 #else // _XM_VMX128_INTRINSICS_
 416 #endif // _XM_VMX128_INTRINSICS_
 417 }
 418
 419 //------------------------------------------------------------------------------
 420
 421 XMINLINE XMVECTOR XMQuaternionSlerp
 422 (
 423     FXMVECTOR Q0,
 424     FXMVECTOR Q1,
 425     FLOAT    t
 426 )
 427 {
 428     XMVECTOR T = XMVectorReplicate(t);
 429     return XMQuaternionSlerpV(Q0, Q1, T);
 430 }
 431
 432 //------------------------------------------------------------------------------
 433
// Spherical linear interpolation between two quaternions with a vector
// control factor.
//
// Q0, Q1 - quaternions to interpolate between.
// T      - interpolation factor; all four components must hold the same
//          value (asserted).
// Returns Q0 * S0 + Q1 * S1, where S0/S1 are the sin-based slerp weights
// shown in the formula below. When |dot(Q0, Q1)| is not below 1 - 0.00001
// the weights fall back to the linear pair (1 - t, t). If dot(Q0, Q1) is
// negative, the Q1 weight is negated.
XMINLINE XMVECTOR XMQuaternionSlerpV
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
    XMVECTOR Omega;
    XMVECTOR CosOmega;
    XMVECTOR SinOmega;
    XMVECTOR InvSinOmega;
    XMVECTOR V01;
    XMVECTOR C1000;
    XMVECTOR SignMask;
    XMVECTOR S0;
    XMVECTOR S1;
    XMVECTOR Sign;
    XMVECTOR Control;
    XMVECTOR Result;
    XMVECTOR Zero;
    CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};

    // T must be a replicated scalar
    XMASSERT((T.vector4_f32[1] == T.vector4_f32[0]) && (T.vector4_f32[2] == T.vector4_f32[0]) && (T.vector4_f32[3] == T.vector4_f32[0]));

    CosOmega = XMQuaternionDot(Q0, Q1);

    // Sign is -1 where the dot product is negative, +1 otherwise;
    // multiplying by it makes CosOmega non-negative.
    Zero = XMVectorZero();
    Control = XMVectorLess(CosOmega, Zero);
    Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control);

    CosOmega = XMVectorMultiply(CosOmega, Sign);

    // Set where the sin-based weights are used; otherwise the linear
    // weights in V01 are kept (see the select below).
    Control = XMVectorLess(CosOmega, OneMinusEpsilon);

    // sin(Omega) = sqrt(1 - cos(Omega)^2)
    SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v);
    SinOmega = XMVectorSqrt(SinOmega);

    Omega = XMVectorATan2(SinOmega, CosOmega);

    // NOTE(review): this sequence appears to build V01 = (1 - t, t, 0, 0),
    // mirroring the SSE path below; the exact XMVectorShiftLeft and
    // XMVectorSetBinaryConstant semantics are not visible here - confirm.
    SignMask = XMVectorSplatSignMask();
    C1000 = XMVectorSetBinaryConstant(1, 0, 0, 0);
    V01 = XMVectorShiftLeft(T, Zero, 2);
    SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
    V01 = XMVectorXorInt(V01, SignMask);
    V01 = XMVectorAdd(C1000, V01);

    InvSinOmega = XMVectorReciprocal(SinOmega);

    // sin(V01 * Omega) / sin(Omega)
    S0 = XMVectorMultiply(V01, Omega);
    S0 = XMVectorSin(S0);
    S0 = XMVectorMultiply(S0, InvSinOmega);

    S0 = XMVectorSelect(V01, S0, Control);

    // X lane holds the Q0 weight, Y lane the Q1 weight
    S1 = XMVectorSplatY(S0);
    S0 = XMVectorSplatX(S0);

    S1 = XMVectorMultiply(S1, Sign);

    Result = XMVectorMultiply(Q0, S0);
    Result = XMVectorMultiplyAdd(Q1, S1, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
    XMVECTOR Omega;
    XMVECTOR CosOmega;
    XMVECTOR SinOmega;
    XMVECTOR V01;
    XMVECTOR S0;
    XMVECTOR S1;
    XMVECTOR Sign;
    XMVECTOR Control;
    XMVECTOR Result;
    XMVECTOR Zero;
    static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
    // Sign bit in the X lane only
    static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000};
    // Keeps the X and Y lanes, clears Z and W
    static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000};

    // T must be a replicated scalar
    XMASSERT((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));

    CosOmega = XMQuaternionDot(Q0, Q1);

    // Sign is -1 where the dot product is negative, +1 otherwise;
    // multiplying by it makes CosOmega non-negative.
    Zero = XMVectorZero();
    Control = XMVectorLess(CosOmega, Zero);
    Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control);

    CosOmega = _mm_mul_ps(CosOmega, Sign);

    // Set where the sin-based weights are used; otherwise the linear
    // weights in V01 are kept (see the select below).
    Control = XMVectorLess(CosOmega, OneMinusEpsilon);

    // sin(Omega) = sqrt(1 - cos(Omega)^2)
    SinOmega = _mm_mul_ps(CosOmega,CosOmega);
    SinOmega = _mm_sub_ps(g_XMOne,SinOmega);
    SinOmega = _mm_sqrt_ps(SinOmega);

    Omega = XMVectorATan2(SinOmega, CosOmega);

    // Build the weight seed: (t, t, 0, 0) after the mask, (-t, t, 0, 0)
    // after the sign flip, then g_XMIdentityR0 is added.
    // NOTE(review): presumably g_XMIdentityR0 is (1, 0, 0, 0), giving
    // (1 - t, t, 0, 0) - confirm.
    V01 = _mm_shuffle_ps(T,T,_MM_SHUFFLE(2,3,0,1));
    V01 = _mm_and_ps(V01,MaskXY);
    V01 = _mm_xor_ps(V01,SignMask2);
    V01 = _mm_add_ps(g_XMIdentityR0, V01);

    // sin(V01 * Omega) / sin(Omega)
    S0 = _mm_mul_ps(V01, Omega);
    S0 = XMVectorSin(S0);
    S0 = _mm_div_ps(S0, SinOmega);

    S0 = XMVectorSelect(V01, S0, Control);

    // X lane holds the Q0 weight, Y lane the Q1 weight
    S1 = XMVectorSplatY(S0);
    S0 = XMVectorSplatX(S0);

    S1 = _mm_mul_ps(S1, Sign);
    Result = _mm_mul_ps(Q0, S0);
    S1 = _mm_mul_ps(S1, Q1);
    Result = _mm_add_ps(Result,S1);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 557
 558 //------------------------------------------------------------------------------
 559
 560 XMFINLINE XMVECTOR XMQuaternionSquad
 561 (
 562     FXMVECTOR Q0,
 563     FXMVECTOR Q1,
 564     FXMVECTOR Q2,
 565     CXMVECTOR Q3,
 566     FLOAT    t
 567 )
 568 {
 569     XMVECTOR T = XMVectorReplicate(t);
 570     return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T);
 571 }
 572
 573 //------------------------------------------------------------------------------
 574
 575 XMFINLINE XMVECTOR XMQuaternionSquadV
 576 (
 577     FXMVECTOR Q0,
 578     FXMVECTOR Q1,
 579     FXMVECTOR Q2,
 580     CXMVECTOR Q3,
 581     CXMVECTOR T
 582 )
 583 {
 584     XMVECTOR Q03;
 585     XMVECTOR Q12;
 586     XMVECTOR TP;
 587     XMVECTOR Two;
 588     XMVECTOR Result;
 589
 590     XMASSERT( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) );
 591
 592     TP = T;
 593     Two = XMVectorSplatConstant(2, 0);
 594
 595     Q03 = XMQuaternionSlerpV(Q0, Q3, T);
 596     Q12 = XMQuaternionSlerpV(Q1, Q2, T);
 597
 598     TP = XMVectorNegativeMultiplySubtract(TP, TP, TP);
 599     TP = XMVectorMultiply(TP, Two);
 600
 601     Result = XMQuaternionSlerpV(Q03, Q12, TP);
 602
 603     return Result;
 604
 605 }
 606
 607 //------------------------------------------------------------------------------
 608
 609 XMINLINE VOID XMQuaternionSquadSetup
 610 (
 611     XMVECTOR* pA,
 612     XMVECTOR* pB,
 613     XMVECTOR* pC,
 614     FXMVECTOR  Q0,
 615     FXMVECTOR  Q1,
 616     FXMVECTOR  Q2,
 617     CXMVECTOR  Q3
 618 )
 619 {
 620     XMVECTOR SQ0, SQ2, SQ3;
 621     XMVECTOR InvQ1, InvQ2;
 622     XMVECTOR LnQ0, LnQ1, LnQ2, LnQ3;
 623     XMVECTOR ExpQ02, ExpQ13;
 624     XMVECTOR LS01, LS12, LS23;
 625     XMVECTOR LD01, LD12, LD23;
 626     XMVECTOR Control0, Control1, Control2;
 627     XMVECTOR NegativeOneQuarter;
 628
 629     XMASSERT(pA);
 630     XMASSERT(pB);
 631     XMASSERT(pC);
 632
 633     LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
 634     LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
 635     SQ2 = XMVectorNegate(Q2);
 636
 637     Control1 = XMVectorLess(LS12, LD12);
 638     SQ2 = XMVectorSelect(Q2, SQ2, Control1);
 639
 640     LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
 641     LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
 642     SQ0 = XMVectorNegate(Q0);
 643
 644     LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3));
 645     LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3));
 646     SQ3 = XMVectorNegate(Q3);
 647
 648     Control0 = XMVectorLess(LS01, LD01);
 649     Control2 = XMVectorLess(LS23, LD23);
 650
 651     SQ0 = XMVectorSelect(Q0, SQ0, Control0);
 652     SQ3 = XMVectorSelect(Q3, SQ3, Control2);
 653
 654     InvQ1 = XMQuaternionInverse(Q1);
 655     InvQ2 = XMQuaternionInverse(SQ2);
 656
 657     LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0));
 658     LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2));
 659     LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1));
 660     LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3));
 661
 662     NegativeOneQuarter = XMVectorSplatConstant(-1, 2);
 663
 664     ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter);
 665     ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter);
 666     ExpQ02 = XMQuaternionExp(ExpQ02);
 667     ExpQ13 = XMQuaternionExp(ExpQ13);
 668
 669     *pA = XMQuaternionMultiply(Q1, ExpQ02);
 670     *pB = XMQuaternionMultiply(SQ2, ExpQ13);
 671     *pC = SQ2;
 672 }
 673
 674 //------------------------------------------------------------------------------
 675
 676 XMFINLINE XMVECTOR XMQuaternionBaryCentric
 677 (
 678     FXMVECTOR Q0,
 679     FXMVECTOR Q1,
 680     FXMVECTOR Q2,
 681     FLOAT    f,
 682     FLOAT    g
 683 )
 684 {
 685     XMVECTOR Q01;
 686     XMVECTOR Q02;
 687     FLOAT    s;
 688     XMVECTOR Result;
 689
 690     s = f + g;
 691
 692     if ((s < 0.00001f) && (s > -0.00001f))
 693     {
 694         Result = Q0;
 695     }
 696     else
 697     {
 698         Q01 = XMQuaternionSlerp(Q0, Q1, s);
 699         Q02 = XMQuaternionSlerp(Q0, Q2, s);
 700
 701         Result = XMQuaternionSlerp(Q01, Q02, g / s);
 702     }
 703
 704     return Result;
 705 }
 706
 707 //------------------------------------------------------------------------------
 708
 709 XMFINLINE XMVECTOR XMQuaternionBaryCentricV
 710 (
 711     FXMVECTOR Q0,
 712     FXMVECTOR Q1,
 713     FXMVECTOR Q2,
 714     CXMVECTOR F,
 715     CXMVECTOR G
 716 )
 717 {
 718     XMVECTOR Q01;
 719     XMVECTOR Q02;
 720     XMVECTOR S, GS;
 721     XMVECTOR Epsilon;
 722     XMVECTOR Result;
 723
 724     XMASSERT( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) );
 725     XMASSERT( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) );
 726
 727     Epsilon = XMVectorSplatConstant(1, 16);
 728
 729     S = XMVectorAdd(F, G);
 730
 731     if (XMVector4InBounds(S, Epsilon))
 732     {
 733         Result = Q0;
 734     }
 735     else
 736     {
 737         Q01 = XMQuaternionSlerpV(Q0, Q1, S);
 738         Q02 = XMQuaternionSlerpV(Q0, Q2, S);
 739         GS = XMVectorReciprocal(S);
 740         GS = XMVectorMultiply(G, GS);
 741
 742         Result = XMQuaternionSlerpV(Q01, Q02, GS);
 743     }
 744
 745     return Result;
 746 }
 747
 748 //------------------------------------------------------------------------------
 749 // Transformation operations
 750 //------------------------------------------------------------------------------
 751
 752 //------------------------------------------------------------------------------
 753
 754 XMFINLINE XMVECTOR XMQuaternionIdentity()
 755 {
 756 #if defined(_XM_NO_INTRINSICS_)
 757     return g_XMIdentityR3.v;
 758 #elif defined(_XM_SSE_INTRINSICS_)
 759     return g_XMIdentityR3;
 760 #else // _XM_VMX128_INTRINSICS_
 761 #endif // _XM_VMX128_INTRINSICS_
 762 }
 763
 764 //------------------------------------------------------------------------------
 765
 766 XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYaw
 767 (
 768     FLOAT Pitch,
 769     FLOAT Yaw,
 770     FLOAT Roll
 771 )
 772 {
 773     XMVECTOR Angles;
 774     XMVECTOR Q;
 775
 776     Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
 777     Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
 778
 779     return Q;
 780 }
 781
 782 //------------------------------------------------------------------------------
 783
 784 XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYawFromVector
 785 (
 786     FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
 787 )
 788 {
 789 #if defined(_XM_NO_INTRINSICS_)
 790
 791     XMVECTOR                Q, Q0, Q1;
 792     XMVECTOR                P0, P1, Y0, Y1, R0, R1;
 793     XMVECTOR                HalfAngles;
 794     XMVECTOR                SinAngles, CosAngles;
 795     static CONST XMVECTORU32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
 796     static CONST XMVECTORU32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
 797     static CONST XMVECTORU32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
 798     static CONST XMVECTOR   Sign = {1.0f, -1.0f, -1.0f, 1.0f};
 799
 800     HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);
 801     XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
 802
 803     P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch.v);
 804     Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw.v);
 805     R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll.v);
 806     P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch.v);
 807     Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw.v);
 808     R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll.v);
 809
 810     Q1 = XMVectorMultiply(P1, Sign);
 811     Q0 = XMVectorMultiply(P0, Y0);
 812     Q1 = XMVectorMultiply(Q1, Y1);
 813     Q0 = XMVectorMultiply(Q0, R0);
 814     Q = XMVectorMultiplyAdd(Q1, R1, Q0);
 815
 816     return Q;
 817
 818 #elif defined(_XM_SSE_INTRINSICS_)
 819     XMVECTOR                Q, Q0, Q1;
 820     XMVECTOR                P0, P1, Y0, Y1, R0, R1;
 821     XMVECTOR                HalfAngles;
 822     XMVECTOR                SinAngles, CosAngles;
 823     static CONST XMVECTORI32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
 824     static CONST XMVECTORI32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
 825     static CONST XMVECTORI32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
 826     static CONST XMVECTORF32 Sign = {1.0f, -1.0f, -1.0f, 1.0f};
 827
 828     HalfAngles = _mm_mul_ps(Angles, g_XMOneHalf);
 829     XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
 830
 831     P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch);
 832     Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw);
 833     R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll);
 834     P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch);
 835     Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw);
 836     R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll);
 837
 838     Q1 = _mm_mul_ps(P1, Sign);
 839     Q0 = _mm_mul_ps(P0, Y0);
 840     Q1 = _mm_mul_ps(Q1, Y1);
 841     Q0 = _mm_mul_ps(Q0, R0);
 842     Q = _mm_mul_ps(Q1, R1);
 843     Q = _mm_add_ps(Q,Q0);
 844     return Q;
 845 #else // _XM_VMX128_INTRINSICS_
 846 #endif // _XM_VMX128_INTRINSICS_
 847 }
 848
 849 //------------------------------------------------------------------------------
 850
 851 XMFINLINE XMVECTOR XMQuaternionRotationNormal
 852 (
 853     FXMVECTOR NormalAxis,
 854     FLOAT    Angle
 855 )
 856 {
 857 #if defined(_XM_NO_INTRINSICS_)
 858
 859     XMVECTOR Q;
 860     XMVECTOR N;
 861     XMVECTOR Scale;
 862
 863     N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v);
 864
 865     XMScalarSinCos(&Scale.vector4_f32[2], &Scale.vector4_f32[3], 0.5f * Angle);
 866
 867     Scale.vector4_f32[0] = Scale.vector4_f32[1] = Scale.vector4_f32[2];
 868
 869     Q = XMVectorMultiply(N, Scale);
 870
 871     return Q;
 872
 873 #elif defined(_XM_SSE_INTRINSICS_)
 874     XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3);
 875     N = _mm_or_ps(N,g_XMIdentityR3);
 876     XMVECTOR Scale = _mm_set_ps1(0.5f * Angle);
 877     XMVECTOR vSine;
 878     XMVECTOR vCosine;
 879     XMVectorSinCos(&vSine,&vCosine,Scale);
 880     Scale = _mm_and_ps(vSine,g_XMMask3);
 881     vCosine = _mm_and_ps(vCosine,g_XMMaskW);
 882     Scale = _mm_or_ps(Scale,vCosine);
 883     N = _mm_mul_ps(N,Scale);
 884     return N;
 885 #else // _XM_VMX128_INTRINSICS_
 886 #endif // _XM_VMX128_INTRINSICS_
 887 }
 888
 889 //------------------------------------------------------------------------------
 890
 891 XMFINLINE XMVECTOR XMQuaternionRotationAxis
 892 (
 893     FXMVECTOR Axis,
 894     FLOAT    Angle
 895 )
 896 {
 897 #if defined(_XM_NO_INTRINSICS_)
 898
 899     XMVECTOR Normal;
 900     XMVECTOR Q;
 901
 902     XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
 903     XMASSERT(!XMVector3IsInfinite(Axis));
 904
 905     Normal = XMVector3Normalize(Axis);
 906     Q = XMQuaternionRotationNormal(Normal, Angle);
 907
 908     return Q;
 909
 910 #elif defined(_XM_SSE_INTRINSICS_)
 911     XMVECTOR Normal;
 912     XMVECTOR Q;
 913
 914     XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
 915     XMASSERT(!XMVector3IsInfinite(Axis));
 916
 917     Normal = XMVector3Normalize(Axis);
 918     Q = XMQuaternionRotationNormal(Normal, Angle);
 919     return Q;
 920 #else // _XM_VMX128_INTRINSICS_
 921 #endif // _XM_VMX128_INTRINSICS_
 922 }
 923
 924 //------------------------------------------------------------------------------
 925
// Builds a quaternion from the rotation stored in rows 0..2 of M.
//
// M      - source matrix; only M.r[0], M.r[1] and M.r[2] are read.
// Returns the vector produced by the final permute of Q1 (square-root
// terms derived from the diagonal) and Q2 (scaled sums/differences of
// off-diagonal elements).
//
// The function is branch-free: a set of comparisons on the diagonal terms
// yields per-lane masks, and those masks drive a chain of XMVectorSelect
// calls that pick the splat, sign and permute constants used at the end.
// The order of those selects matters; do not reorder them.
XMINLINE XMVECTOR XMQuaternionRotationMatrix
(
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)

    XMVECTOR Q0, Q1, Q2;
    XMVECTOR M00, M11, M22;
    XMVECTOR CQ0, CQ1, C;
    XMVECTOR CX, CY, CZ, CW;
    XMVECTOR SQ1, Scale;
    XMVECTOR Rsq, Sqrt, VEqualsNaN;
    XMVECTOR A, B, P;
    XMVECTOR PermuteSplat, PermuteSplatT;
    XMVECTOR SignB, SignBT;
    XMVECTOR PermuteControl, PermuteControlT;
    XMVECTOR Result;
    // NOTE(review): OneQuarter is not referenced anywhere in this function.
    static CONST XMVECTORF32 OneQuarter = {0.25f, 0.25f, 0.25f, 0.25f};
    // Sign patterns; the name spells the sign of each lane (P = +1, N = -1).
    static CONST XMVECTORF32 SignPNNP = {1.0f, -1.0f, -1.0f, 1.0f};
    static CONST XMVECTORF32 SignNPNP = {-1.0f, 1.0f, -1.0f, 1.0f};
    static CONST XMVECTORF32 SignNNPP = {-1.0f, -1.0f, 1.0f, 1.0f};
    static CONST XMVECTORF32 SignPNPP = {1.0f, -1.0f, 1.0f, 1.0f};
    static CONST XMVECTORF32 SignPPNP = {1.0f, 1.0f, -1.0f, 1.0f};
    static CONST XMVECTORF32 SignNPPP = {-1.0f, 1.0f, 1.0f, 1.0f};
    // Permute controls. Judging by the lane comments on P, A and B below,
    // XM_PERMUTE_0* selects from the first XMVectorPermute operand and
    // XM_PERMUTE_1* from the second.
    static CONST XMVECTORU32 Permute0X0X0Y0W = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W};
    static CONST XMVECTORU32 Permute0Y0Z0Z1W = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_1W};
    static CONST XMVECTORU32 SplatX = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
    static CONST XMVECTORU32 SplatY = {XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
    static CONST XMVECTORU32 SplatZ = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z};
    static CONST XMVECTORU32 SplatW = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W};
    static CONST XMVECTORU32 PermuteC = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Y};
    static CONST XMVECTORU32 PermuteA = {XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0W};
    static CONST XMVECTORU32 PermuteB = {XM_PERMUTE_1X, XM_PERMUTE_1W, XM_PERMUTE_0Z, XM_PERMUTE_0W};
    static CONST XMVECTORU32 Permute0 = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Z, XM_PERMUTE_1Y};
    static CONST XMVECTORU32 Permute1 = {XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z};
    static CONST XMVECTORU32 Permute2 = {XM_PERMUTE_1Z, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1X};
    static CONST XMVECTORU32 Permute3 = {XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_1X, XM_PERMUTE_0W};

    // Replicate the three diagonal elements across all lanes.
    M00 = XMVectorSplatX(M.r[0]);
    M11 = XMVectorSplatY(M.r[1]);
    M22 = XMVectorSplatZ(M.r[2]);

    // Q0 = { M00-M11-M22, -M00+M11-M22, -M00-M11+M22, M00+M11+M22 }
    Q0 = XMVectorMultiply(SignPNNP.v, M00);
    Q0 = XMVectorMultiplyAdd(SignNPNP.v, M11, Q0);
    Q0 = XMVectorMultiplyAdd(SignNNPP.v, M22, Q0);

    Q1 = XMVectorAdd(Q0, g_XMOne.v);

    // Square root of Q1 formed as Q1 * rsqrt(Q1). Lanes whose reciprocal
    // square root is NaN are flagged in VEqualsNaN and handled by the select
    // (presumably the second operand, Q1, is taken for flagged lanes --
    // confirm against XMVectorSelect's definition).
    Rsq = XMVectorReciprocalSqrt(Q1);
    VEqualsNaN = XMVectorIsNaN(Rsq);
    Sqrt = XMVectorMultiply(Q1, Rsq);
    Q1 = XMVectorSelect(Sqrt, Q1, VEqualsNaN);

    Q1 = XMVectorMultiply(Q1, g_XMOneHalf.v);

    // Half of the reciprocal square root; later splatted into Scale.
    SQ1 = XMVectorMultiply(Rsq, g_XMOneHalf.v);

    // Lane-wise tests:
    //   C.x = Q0.x >= Q0.y, C.y = Q0.x >= Q0.z,
    //   C.z = Q0.y >= Q0.z, C.w = Q0.w >= g_XMEpsilon.w
    CQ0 = XMVectorPermute(Q0, Q0, Permute0X0X0Y0W.v);
    CQ1 = XMVectorPermute(Q0, g_XMEpsilon.v, Permute0Y0Z0Z1W.v);
    C = XMVectorGreaterOrEqual(CQ0, CQ1);

    // Broadcast each test result so it can act as a full-width select mask.
    CX = XMVectorSplatX(C);
    CY = XMVectorSplatY(C);
    CZ = XMVectorSplatZ(C);
    CW = XMVectorSplatW(C);

    // Select chain: each stage refines the choice of splat control, sign
    // vector and final permute control according to one of the masks.
    PermuteSplat = XMVectorSelect(SplatZ.v, SplatY.v, CZ);
    SignB = XMVectorSelect(SignNPPP.v, SignPPNP.v, CZ);
    PermuteControl = XMVectorSelect(Permute2.v, Permute1.v, CZ);

    PermuteSplat = XMVectorSelect(PermuteSplat, SplatZ.v, CX);
    SignB = XMVectorSelect(SignB, SignNPPP.v, CX);
    PermuteControl = XMVectorSelect(PermuteControl, Permute2.v, CX);

    PermuteSplatT = XMVectorSelect(PermuteSplat,SplatX.v, CY);
    SignBT = XMVectorSelect(SignB, SignPNPP.v, CY);
    PermuteControlT = XMVectorSelect(PermuteControl,Permute0.v, CY);

    PermuteSplat = XMVectorSelect(PermuteSplat, PermuteSplatT, CX);
    SignB = XMVectorSelect(SignB, SignBT, CX);
    PermuteControl = XMVectorSelect(PermuteControl, PermuteControlT, CX);

    // The W test is applied last, so it overrides the earlier choices.
    PermuteSplat = XMVectorSelect(PermuteSplat,SplatW.v, CW);
    SignB = XMVectorSelect(SignB, g_XMNegativeOne.v, CW);
    PermuteControl = XMVectorSelect(PermuteControl,Permute3.v, CW);

    // Replicate the chosen lane of SQ1 into every lane.
    Scale = XMVectorPermute(SQ1, SQ1, PermuteSplat);

    P = XMVectorPermute(M.r[1], M.r[2],PermuteC.v);  // {M10, M12, M20, M21}
    A = XMVectorPermute(M.r[0], P, PermuteA.v);       // {M01, M12, M20, M03}
    B = XMVectorPermute(M.r[0], P, PermuteB.v);       // {M10, M21, M02, M03}

    // Q2 = (A + SignB * B) * Scale
    Q2 = XMVectorMultiplyAdd(SignB, B, A);
    Q2 = XMVectorMultiply(Q2, Scale);

    // Interleave lanes of Q1 and Q2 according to the selected control.
    Result = XMVectorPermute(Q1, Q2, PermuteControl);

    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
1029
1030 //------------------------------------------------------------------------------
1031 // Conversion operations
1032 //------------------------------------------------------------------------------
1033
1034 //------------------------------------------------------------------------------
1035
1036 XMFINLINE VOID XMQuaternionToAxisAngle
1037 (
1038     XMVECTOR* pAxis,
1039     FLOAT*    pAngle,
1040     FXMVECTOR  Q
1041 )
1042 {
1043     XMASSERT(pAxis);
1044     XMASSERT(pAngle);
1045
1046     *pAxis = Q;
1047
1048 #if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
1049     *pAngle = 2.0f * acosf(XMVectorGetW(Q));
1050 #else
1051     *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q));
1052 #endif
1053 }
1054
1055 /****************************************************************************
1056  *
1057  * Plane
1058  *
1059  ****************************************************************************/
1060
1061 //------------------------------------------------------------------------------
1062 // Comparison operations
1063 //------------------------------------------------------------------------------
1064
1065 //------------------------------------------------------------------------------
1066
1067 XMFINLINE BOOL XMPlaneEqual
1068 (
1069     FXMVECTOR P1,
1070     FXMVECTOR P2
1071 )
1072 {
1073     return XMVector4Equal(P1, P2);
1074 }
1075
1076 //------------------------------------------------------------------------------
1077
1078 XMFINLINE BOOL XMPlaneNearEqual
1079 (
1080     FXMVECTOR P1,
1081     FXMVECTOR P2,
1082     FXMVECTOR Epsilon
1083 )
1084 {
1085     XMVECTOR NP1 = XMPlaneNormalize(P1);
1086     XMVECTOR NP2 = XMPlaneNormalize(P2);
1087     return XMVector4NearEqual(NP1, NP2, Epsilon);
1088 }
1089
1090 //------------------------------------------------------------------------------
1091
1092 XMFINLINE BOOL XMPlaneNotEqual
1093 (
1094     FXMVECTOR P1,
1095     FXMVECTOR P2
1096 )
1097 {
1098     return XMVector4NotEqual(P1, P2);
1099 }
1100
1101 //------------------------------------------------------------------------------
1102
1103 XMFINLINE BOOL XMPlaneIsNaN
1104 (
1105     FXMVECTOR P
1106 )
1107 {
1108     return XMVector4IsNaN(P);
1109 }
1110
1111 //------------------------------------------------------------------------------
1112
1113 XMFINLINE BOOL XMPlaneIsInfinite
1114 (
1115     FXMVECTOR P
1116 )
1117 {
1118     return XMVector4IsInfinite(P);
1119 }
1120
1121 //------------------------------------------------------------------------------
1122 // Computation operations
1123 //------------------------------------------------------------------------------
1124
1125 //------------------------------------------------------------------------------
1126
1127 XMFINLINE XMVECTOR XMPlaneDot
1128 (
1129     FXMVECTOR P,
1130     FXMVECTOR V
1131 )
1132 {
1133 #if defined(_XM_NO_INTRINSICS_)
1134
1135     return XMVector4Dot(P, V);
1136
1137 #elif defined(_XM_SSE_INTRINSICS_)
1138     __m128 vTemp2 = V;
1139     __m128 vTemp = _mm_mul_ps(P,vTemp2);
1140     vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
1141     vTemp2 = _mm_add_ps(vTemp2,vTemp);          // Add Z = X+Z; W = Y+W;
1142     vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
1143     vTemp = _mm_add_ps(vTemp,vTemp2);           // Add Z and W together
1144     return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));    // Splat Z and return
1145 #else // _XM_VMX128_INTRINSICS_
1146 #endif // _XM_VMX128_INTRINSICS_
1147 }
1148
1149 //------------------------------------------------------------------------------
1150
1151 XMFINLINE XMVECTOR XMPlaneDotCoord
1152 (
1153     FXMVECTOR P,
1154     FXMVECTOR V
1155 )
1156 {
1157 #if defined(_XM_NO_INTRINSICS_)
1158
1159     XMVECTOR V3;
1160     XMVECTOR Result;
1161
1162     // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]
1163     V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
1164     Result = XMVector4Dot(P, V3);
1165
1166     return Result;
1167
1168 #elif defined(_XM_SSE_INTRINSICS_)
1169     XMVECTOR vTemp2 = _mm_and_ps(V,g_XMMask3);
1170     vTemp2 = _mm_or_ps(vTemp2,g_XMIdentityR3);
1171     XMVECTOR vTemp = _mm_mul_ps(P,vTemp2);
1172     vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
1173     vTemp2 = _mm_add_ps(vTemp2,vTemp);          // Add Z = X+Z; W = Y+W;
1174     vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
1175     vTemp = _mm_add_ps(vTemp,vTemp2);           // Add Z and W together
1176     return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));    // Splat Z and return
1177 #else // _XM_VMX128_INTRINSICS_
1178 #endif // _XM_VMX128_INTRINSICS_
1179 }
1180
1181 //------------------------------------------------------------------------------
1182
1183 XMFINLINE XMVECTOR XMPlaneDotNormal
1184 (
1185     FXMVECTOR P,
1186     FXMVECTOR V
1187 )
1188 {
1189     return XMVector3Dot(P, V);
1190 }
1191
1192 //------------------------------------------------------------------------------
1193 // XMPlaneNormalizeEst uses a reciprocal estimate and
1194 // returns QNaN on zero and infinite vectors.
1195
1196 XMFINLINE XMVECTOR XMPlaneNormalizeEst
1197 (
1198     FXMVECTOR P
1199 )
1200 {
1201 #if defined(_XM_NO_INTRINSICS_)
1202
1203     XMVECTOR Result;
1204     Result = XMVector3ReciprocalLength(P);
1205     Result = XMVectorMultiply(P, Result);
1206     return Result;
1207
1208 #elif defined(_XM_SSE_INTRINSICS_)
1209     // Perform the dot product
1210     XMVECTOR vDot = _mm_mul_ps(P,P);
1211     // x=Dot.y, y=Dot.z
1212     XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
1213     // Result.x = x+y
1214     vDot = _mm_add_ss(vDot,vTemp);
1215     // x=Dot.z
1216     vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
1217     // Result.x = (x+y)+z
1218     vDot = _mm_add_ss(vDot,vTemp);
1219     // Splat x
1220         vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
1221     // Get the reciprocal
1222     vDot = _mm_rsqrt_ps(vDot);
1223     // Get the reciprocal
1224     vDot = _mm_mul_ps(vDot,P);
1225     return vDot;
1226 #else // _XM_VMX128_INTRINSICS_
1227 #endif // _XM_VMX128_INTRINSICS_
1228 }
1229
1230 //------------------------------------------------------------------------------
1231
1232 XMFINLINE XMVECTOR XMPlaneNormalize
1233 (
1234     FXMVECTOR P
1235 )
1236 {
1237 #if defined(_XM_NO_INTRINSICS_)
1238     FLOAT fLengthSq = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2]));
1239     // Prevent divide by zero
1240     if (fLengthSq) {
1241         fLengthSq = 1.0f/fLengthSq;
1242     }
1243     {
1244     XMVECTOR vResult = {
1245         P.vector4_f32[0]*fLengthSq,
1246         P.vector4_f32[1]*fLengthSq,
1247         P.vector4_f32[2]*fLengthSq,
1248         P.vector4_f32[3]*fLengthSq
1249     };
1250     return vResult;
1251     }
1252 #elif defined(_XM_SSE_INTRINSICS_)
1253     // Perform the dot product on x,y and z only
1254     XMVECTOR vLengthSq = _mm_mul_ps(P,P);
1255     XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
1256     vLengthSq = _mm_add_ss(vLengthSq,vTemp);
1257     vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
1258     vLengthSq = _mm_add_ss(vLengthSq,vTemp);
1259         vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
1260     // Prepare for the division
1261     XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
1262     // Failsafe on zero (Or epsilon) length planes
1263     // If the length is infinity, set the elements to zero
1264     vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
1265     // Reciprocal mul to perform the normalization
1266     vResult = _mm_div_ps(P,vResult);
1267     // Any that are infinity, set to zero
1268     vResult = _mm_and_ps(vResult,vLengthSq);
1269     return vResult;
1270 #else // _XM_VMX128_INTRINSICS_
1271 #endif // _XM_VMX128_INTRINSICS_
1272 }
1273
1274 //------------------------------------------------------------------------------
1275
1276 XMFINLINE XMVECTOR XMPlaneIntersectLine
1277 (
1278     FXMVECTOR P,
1279     FXMVECTOR LinePoint1,
1280     FXMVECTOR LinePoint2
1281 )
1282 {
1283 #if defined(_XM_NO_INTRINSICS_)
1284
1285     XMVECTOR V1;
1286     XMVECTOR V2;
1287     XMVECTOR D;
1288     XMVECTOR ReciprocalD;
1289     XMVECTOR VT;
1290     XMVECTOR Point;
1291     XMVECTOR Zero;
1292     XMVECTOR Control;
1293     XMVECTOR Result;
1294
1295     V1 = XMVector3Dot(P, LinePoint1);
1296     V2 = XMVector3Dot(P, LinePoint2);
1297     D = XMVectorSubtract(V1, V2);
1298
1299     ReciprocalD = XMVectorReciprocal(D);
1300     VT = XMPlaneDotCoord(P, LinePoint1);
1301     VT = XMVectorMultiply(VT, ReciprocalD);
1302
1303     Point = XMVectorSubtract(LinePoint2, LinePoint1);
1304     Point = XMVectorMultiplyAdd(Point, VT, LinePoint1);
1305
1306     Zero = XMVectorZero();
1307     Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);
1308
1309     Result = XMVectorSelect(Point, g_XMQNaN.v, Control);
1310
1311     return Result;
1312
1313 #elif defined(_XM_SSE_INTRINSICS_)
1314     XMVECTOR V1;
1315     XMVECTOR V2;
1316     XMVECTOR D;
1317     XMVECTOR VT;
1318     XMVECTOR Point;
1319     XMVECTOR Zero;
1320     XMVECTOR Control;
1321     XMVECTOR Result;
1322
1323     V1 = XMVector3Dot(P, LinePoint1);
1324     V2 = XMVector3Dot(P, LinePoint2);
1325     D = _mm_sub_ps(V1, V2);
1326
1327     VT = XMPlaneDotCoord(P, LinePoint1);
1328     VT = _mm_div_ps(VT, D);
1329
1330     Point = _mm_sub_ps(LinePoint2, LinePoint1);
1331     Point = _mm_mul_ps(Point,VT);
1332     Point = _mm_add_ps(Point,LinePoint1);
1333     Zero = XMVectorZero();
1334     Control = XMVectorNearEqual(D, Zero, g_XMEpsilon);
1335     Result = XMVectorSelect(Point, g_XMQNaN, Control);
1336     return Result;
1337 #else // _XM_VMX128_INTRINSICS_
1338 #endif // _XM_VMX128_INTRINSICS_
1339 }
1340
1341 //------------------------------------------------------------------------------
1342
1343 XMINLINE VOID XMPlaneIntersectPlane
1344 (
1345     XMVECTOR* pLinePoint1,
1346     XMVECTOR* pLinePoint2,
1347     FXMVECTOR  P1,
1348     FXMVECTOR  P2
1349 )
1350 {
1351 #if defined(_XM_NO_INTRINSICS_)
1352
1353     XMVECTOR V1;
1354     XMVECTOR V2;
1355     XMVECTOR V3;
1356     XMVECTOR LengthSq;
1357     XMVECTOR RcpLengthSq;
1358     XMVECTOR Point;
1359     XMVECTOR P1W;
1360     XMVECTOR P2W;
1361     XMVECTOR Control;
1362     XMVECTOR LinePoint1;
1363     XMVECTOR LinePoint2;
1364
1365     XMASSERT(pLinePoint1);
1366     XMASSERT(pLinePoint2);
1367
1368     V1 = XMVector3Cross(P2, P1);
1369
1370     LengthSq = XMVector3LengthSq(V1);
1371
1372     V2 = XMVector3Cross(P2, V1);
1373
1374     P1W = XMVectorSplatW(P1);
1375     Point = XMVectorMultiply(V2, P1W);
1376
1377     V3 = XMVector3Cross(V1, P1);
1378
1379     P2W = XMVectorSplatW(P2);
1380     Point = XMVectorMultiplyAdd(V3, P2W, Point);
1381
1382     RcpLengthSq = XMVectorReciprocal(LengthSq);
1383     LinePoint1 = XMVectorMultiply(Point, RcpLengthSq);
1384
1385     LinePoint2 = XMVectorAdd(LinePoint1, V1);
1386
1387     Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
1388     *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control);
1389     *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control);
1390
1391 #elif defined(_XM_SSE_INTRINSICS_)
1392     XMASSERT(pLinePoint1);
1393     XMASSERT(pLinePoint2);
1394     XMVECTOR V1;
1395     XMVECTOR V2;
1396     XMVECTOR V3;
1397     XMVECTOR LengthSq;
1398     XMVECTOR Point;
1399     XMVECTOR P1W;
1400     XMVECTOR P2W;
1401     XMVECTOR Control;
1402     XMVECTOR LinePoint1;
1403     XMVECTOR LinePoint2;
1404
1405     V1 = XMVector3Cross(P2, P1);
1406
1407     LengthSq = XMVector3LengthSq(V1);
1408
1409     V2 = XMVector3Cross(P2, V1);
1410
1411     P1W = _mm_shuffle_ps(P1,P1,_MM_SHUFFLE(3,3,3,3));
1412     Point = _mm_mul_ps(V2, P1W);
1413
1414     V3 = XMVector3Cross(V1, P1);
1415
1416     P2W = _mm_shuffle_ps(P2,P2,_MM_SHUFFLE(3,3,3,3));
1417     V3 = _mm_mul_ps(V3,P2W);
1418     Point = _mm_add_ps(Point,V3);
1419     LinePoint1 = _mm_div_ps(Point,LengthSq);
1420
1421     LinePoint2 = _mm_add_ps(LinePoint1, V1);
1422
1423     Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon);
1424     *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN, Control);
1425     *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN, Control);
1426 #else // _XM_VMX128_INTRINSICS_
1427 #endif // _XM_VMX128_INTRINSICS_
1428 }
1429
1430 //------------------------------------------------------------------------------
1431
1432 XMFINLINE XMVECTOR XMPlaneTransform
1433 (
1434     FXMVECTOR P,
1435     CXMMATRIX M
1436 )
1437 {
1438 #if defined(_XM_NO_INTRINSICS_)
1439
1440     XMVECTOR X;
1441     XMVECTOR Y;
1442     XMVECTOR Z;
1443     XMVECTOR W;
1444     XMVECTOR Result;
1445
1446     W = XMVectorSplatW(P);
1447     Z = XMVectorSplatZ(P);
1448     Y = XMVectorSplatY(P);
1449     X = XMVectorSplatX(P);
1450
1451     Result = XMVectorMultiply(W, M.r[3]);
1452     Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
1453     Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
1454     Result = XMVectorMultiplyAdd(X, M.r[0], Result);
1455
1456     return Result;
1457
1458 #elif defined(_XM_SSE_INTRINSICS_)
1459     XMVECTOR X = _mm_shuffle_ps(P,P,_MM_SHUFFLE(0,0,0,0));
1460     XMVECTOR Y = _mm_shuffle_ps(P,P,_MM_SHUFFLE(1,1,1,1));
1461     XMVECTOR Z = _mm_shuffle_ps(P,P,_MM_SHUFFLE(2,2,2,2));
1462     XMVECTOR W = _mm_shuffle_ps(P,P,_MM_SHUFFLE(3,3,3,3));
1463     X = _mm_mul_ps(X, M.r[0]);
1464     Y = _mm_mul_ps(Y, M.r[1]);
1465     Z = _mm_mul_ps(Z, M.r[2]);
1466     W = _mm_mul_ps(W, M.r[3]);
1467     X = _mm_add_ps(X,Z);
1468     Y = _mm_add_ps(Y,W);
1469     X = _mm_add_ps(X,Y);
1470     return X;
1471 #else // _XM_VMX128_INTRINSICS_
1472 #endif // _XM_VMX128_INTRINSICS_
1473 }
1474
1475 //------------------------------------------------------------------------------
1476
1477 XMFINLINE XMFLOAT4* XMPlaneTransformStream
1478 (
1479     XMFLOAT4*       pOutputStream,
1480     size_t          OutputStride,
1481     CONST XMFLOAT4* pInputStream,
1482     size_t          InputStride,
1483     size_t          PlaneCount,
1484     CXMMATRIX       M
1485 )
1486 {
1487     return XMVector4TransformStream(pOutputStream,
1488                                     OutputStride,
1489                                     pInputStream,
1490                                     InputStride,
1491                                     PlaneCount,
1492                                     M);
1493 }
1494
1495 //------------------------------------------------------------------------------
1496 // Conversion operations
1497 //------------------------------------------------------------------------------
1498
1499 //------------------------------------------------------------------------------
1500
1501 XMFINLINE XMVECTOR XMPlaneFromPointNormal
1502 (
1503     FXMVECTOR Point,
1504     FXMVECTOR Normal
1505 )
1506 {
1507 #if defined(_XM_NO_INTRINSICS_)
1508
1509     XMVECTOR W;
1510     XMVECTOR Result;
1511
1512     W = XMVector3Dot(Point, Normal);
1513     W = XMVectorNegate(W);
1514     Result = XMVectorSelect(W, Normal, g_XMSelect1110.v);
1515
1516     return Result;
1517
1518 #elif defined(_XM_SSE_INTRINSICS_)
1519     XMVECTOR W;
1520     XMVECTOR Result;
1521     W = XMVector3Dot(Point,Normal);
1522     W = _mm_mul_ps(W,g_XMNegativeOne);
1523     Result = _mm_and_ps(Normal,g_XMMask3);
1524     W = _mm_and_ps(W,g_XMMaskW);
1525     Result = _mm_or_ps(Result,W);
1526     return Result;
1527 #else // _XM_VMX128_INTRINSICS_
1528 #endif // _XM_VMX128_INTRINSICS_
1529 }
1530
1531 //------------------------------------------------------------------------------
1532
1533 XMFINLINE XMVECTOR XMPlaneFromPoints
1534 (
1535     FXMVECTOR Point1,
1536     FXMVECTOR Point2,
1537     FXMVECTOR Point3
1538 )
1539 {
1540 #if defined(_XM_NO_INTRINSICS_)
1541
1542     XMVECTOR N;
1543     XMVECTOR D;
1544     XMVECTOR V21;
1545     XMVECTOR V31;
1546     XMVECTOR Result;
1547
1548     V21 = XMVectorSubtract(Point1, Point2);
1549     V31 = XMVectorSubtract(Point1, Point3);
1550
1551     N = XMVector3Cross(V21, V31);
1552     N = XMVector3Normalize(N);
1553
1554     D = XMPlaneDotNormal(N, Point1);
1555     D = XMVectorNegate(D);
1556
1557     Result = XMVectorSelect(D, N, g_XMSelect1110.v);
1558
1559     return Result;
1560
1561 #elif defined(_XM_SSE_INTRINSICS_)
1562     XMVECTOR N;
1563     XMVECTOR D;
1564     XMVECTOR V21;
1565     XMVECTOR V31;
1566     XMVECTOR Result;
1567
1568     V21 = _mm_sub_ps(Point1, Point2);
1569     V31 = _mm_sub_ps(Point1, Point3);
1570
1571     N = XMVector3Cross(V21, V31);
1572     N = XMVector3Normalize(N);
1573
1574     D = XMPlaneDotNormal(N, Point1);
1575     D = _mm_mul_ps(D,g_XMNegativeOne);
1576     N = _mm_and_ps(N,g_XMMask3);
1577     D = _mm_and_ps(D,g_XMMaskW);
1578     Result = _mm_or_ps(D,N);
1579     return Result;
1580 #else // _XM_VMX128_INTRINSICS_
1581 #endif // _XM_VMX128_INTRINSICS_
1582 }
1583
1584 /****************************************************************************
1585  *
1586  * Color
1587  *
1588  ****************************************************************************/
1589
1590 //------------------------------------------------------------------------------
1591 // Comparison operations
1592 //------------------------------------------------------------------------------
1593
1594 //------------------------------------------------------------------------------
1595
1596 XMFINLINE BOOL XMColorEqual
1597 (
1598     FXMVECTOR C1,
1599     FXMVECTOR C2
1600 )
1601 {
1602     return XMVector4Equal(C1, C2);
1603 }
1604
1605 //------------------------------------------------------------------------------
1606
1607 XMFINLINE BOOL XMColorNotEqual
1608 (
1609     FXMVECTOR C1,
1610     FXMVECTOR C2
1611 )
1612 {
1613     return XMVector4NotEqual(C1, C2);
1614 }
1615
1616 //------------------------------------------------------------------------------
1617
1618 XMFINLINE BOOL XMColorGreater
1619 (
1620     FXMVECTOR C1,
1621     FXMVECTOR C2
1622 )
1623 {
1624     return XMVector4Greater(C1, C2);
1625 }
1626
1627 //------------------------------------------------------------------------------
1628
1629 XMFINLINE BOOL XMColorGreaterOrEqual
1630 (
1631     FXMVECTOR C1,
1632     FXMVECTOR C2
1633 )
1634 {
1635     return XMVector4GreaterOrEqual(C1, C2);
1636 }
1637
1638 //------------------------------------------------------------------------------
1639
1640 XMFINLINE BOOL XMColorLess
1641 (
1642     FXMVECTOR C1,
1643     FXMVECTOR C2
1644 )
1645 {
1646     return XMVector4Less(C1, C2);
1647 }
1648
1649 //------------------------------------------------------------------------------
1650
1651 XMFINLINE BOOL XMColorLessOrEqual
1652 (
1653     FXMVECTOR C1,
1654     FXMVECTOR C2
1655 )
1656 {
1657     return XMVector4LessOrEqual(C1, C2);
1658 }
1659
1660 //------------------------------------------------------------------------------
1661
1662 XMFINLINE BOOL XMColorIsNaN
1663 (
1664     FXMVECTOR C
1665 )
1666 {
1667     return XMVector4IsNaN(C);
1668 }
1669
1670 //------------------------------------------------------------------------------
1671
1672 XMFINLINE BOOL XMColorIsInfinite
1673 (
1674     FXMVECTOR C
1675 )
1676 {
1677     return XMVector4IsInfinite(C);
1678 }
1679
1680 //------------------------------------------------------------------------------
1681 // Computation operations
1682 //------------------------------------------------------------------------------
1683
1684 //------------------------------------------------------------------------------
1685
1686 XMFINLINE XMVECTOR XMColorNegative
1687 (
1688     FXMVECTOR vColor
1689 )
1690 {
1691 #if defined(_XM_NO_INTRINSICS_)
1692 //    XMASSERT(XMVector4GreaterOrEqual(C, XMVectorReplicate(0.0f)));
1693 //    XMASSERT(XMVector4LessOrEqual(C, XMVectorReplicate(1.0f)));
1694     XMVECTOR vResult = {
1695         1.0f - vColor.vector4_f32[0],
1696         1.0f - vColor.vector4_f32[1],
1697         1.0f - vColor.vector4_f32[2],
1698         vColor.vector4_f32[3]
1699     };
1700     return vResult;
1701
1702 #elif defined(_XM_SSE_INTRINSICS_)
1703     // Negate only x,y and z.
1704     XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3);
1705     // Add 1,1,1,0 to -x,-y,-z,w
1706         return _mm_add_ps(vTemp,g_XMOne3);
1707 #else // _XM_VMX128_INTRINSICS_
1708 #endif // _XM_VMX128_INTRINSICS_
1709 }
1710
1711 //------------------------------------------------------------------------------
1712
1713 XMFINLINE XMVECTOR XMColorModulate
1714 (
1715     FXMVECTOR C1,
1716     FXMVECTOR C2
1717 )
1718 {
1719     return XMVectorMultiply(C1, C2);
1720 }
1721
1722 //------------------------------------------------------------------------------
1723
1724 XMFINLINE XMVECTOR XMColorAdjustSaturation
1725 (
1726     FXMVECTOR vColor,
1727     FLOAT    fSaturation
1728 )
1729 {
1730 #if defined(_XM_NO_INTRINSICS_)
1731     CONST XMVECTOR gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
1732
1733     // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2];
1734     // Result = (C - Luminance) * Saturation + Luminance;
1735
1736     FLOAT fLuminance = (vColor.vector4_f32[0]*gvLuminance.vector4_f32[0])+(vColor.vector4_f32[1]*gvLuminance.vector4_f32[1])+(vColor.vector4_f32[2]*gvLuminance.vector4_f32[2]);
1737     XMVECTOR vResult = {
1738         ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance,
1739         ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance,
1740         ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance,
1741         vColor.vector4_f32[3]};
1742     return vResult;
1743
1744 #elif defined(_XM_SSE_INTRINSICS_)
1745     static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
1746 // Mul RGB by intensity constants
1747     XMVECTOR vLuminance = _mm_mul_ps(vColor,gvLuminance);
1748 // vResult.x = vLuminance.y, vResult.y = vLuminance.y,
1749 // vResult.z = vLuminance.z, vResult.w = vLuminance.z
1750     XMVECTOR vResult = vLuminance;
1751     vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,1,1));
1752 // vLuminance.x += vLuminance.y
1753     vLuminance = _mm_add_ss(vLuminance,vResult);
1754 // Splat vLuminance.z
1755     vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,2,2));
1756 // vLuminance.x += vLuminance.z (Dot product)
1757     vLuminance = _mm_add_ss(vLuminance,vResult);
1758 // Splat vLuminance
1759     vLuminance = _mm_shuffle_ps(vLuminance,vLuminance,_MM_SHUFFLE(0,0,0,0));
1760 // Splat fSaturation
1761     XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
1762 // vResult = ((vColor-vLuminance)*vSaturation)+vLuminance;
1763     vResult = _mm_sub_ps(vColor,vLuminance);
1764     vResult = _mm_mul_ps(vResult,vSaturation);
1765     vResult = _mm_add_ps(vResult,vLuminance);
1766 // Retain w from the source color
1767     vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2));   // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
1768     vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0));  // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
1769     return vResult;
1770 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
1771 #endif // _XM_VMX128_INTRINSICS_
1772 }
1773
1774 //------------------------------------------------------------------------------
1775
1776 XMFINLINE XMVECTOR XMColorAdjustContrast
1777 (
1778     FXMVECTOR vColor,
1779     FLOAT    fContrast
1780 )
1781 {
1782 #if defined(_XM_NO_INTRINSICS_)
1783     // Result = (vColor - 0.5f) * fContrast + 0.5f;
1784     XMVECTOR vResult = {
1785         ((vColor.vector4_f32[0]-0.5f) * fContrast) + 0.5f,
1786         ((vColor.vector4_f32[1]-0.5f) * fContrast) + 0.5f,
1787         ((vColor.vector4_f32[2]-0.5f) * fContrast) + 0.5f,
1788         vColor.vector4_f32[3]        // Leave W untouched
1789     };
1790     return vResult;
1791
1792 #elif defined(_XM_SSE_INTRINSICS_)
1793     XMVECTOR vScale = _mm_set_ps1(fContrast);           // Splat the scale
1794     XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf);  // Subtract 0.5f from the source (Saving source)
1795     vResult = _mm_mul_ps(vResult,vScale);               // Mul by scale
1796     vResult = _mm_add_ps(vResult,g_XMOneHalf);          // Add 0.5f
1797 // Retain w from the source color
1798     vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2));   // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
1799     vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0));  // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
1800     return vResult;
1801 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
1802 #endif // _XM_VMX128_INTRINSICS_
1803 }
1804
1805 /****************************************************************************
1806  *
1807  * Miscellaneous
1808  *
1809  ****************************************************************************/
1810
1811 //------------------------------------------------------------------------------
1812
1813 XMINLINE BOOL XMVerifyCPUSupport()
1814 {
1815 #if defined(_XM_NO_INTRINSICS_) || !defined(_XM_SSE_INTRINSICS_)
1816         return TRUE;
1817 #else // _XM_SSE_INTRINSICS_
1818         // Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail
1819         // Detecting SSE2 on older versions of Windows would require using cpuid directly
1820         return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) );
1821 #endif
1822 }
1823
1824
1825 //------------------------------------------------------------------------------
1826
1827 #define XMASSERT_LINE_STRING_SIZE 16
1828
1829 XMINLINE VOID XMAssert
1830 (
1831     CONST CHAR* pExpression,
1832     CONST CHAR* pFileName,
1833     UINT        LineNumber
1834 )
1835 {
1836     CHAR        aLineString[XMASSERT_LINE_STRING_SIZE];
1837     CHAR*       pLineString;
1838     UINT        Line;
1839
1840     aLineString[XMASSERT_LINE_STRING_SIZE - 2] = '0';
1841     aLineString[XMASSERT_LINE_STRING_SIZE - 1] = '\0';
1842     for (Line = LineNumber, pLineString = aLineString + XMASSERT_LINE_STRING_SIZE - 2;
1843          Line != 0 && pLineString >= aLineString;
1844          Line /= 10, pLineString--)
1845     {
1846         *pLineString = (CHAR)('0' + (Line % 10));
1847     }
1848
1849 #ifndef NO_OUTPUT_DEBUG_STRING
1850     OutputDebugStringA("Assertion failed: ");
1851     OutputDebugStringA(pExpression);
1852     OutputDebugStringA(", file ");
1853     OutputDebugStringA(pFileName);
1854     OutputDebugStringA(", line ");
1855     OutputDebugStringA(pLineString + 1);
1856     OutputDebugStringA("\r\n");
1857 #else
1858     DbgPrint("Assertion failed: %s, file %s, line %d\r\n", pExpression, pFileName, LineNumber);
1859 #endif
1860
1861     __debugbreak();
1862 }
1863
1864 //------------------------------------------------------------------------------
1865
1866 XMFINLINE XMVECTOR XMFresnelTerm
1867 (
1868     FXMVECTOR CosIncidentAngle,
1869     FXMVECTOR RefractionIndex
1870 )
1871 {
1872 #if defined(_XM_NO_INTRINSICS_)
1873
1874     XMVECTOR G;
1875     XMVECTOR D, S;
1876     XMVECTOR V0, V1, V2, V3;
1877     XMVECTOR Result;
1878
1879     // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
1880     // c = CosIncidentAngle
1881     // g = sqrt(c^2 + RefractionIndex^2 - 1)
1882
1883     XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));
1884
1885     G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
1886     G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G);
1887     G = XMVectorAbs(G);
1888     G = XMVectorSqrt(G);
1889
1890     S = XMVectorAdd(G, CosIncidentAngle);
1891     D = XMVectorSubtract(G, CosIncidentAngle);
1892
1893     V0 = XMVectorMultiply(D, D);
1894     V1 = XMVectorMultiply(S, S);
1895     V1 = XMVectorReciprocal(V1);
1896     V0 = XMVectorMultiply(g_XMOneHalf.v, V0);
1897     V0 = XMVectorMultiply(V0, V1);
1898
1899     V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v);
1900     V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v);
1901     V2 = XMVectorMultiply(V2, V2);
1902     V3 = XMVectorMultiply(V3, V3);
1903     V3 = XMVectorReciprocal(V3);
1904     V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v);
1905
1906     Result = XMVectorMultiply(V0, V2);
1907
1908     Result = XMVectorSaturate(Result);
1909
1910     return Result;
1911
1912 #elif defined(_XM_SSE_INTRINSICS_)
1913     // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
1914     // c = CosIncidentAngle
1915     // g = sqrt(c^2 + RefractionIndex^2 - 1)
1916
1917     XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));
1918
1919     // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
1920     XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex);
1921     XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle);
1922     G = _mm_sub_ps(G,g_XMOne);
1923     vTemp = _mm_add_ps(vTemp,G);
1924     // max((0-vTemp),vTemp) == abs(vTemp)
1925     // The abs is needed to deal with refraction and cosine being zero
1926         G = _mm_setzero_ps();
1927         G = _mm_sub_ps(G,vTemp);
1928         G = _mm_max_ps(G,vTemp);
1929     // Last operation, the sqrt()
1930     G = _mm_sqrt_ps(G);
1931
1932     // Calc G-C and G+C
1933     XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle);
1934     XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle);
1935     // Perform the term (0.5f *(g - c)^2) / (g + c)^2
1936     XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC);
1937     vTemp = _mm_mul_ps(GAddC,GAddC);
1938     vResult = _mm_mul_ps(vResult,g_XMOneHalf);
1939     vResult = _mm_div_ps(vResult,vTemp);
1940     // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
1941     GAddC = _mm_mul_ps(GAddC,CosIncidentAngle);
1942     GSubC = _mm_mul_ps(GSubC,CosIncidentAngle);
1943     GAddC = _mm_sub_ps(GAddC,g_XMOne);
1944     GSubC = _mm_add_ps(GSubC,g_XMOne);
1945     GAddC = _mm_mul_ps(GAddC,GAddC);
1946     GSubC = _mm_mul_ps(GSubC,GSubC);
1947     GAddC = _mm_div_ps(GAddC,GSubC);
1948     GAddC = _mm_add_ps(GAddC,g_XMOne);
1949     // Multiply the two term parts
1950     vResult = _mm_mul_ps(vResult,GAddC);
1951     // Clamp to 0.0 - 1.0f
1952     vResult = _mm_max_ps(vResult,g_XMZero);
1953     vResult = _mm_min_ps(vResult,g_XMOne);
1954     return vResult;
1955 #else // _XM_VMX128_INTRINSICS_
1956 #endif // _XM_VMX128_INTRINSICS_
1957 }
1958
1959 //------------------------------------------------------------------------------
1960
1961 XMFINLINE BOOL XMScalarNearEqual
1962 (
1963     FLOAT S1,
1964     FLOAT S2,
1965     FLOAT Epsilon
1966 )
1967 {
1968     FLOAT Delta = S1 - S2;
1969 #if defined(_XM_NO_INTRINSICS_)
1970     UINT  AbsDelta = *(const UINT*)&Delta & 0x7FFFFFFF;
1971     return (*(FLOAT*)&AbsDelta <= Epsilon);
1972 #elif defined(_XM_SSE_INTRINSICS_)
1973     return (fabsf(Delta) <= Epsilon);
1974 #else
1975     return (__fabs(Delta) <= Epsilon);
1976 #endif
1977 }
1978
1979 //------------------------------------------------------------------------------
1980 // Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI
1981 XMFINLINE FLOAT XMScalarModAngle
1982 (
1983     FLOAT Angle
1984 )
1985 {
1986     // Note: The modulo is performed with unsigned math only to work
1987     // around a precision error on numbers that are close to PI
1988     float fTemp;
1989 #if defined(_XM_NO_INTRINSICS_) || !defined(_XM_VMX128_INTRINSICS_)
1990     // Normalize the range from 0.0f to XM_2PI
1991     Angle = Angle + XM_PI;
1992     // Perform the modulo, unsigned
1993     fTemp = fabsf(Angle);
1994     fTemp = fTemp - (XM_2PI * (FLOAT)((INT)(fTemp/XM_2PI)));
1995     // Restore the number to the range of -XM_PI to XM_PI-epsilon
1996     fTemp = fTemp - XM_PI;
1997     // If the modulo'd value was negative, restore negation
1998     if (Angle<0.0f) {
1999         fTemp = -fTemp;
2000     }
2001     return fTemp;
2002 #else
2003 #endif
2004 }
2005
2006 //------------------------------------------------------------------------------
2007
2008 XMINLINE FLOAT XMScalarSin
2009 (
2010     FLOAT Value
2011 )
2012 {
2013 #if defined(_XM_NO_INTRINSICS_)
2014
2015     FLOAT                  ValueMod;
2016     FLOAT                  ValueSq;
2017     XMVECTOR               V0123, V0246, V1357, V9111315, V17192123;
2018     XMVECTOR               V1, V7, V8;
2019     XMVECTOR               R0, R1, R2;
2020
2021     ValueMod = XMScalarModAngle(Value);
2022
2023     // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! +
2024     //           V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
2025
2026     ValueSq = ValueMod * ValueMod;
2027
2028     V0123     = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
2029     V1        = XMVectorSplatY(V0123);
2030     V0246     = XMVectorMultiply(V0123, V0123);
2031     V1357     = XMVectorMultiply(V0246, V1);
2032     V7        = XMVectorSplatW(V1357);
2033     V8        = XMVectorMultiply(V7, V1);
2034     V9111315  = XMVectorMultiply(V1357, V8);
2035     V17192123 = XMVectorMultiply(V9111315, V8);
2036
2037     R0        = XMVector4Dot(V1357, g_XMSinCoefficients0.v);
2038     R1        = XMVector4Dot(V9111315, g_XMSinCoefficients1.v);
2039     R2        = XMVector4Dot(V17192123, g_XMSinCoefficients2.v);
2040
2041     return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0];
2042
2043 #elif defined(_XM_SSE_INTRINSICS_)
2044     return sinf( Value );
2045 #else // _XM_VMX128_INTRINSICS_
2046 #endif // _XM_VMX128_INTRINSICS_
2047 }
2048
2049 //------------------------------------------------------------------------------
2050
2051 XMINLINE FLOAT XMScalarCos
2052 (
2053     FLOAT Value
2054 )
2055 {
2056 #if defined(_XM_NO_INTRINSICS_)
2057
2058     FLOAT                  ValueMod;
2059     FLOAT                  ValueSq;
2060     XMVECTOR               V0123, V0246, V8101214, V16182022;
2061     XMVECTOR               V2, V6, V8;
2062     XMVECTOR               R0, R1, R2;
2063
2064     ValueMod = XMScalarModAngle(Value);
2065
2066     // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! +
2067     //           V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
2068
2069     ValueSq = ValueMod * ValueMod;
2070
2071     V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
2072     V0246 = XMVectorMultiply(V0123, V0123);
2073
2074     V2 = XMVectorSplatZ(V0123);
2075     V6 = XMVectorSplatW(V0246);
2076     V8 = XMVectorMultiply(V6, V2);
2077
2078     V8101214 = XMVectorMultiply(V0246, V8);
2079     V16182022 = XMVectorMultiply(V8101214, V8);
2080
2081     R0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v);
2082     R1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v);
2083     R2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v);
2084
2085     return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0];
2086
2087 #elif defined(_XM_SSE_INTRINSICS_)
2088     return cosf(Value);
2089 #else // _XM_VMX128_INTRINSICS_
2090 #endif // _XM_VMX128_INTRINSICS_
2091 }
2092
2093 //------------------------------------------------------------------------------
2094
2095 XMINLINE VOID XMScalarSinCos
2096 (
2097     FLOAT* pSin,
2098     FLOAT* pCos,
2099     FLOAT  Value
2100 )
2101 {
2102 #if defined(_XM_NO_INTRINSICS_)
2103
2104     FLOAT                  ValueMod;
2105     FLOAT                  ValueSq;
2106     XMVECTOR               V0123, V0246, V1357, V8101214, V9111315, V16182022, V17192123;
2107     XMVECTOR               V1, V2, V6, V8;
2108     XMVECTOR               S0, S1, S2, C0, C1, C2;
2109
2110     XMASSERT(pSin);
2111     XMASSERT(pCos);
2112
2113     ValueMod = XMScalarModAngle(Value);
2114
2115     // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! +
2116     //           V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
2117     // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! +
2118     //           V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
2119
2120     ValueSq = ValueMod * ValueMod;
2121
2122     V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
2123
2124     V1 = XMVectorSplatY(V0123);
2125     V2 = XMVectorSplatZ(V0123);
2126
2127     V0246 = XMVectorMultiply(V0123, V0123);
2128     V1357 = XMVectorMultiply(V0246, V1);
2129
2130     V6 = XMVectorSplatW(V0246);
2131     V8 = XMVectorMultiply(V6, V2);
2132
2133     V8101214 = XMVectorMultiply(V0246, V8);
2134     V9111315 = XMVectorMultiply(V1357, V8);
2135     V16182022 = XMVectorMultiply(V8101214, V8);
2136     V17192123 = XMVectorMultiply(V9111315, V8);
2137
2138     C0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v);
2139     S0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v);
2140     C1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v);
2141     S1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v);
2142     C2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v);
2143     S2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v);
2144
2145     *pCos = C0.vector4_f32[0] + C1.vector4_f32[0] + C2.vector4_f32[0];
2146     *pSin = S0.vector4_f32[0] + S1.vector4_f32[0] + S2.vector4_f32[0];
2147
2148 #elif defined(_XM_SSE_INTRINSICS_)
2149     XMASSERT(pSin);
2150     XMASSERT(pCos);
2151
2152     *pSin = sinf(Value);
2153     *pCos = cosf(Value);
2154 #else // _XM_VMX128_INTRINSICS_
2155 #endif // _XM_VMX128_INTRINSICS_
2156 }
2157
2158 //------------------------------------------------------------------------------
2159
2160 XMINLINE FLOAT XMScalarASin
2161 (
2162     FLOAT Value
2163 )
2164 {
2165 #if defined(_XM_NO_INTRINSICS_)
2166
2167     FLOAT AbsValue, Value2, Value3, D;
2168     XMVECTOR AbsV, R0, R1, Result;
2169     XMVECTOR V3;
2170
2171     *(UINT*)&AbsValue = *(const UINT*)&Value & 0x7FFFFFFF;
2172
2173     Value2 = Value * AbsValue;
2174     Value3 = Value * Value2;
2175     D = (Value - Value2) / sqrtf(1.00000011921f - AbsValue);
2176
2177     AbsV = XMVectorReplicate(AbsValue);
2178
2179     V3.vector4_f32[0] = Value3;
2180     V3.vector4_f32[1] = 1.0f;
2181     V3.vector4_f32[2] = Value3;
2182     V3.vector4_f32[3] = 1.0f;
2183
2184     R1 = XMVectorSet(D, D, Value, Value);
2185     R1 = XMVectorMultiply(R1, V3);
2186
2187     R0 = XMVectorMultiplyAdd(AbsV, g_XMASinCoefficients0.v, g_XMASinCoefficients1.v);
2188     R0 = XMVectorMultiplyAdd(AbsV, R0, g_XMASinCoefficients2.v);
2189
2190     Result = XMVector4Dot(R0, R1);
2191
2192     return Result.vector4_f32[0];
2193
2194 #elif defined(_XM_SSE_INTRINSICS_)
2195     return asinf(Value);
2196 #else // _XM_VMX128_INTRINSICS_
2197 #endif // _XM_VMX128_INTRINSICS_
2198 }
2199
2200 //------------------------------------------------------------------------------
2201
2202 XMINLINE FLOAT XMScalarACos
2203 (
2204     FLOAT Value
2205 )
2206 {
2207 #if defined(_XM_NO_INTRINSICS_)
2208
2209     return XM_PIDIV2 - XMScalarASin(Value);
2210
2211 #elif defined(_XM_SSE_INTRINSICS_)
2212     return acosf(Value);
2213 #else // _XM_VMX128_INTRINSICS_
2214 #endif // _XM_VMX128_INTRINSICS_
2215 }
2216
2217 //------------------------------------------------------------------------------
2218
2219 XMFINLINE FLOAT XMScalarSinEst
2220 (
2221     FLOAT Value
2222 )
2223 {
2224 #if defined(_XM_NO_INTRINSICS_)
2225
2226     FLOAT                  ValueSq;
2227     XMVECTOR               V;
2228     XMVECTOR               Y;
2229     XMVECTOR               Result;
2230
2231     XMASSERT(Value >= -XM_PI);
2232     XMASSERT(Value < XM_PI);
2233
2234     // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
2235
2236     ValueSq = Value * Value;
2237
2238     V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
2239     Y = XMVectorSplatY(V);
2240     V = XMVectorMultiply(V, V);
2241     V = XMVectorMultiply(V, Y);
2242
2243     Result = XMVector4Dot(V, g_XMSinEstCoefficients.v);
2244
2245     return Result.vector4_f32[0];
2246
2247 #elif defined(_XM_SSE_INTRINSICS_)
2248     XMASSERT(Value >= -XM_PI);
2249     XMASSERT(Value < XM_PI);
2250     float ValueSq = Value*Value;
2251     XMVECTOR vValue = _mm_set_ps1(Value);
2252     XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f);
2253     vTemp = _mm_mul_ps(vTemp,vTemp);
2254     vTemp = _mm_mul_ps(vTemp,vValue);
2255     // vTemp = Value,Value^3,Value^5,Value^7
2256     vTemp = _mm_mul_ps(vTemp,g_XMSinEstCoefficients);
2257     vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
2258     vValue = _mm_add_ps(vValue,vTemp);          // Add Z = X+Z; W = Y+W;
2259     vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
2260     vTemp = _mm_add_ps(vTemp,vValue);           // Add Z and W together
2261     vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));    // Splat Z and return
2262 #if defined(_MSC_VER) && (_MSC_VER>=1500)
2263     return _mm_cvtss_f32(vTemp);
2264 #else
2265     return vTemp.m128_f32[0];
2266 #endif
2267 #else // _XM_VMX128_INTRINSICS_
2268 #endif // _XM_VMX128_INTRINSICS_
2269 }
2270
2271 //------------------------------------------------------------------------------
2272
2273 XMFINLINE FLOAT XMScalarCosEst
2274 (
2275     FLOAT Value
2276 )
2277 {
2278 #if defined(_XM_NO_INTRINSICS_)
2279     FLOAT    ValueSq;
2280     XMVECTOR V;
2281     XMVECTOR Result;
2282     XMASSERT(Value >= -XM_PI);
2283     XMASSERT(Value < XM_PI);
2284     // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
2285     ValueSq = Value * Value;
2286     V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
2287     V = XMVectorMultiply(V, V);
2288     Result = XMVector4Dot(V, g_XMCosEstCoefficients.v);
2289     return Result.vector4_f32[0];
2290 #elif defined(_XM_SSE_INTRINSICS_)
2291     XMASSERT(Value >= -XM_PI);
2292     XMASSERT(Value < XM_PI);
2293     float ValueSq = Value*Value;
2294     XMVECTOR vValue = _mm_setzero_ps();
2295     XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f);
2296     vTemp = _mm_mul_ps(vTemp,vTemp);
2297     // vTemp = 1.0f,Value^2,Value^4,Value^6
2298     vTemp = _mm_mul_ps(vTemp,g_XMCosEstCoefficients);
2299     vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
2300     vValue = _mm_add_ps(vValue,vTemp);          // Add Z = X+Z; W = Y+W;
2301     vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
2302     vTemp = _mm_add_ps(vTemp,vValue);           // Add Z and W together
2303     vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));    // Splat Z and return
2304 #if defined(_MSC_VER) && (_MSC_VER>=1500)
2305     return _mm_cvtss_f32(vTemp);
2306 #else
2307     return vTemp.m128_f32[0];
2308 #endif
2309 #else // _XM_VMX128_INTRINSICS_
2310 #endif // _XM_VMX128_INTRINSICS_
2311 }
2312
2313 //------------------------------------------------------------------------------
2314
2315 XMFINLINE VOID XMScalarSinCosEst
2316 (
2317     FLOAT* pSin,
2318     FLOAT* pCos,
2319     FLOAT  Value
2320 )
2321 {
2322 #if defined(_XM_NO_INTRINSICS_)
2323
2324     FLOAT    ValueSq;
2325     XMVECTOR V, Sin, Cos;
2326     XMVECTOR Y;
2327
2328     XMASSERT(pSin);
2329     XMASSERT(pCos);
2330     XMASSERT(Value >= -XM_PI);
2331     XMASSERT(Value < XM_PI);
2332
2333     // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
2334     // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
2335
2336     ValueSq = Value * Value;
2337     V = XMVectorSet(1.0f, Value, ValueSq, Value * ValueSq);
2338     Y = XMVectorSplatY(V);
2339     Cos = XMVectorMultiply(V, V);
2340     Sin = XMVectorMultiply(Cos, Y);
2341
2342     Cos = XMVector4Dot(Cos, g_XMCosEstCoefficients.v);
2343     Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients.v);
2344
2345     *pCos = Cos.vector4_f32[0];
2346     *pSin = Sin.vector4_f32[0];
2347
2348 #elif defined(_XM_SSE_INTRINSICS_)
2349     XMASSERT(pSin);
2350     XMASSERT(pCos);
2351     XMASSERT(Value >= -XM_PI);
2352     XMASSERT(Value < XM_PI);
2353     float ValueSq = Value * Value;
2354     XMVECTOR Cos = _mm_set_ps(Value * ValueSq,ValueSq,Value,1.0f);
2355     XMVECTOR Sin = _mm_set_ps1(Value);
2356     Cos = _mm_mul_ps(Cos,Cos);
2357     Sin = _mm_mul_ps(Sin,Cos);
2358     // Cos = 1.0f,Value^2,Value^4,Value^6
2359     Cos = XMVector4Dot(Cos,g_XMCosEstCoefficients);
2360     _mm_store_ss(pCos,Cos);
2361     // Sin = Value,Value^3,Value^5,Value^7
2362     Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients);
2363     _mm_store_ss(pSin,Sin);
2364 #else // _XM_VMX128_INTRINSICS_
2365 #endif // _XM_VMX128_INTRINSICS_
2366 }
2367
2368 //------------------------------------------------------------------------------
2369
2370 XMFINLINE FLOAT XMScalarASinEst
2371 (
2372     FLOAT Value
2373 )
2374 {
2375 #if defined(_XM_NO_INTRINSICS_)
2376
2377     XMVECTOR VR, CR, CS;
2378     XMVECTOR Result;
2379     FLOAT AbsV, V2, D;
2380     CONST FLOAT OnePlusEps = 1.00000011921f;
2381
2382     *(UINT*)&AbsV = *(const UINT*)&Value & 0x7FFFFFFF;
2383     V2 = Value * AbsV;
2384     D = OnePlusEps - AbsV;
2385
2386     CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
2387     VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
2388     CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);
2389
2390     Result = XMVector4Dot(VR, CR);
2391
2392     return Result.vector4_f32[0];
2393
2394 #elif defined(_XM_SSE_INTRINSICS_)
2395     CONST FLOAT OnePlusEps = 1.00000011921f;
2396     FLOAT AbsV = fabsf(Value);
2397     FLOAT V2 = Value * AbsV;    // Square with sign retained
2398     FLOAT D = OnePlusEps - AbsV;
2399
2400     XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
2401     XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
2402     Result = _mm_mul_ps(Result, g_XMASinEstCoefficients);
2403     Result = XMVector4Dot(VR,Result);
2404 #if defined(_MSC_VER) && (_MSC_VER>=1500)
2405     return _mm_cvtss_f32(Result);
2406 #else
2407     return Result.m128_f32[0];
2408 #endif
2409 #else // _XM_VMX128_INTRINSICS_
2410 #endif // _XM_VMX128_INTRINSICS_
2411 }
2412
2413 //------------------------------------------------------------------------------
2414
2415 XMFINLINE FLOAT XMScalarACosEst
2416 (
2417     FLOAT Value
2418 )
2419 {
2420 #if defined(_XM_NO_INTRINSICS_)
2421
2422     XMVECTOR VR, CR, CS;
2423     XMVECTOR Result;
2424     FLOAT AbsV, V2, D;
2425     CONST FLOAT OnePlusEps = 1.00000011921f;
2426
2427     // return XM_PIDIV2 - XMScalarASin(Value);
2428
2429     *(UINT*)&AbsV = *(const UINT*)&Value & 0x7FFFFFFF;
2430     V2 = Value * AbsV;
2431     D = OnePlusEps - AbsV;
2432
2433     CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
2434     VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
2435     CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);
2436
2437     Result = XMVector4Dot(VR, CR);
2438
2439     return XM_PIDIV2 - Result.vector4_f32[0];
2440
2441 #elif defined(_XM_SSE_INTRINSICS_)
2442     CONST FLOAT OnePlusEps = 1.00000011921f;
2443     FLOAT AbsV = fabsf(Value);
2444     FLOAT V2 = Value * AbsV;    // Value^2 retaining sign
2445     FLOAT D = OnePlusEps - AbsV;
2446     XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
2447     XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
2448     Result = _mm_mul_ps(Result,g_XMASinEstCoefficients);
2449     Result = XMVector4Dot(VR,Result);
2450 #if defined(_MSC_VER) && (_MSC_VER>=1500)
2451     return XM_PIDIV2 - _mm_cvtss_f32(Result);
2452 #else
2453     return XM_PIDIV2 - Result.m128_f32[0];
2454 #endif
2455 #else // _XM_VMX128_INTRINSICS_
2456 #endif // _XM_VMX128_INTRINSICS_
2457 }
2458
2459 #endif // __XNAMATHMISC_INL__
2460