1 /************************************************************************
3 * xnamathvector.inl -- SIMD C++ Math library for Windows and Xbox 360 *
6 * Copyright (c) Microsoft Corp. All rights reserved. *
8 ************************************************************************/
10 #if defined(_MSC_VER) && (_MSC_VER > 1000)
14 #ifndef __XNAMATHVECTOR_INL__
15 #define __XNAMATHVECTOR_INL__
17 #if defined(_XM_NO_INTRINSICS_)
18 #define XMISNAN(x) ((*(UINT*)&(x) & 0x7F800000) == 0x7F800000 && (*(UINT*)&(x) & 0x7FFFFF) != 0)
19 #define XMISINF(x) ((*(UINT*)&(x) & 0x7FFFFFFF) == 0x7F800000)
22 /****************************************************************************
26 ****************************************************************************/
28 //------------------------------------------------------------------------------
29 // Assignment operations
30 //------------------------------------------------------------------------------
32 //------------------------------------------------------------------------------
33 // Return a vector with all elements equaling zero
34 XMFINLINE XMVECTOR XMVectorZero()
36 #if defined(_XM_NO_INTRINSICS_)
37 XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
39 #elif defined(_XM_SSE_INTRINSICS_)
40 return _mm_setzero_ps();
41 #else // _XM_VMX128_INTRINSICS_
42 #endif // _XM_VMX128_INTRINSICS_
45 //------------------------------------------------------------------------------
46 // Initialize a vector with four floating point values
47 XMFINLINE XMVECTOR XMVectorSet
55 #if defined(_XM_NO_INTRINSICS_)
56 XMVECTORF32 vResult = {x,y,z,w};
58 #elif defined(_XM_SSE_INTRINSICS_)
59 return _mm_set_ps( w, z, y, x );
60 #else // _XM_VMX128_INTRINSICS_
61 #endif // _XM_VMX128_INTRINSICS_
64 //------------------------------------------------------------------------------
65 // Initialize a vector with four integer values
66 XMFINLINE XMVECTOR XMVectorSetInt
74 #if defined(_XM_NO_INTRINSICS_)
75 XMVECTORU32 vResult = {x,y,z,w};
77 #elif defined(_XM_SSE_INTRINSICS_)
78 __m128i V = _mm_set_epi32( w, z, y, x );
79 return reinterpret_cast<__m128 *>(&V)[0];
80 #else // _XM_VMX128_INTRINSICS_
81 #endif // _XM_VMX128_INTRINSICS_
84 //------------------------------------------------------------------------------
85 // Initialize a vector with a replicated floating point value
86 XMFINLINE XMVECTOR XMVectorReplicate
91 #if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
92 XMVECTORF32 vResult = {Value,Value,Value,Value};
94 #elif defined(_XM_SSE_INTRINSICS_)
95 return _mm_set_ps1( Value );
96 #else // _XM_VMX128_INTRINSICS_
97 #endif // _XM_VMX128_INTRINSICS_
100 //------------------------------------------------------------------------------
101 // Initialize a vector with a replicated floating point value passed by pointer
102 XMFINLINE XMVECTOR XMVectorReplicatePtr
107 #if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
108 FLOAT Value = pValue[0];
109 XMVECTORF32 vResult = {Value,Value,Value,Value};
111 #elif defined(_XM_SSE_INTRINSICS_)
112 return _mm_load_ps1( pValue );
113 #else // _XM_VMX128_INTRINSICS_
114 #endif // _XM_VMX128_INTRINSICS_
117 //------------------------------------------------------------------------------
118 // Initialize a vector with a replicated integer value
119 XMFINLINE XMVECTOR XMVectorReplicateInt
124 #if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
125 XMVECTORU32 vResult = {Value,Value,Value,Value};
127 #elif defined(_XM_SSE_INTRINSICS_)
128 __m128i vTemp = _mm_set1_epi32( Value );
129 return reinterpret_cast<const __m128 *>(&vTemp)[0];
130 #else // _XM_VMX128_INTRINSICS_
131 #endif // _XM_VMX128_INTRINSICS_
134 //------------------------------------------------------------------------------
135 // Initialize a vector with a replicated integer value passed by pointer
136 XMFINLINE XMVECTOR XMVectorReplicateIntPtr
141 #if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
142 UINT Value = pValue[0];
143 XMVECTORU32 vResult = {Value,Value,Value,Value};
145 #elif defined(_XM_SSE_INTRINSICS_)
146 return _mm_load_ps1(reinterpret_cast<const float *>(pValue));
147 #else // _XM_VMX128_INTRINSICS_
148 #endif // _XM_VMX128_INTRINSICS_
151 //------------------------------------------------------------------------------
152 // Initialize a vector with all bits set (true mask)
153 XMFINLINE XMVECTOR XMVectorTrueInt()
155 #if defined(_XM_NO_INTRINSICS_)
156 XMVECTORU32 vResult = {0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU};
158 #elif defined(_XM_SSE_INTRINSICS_)
159 __m128i V = _mm_set1_epi32(-1);
160 return reinterpret_cast<__m128 *>(&V)[0];
161 #else // _XM_VMX128_INTRINSICS_
162 #endif // _XM_VMX128_INTRINSICS_
165 //------------------------------------------------------------------------------
166 // Initialize a vector with all bits clear (false mask)
167 XMFINLINE XMVECTOR XMVectorFalseInt()
169 #if defined(_XM_NO_INTRINSICS_)
170 XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
172 #elif defined(_XM_SSE_INTRINSICS_)
173 return _mm_setzero_ps();
174 #else // _XM_VMX128_INTRINSICS_
175 #endif // _XM_VMX128_INTRINSICS_
178 //------------------------------------------------------------------------------
179 // Replicate the x component of the vector
180 XMFINLINE XMVECTOR XMVectorSplatX
185 #if defined(_XM_NO_INTRINSICS_)
187 vResult.vector4_f32[0] =
188 vResult.vector4_f32[1] =
189 vResult.vector4_f32[2] =
190 vResult.vector4_f32[3] = V.vector4_f32[0];
192 #elif defined(_XM_SSE_INTRINSICS_)
193 return _mm_shuffle_ps( V, V, _MM_SHUFFLE(0, 0, 0, 0) );
194 #else // _XM_VMX128_INTRINSICS_
195 #endif // _XM_VMX128_INTRINSICS_
198 //------------------------------------------------------------------------------
199 // Replicate the y component of the vector
200 XMFINLINE XMVECTOR XMVectorSplatY
205 #if defined(_XM_NO_INTRINSICS_)
207 vResult.vector4_f32[0] =
208 vResult.vector4_f32[1] =
209 vResult.vector4_f32[2] =
210 vResult.vector4_f32[3] = V.vector4_f32[1];
212 #elif defined(_XM_SSE_INTRINSICS_)
213 return _mm_shuffle_ps( V, V, _MM_SHUFFLE(1, 1, 1, 1) );
214 #else // _XM_VMX128_INTRINSICS_
215 #endif // _XM_VMX128_INTRINSICS_
218 //------------------------------------------------------------------------------
219 // Replicate the z component of the vector
220 XMFINLINE XMVECTOR XMVectorSplatZ
225 #if defined(_XM_NO_INTRINSICS_)
227 vResult.vector4_f32[0] =
228 vResult.vector4_f32[1] =
229 vResult.vector4_f32[2] =
230 vResult.vector4_f32[3] = V.vector4_f32[2];
232 #elif defined(_XM_SSE_INTRINSICS_)
233 return _mm_shuffle_ps( V, V, _MM_SHUFFLE(2, 2, 2, 2) );
234 #else // _XM_VMX128_INTRINSICS_
235 #endif // _XM_VMX128_INTRINSICS_
238 //------------------------------------------------------------------------------
239 // Replicate the w component of the vector
240 XMFINLINE XMVECTOR XMVectorSplatW
245 #if defined(_XM_NO_INTRINSICS_)
247 vResult.vector4_f32[0] =
248 vResult.vector4_f32[1] =
249 vResult.vector4_f32[2] =
250 vResult.vector4_f32[3] = V.vector4_f32[3];
252 #elif defined(_XM_SSE_INTRINSICS_)
253 return _mm_shuffle_ps( V, V, _MM_SHUFFLE(3, 3, 3, 3) );
254 #else // _XM_VMX128_INTRINSICS_
255 #endif // _XM_VMX128_INTRINSICS_
258 //------------------------------------------------------------------------------
259 // Return a vector of 1.0f,1.0f,1.0f,1.0f
260 XMFINLINE XMVECTOR XMVectorSplatOne()
262 #if defined(_XM_NO_INTRINSICS_)
264 vResult.vector4_f32[0] =
265 vResult.vector4_f32[1] =
266 vResult.vector4_f32[2] =
267 vResult.vector4_f32[3] = 1.0f;
269 #elif defined(_XM_SSE_INTRINSICS_)
271 #else // _XM_VMX128_INTRINSICS_
272 #endif // _XM_VMX128_INTRINSICS_
275 //------------------------------------------------------------------------------
276 // Return a vector of INF,INF,INF,INF
277 XMFINLINE XMVECTOR XMVectorSplatInfinity()
279 #if defined(_XM_NO_INTRINSICS_)
281 vResult.vector4_u32[0] =
282 vResult.vector4_u32[1] =
283 vResult.vector4_u32[2] =
284 vResult.vector4_u32[3] = 0x7F800000;
286 #elif defined(_XM_SSE_INTRINSICS_)
288 #else // _XM_VMX128_INTRINSICS_
289 #endif // _XM_VMX128_INTRINSICS_
292 //------------------------------------------------------------------------------
293 // Return a vector of Q_NAN,Q_NAN,Q_NAN,Q_NAN
294 XMFINLINE XMVECTOR XMVectorSplatQNaN()
296 #if defined(_XM_NO_INTRINSICS_)
298 vResult.vector4_u32[0] =
299 vResult.vector4_u32[1] =
300 vResult.vector4_u32[2] =
301 vResult.vector4_u32[3] = 0x7FC00000;
303 #elif defined(_XM_SSE_INTRINSICS_)
305 #else // _XM_VMX128_INTRINSICS_
306 #endif // _XM_VMX128_INTRINSICS_
309 //------------------------------------------------------------------------------
310 // Return a vector of 1.192092896e-7f,1.192092896e-7f,1.192092896e-7f,1.192092896e-7f
311 XMFINLINE XMVECTOR XMVectorSplatEpsilon()
313 #if defined(_XM_NO_INTRINSICS_)
315 vResult.vector4_u32[0] =
316 vResult.vector4_u32[1] =
317 vResult.vector4_u32[2] =
318 vResult.vector4_u32[3] = 0x34000000;
320 #elif defined(_XM_SSE_INTRINSICS_)
322 #else // _XM_VMX128_INTRINSICS_
323 #endif // _XM_VMX128_INTRINSICS_
326 //------------------------------------------------------------------------------
327 // Return a vector of -0.0f (0x80000000),-0.0f,-0.0f,-0.0f
328 XMFINLINE XMVECTOR XMVectorSplatSignMask()
330 #if defined(_XM_NO_INTRINSICS_)
332 vResult.vector4_u32[0] =
333 vResult.vector4_u32[1] =
334 vResult.vector4_u32[2] =
335 vResult.vector4_u32[3] = 0x80000000U;
337 #elif defined(_XM_SSE_INTRINSICS_)
338 __m128i V = _mm_set1_epi32( 0x80000000 );
339 return reinterpret_cast<__m128*>(&V)[0];
340 #else // _XM_VMX128_INTRINSICS_
341 #endif // _XM_VMX128_INTRINSICS_
344 //------------------------------------------------------------------------------
345 // Return a floating point value via an index. This is not a recommended
346 // function to use due to performance loss.
347 XMFINLINE FLOAT XMVectorGetByIndex(FXMVECTOR V,UINT i)
350 #if defined(_XM_NO_INTRINSICS_)
351 return V.vector4_f32[i];
352 #elif defined(_XM_SSE_INTRINSICS_)
353 return V.m128_f32[i];
354 #else // _XM_VMX128_INTRINSICS_
355 #endif // _XM_VMX128_INTRINSICS_
358 //------------------------------------------------------------------------------
359 // Return the X component in an FPU register.
360 // This causes Load/Hit/Store on VMX targets
361 XMFINLINE FLOAT XMVectorGetX(FXMVECTOR V)
363 #if defined(_XM_NO_INTRINSICS_)
364 return V.vector4_f32[0];
365 #elif defined(_XM_SSE_INTRINSICS_)
366 #if defined(_MSC_VER) && (_MSC_VER>=1500)
367 return _mm_cvtss_f32(V);
369 return V.m128_f32[0];
371 #else // _XM_VMX128_INTRINSICS_
372 #endif // _XM_VMX128_INTRINSICS_
375 // Return the Y component in an FPU register.
376 // This causes Load/Hit/Store on VMX targets
377 XMFINLINE FLOAT XMVectorGetY(FXMVECTOR V)
379 #if defined(_XM_NO_INTRINSICS_)
380 return V.vector4_f32[1];
381 #elif defined(_XM_SSE_INTRINSICS_)
382 #if defined(_MSC_VER) && (_MSC_VER>=1500)
383 XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
384 return _mm_cvtss_f32(vTemp);
386 return V.m128_f32[1];
388 #else // _XM_VMX128_INTRINSICS_
389 #endif // _XM_VMX128_INTRINSICS_
392 // Return the Z component in an FPU register.
393 // This causes Load/Hit/Store on VMX targets
394 XMFINLINE FLOAT XMVectorGetZ(FXMVECTOR V)
396 #if defined(_XM_NO_INTRINSICS_)
397 return V.vector4_f32[2];
398 #elif defined(_XM_SSE_INTRINSICS_)
399 #if defined(_MSC_VER) && (_MSC_VER>=1500)
400 XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
401 return _mm_cvtss_f32(vTemp);
403 return V.m128_f32[2];
405 #else // _XM_VMX128_INTRINSICS_
406 #endif // _XM_VMX128_INTRINSICS_
409 // Return the W component in an FPU register.
410 // This causes Load/Hit/Store on VMX targets
411 XMFINLINE FLOAT XMVectorGetW(FXMVECTOR V)
413 #if defined(_XM_NO_INTRINSICS_)
414 return V.vector4_f32[3];
415 #elif defined(_XM_SSE_INTRINSICS_)
416 #if defined(_MSC_VER) && (_MSC_VER>=1500)
417 XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
418 return _mm_cvtss_f32(vTemp);
420 return V.m128_f32[3];
422 #else // _XM_VMX128_INTRINSICS_
423 #endif // _XM_VMX128_INTRINSICS_
426 //------------------------------------------------------------------------------
428 // Store a component indexed by i into a 32 bit float location in memory.
429 // This causes Load/Hit/Store on VMX targets
430 XMFINLINE VOID XMVectorGetByIndexPtr(FLOAT *f,FXMVECTOR V,UINT i)
434 #if defined(_XM_NO_INTRINSICS_)
435 *f = V.vector4_f32[i];
436 #elif defined(_XM_SSE_INTRINSICS_)
438 #else // _XM_VMX128_INTRINSICS_
439 #endif // _XM_VMX128_INTRINSICS_
442 //------------------------------------------------------------------------------
444 // Store the X component into a 32 bit float location in memory.
445 XMFINLINE VOID XMVectorGetXPtr(FLOAT *x,FXMVECTOR V)
448 #if defined(_XM_NO_INTRINSICS_)
449 *x = V.vector4_f32[0];
450 #elif defined(_XM_SSE_INTRINSICS_)
452 #else // _XM_VMX128_INTRINSICS_
453 #endif // _XM_VMX128_INTRINSICS_
456 // Store the Y component into a 32 bit float location in memory.
457 XMFINLINE VOID XMVectorGetYPtr(FLOAT *y,FXMVECTOR V)
460 #if defined(_XM_NO_INTRINSICS_)
461 *y = V.vector4_f32[1];
462 #elif defined(_XM_SSE_INTRINSICS_)
463 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
464 _mm_store_ss(y,vResult);
465 #else // _XM_VMX128_INTRINSICS_
466 #endif // _XM_VMX128_INTRINSICS_
469 // Store the Z component into a 32 bit float location in memory.
470 XMFINLINE VOID XMVectorGetZPtr(FLOAT *z,FXMVECTOR V)
473 #if defined(_XM_NO_INTRINSICS_)
474 *z = V.vector4_f32[2];
475 #elif defined(_XM_SSE_INTRINSICS_)
476 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
477 _mm_store_ss(z,vResult);
478 #else // _XM_VMX128_INTRINSICS_
479 #endif // _XM_VMX128_INTRINSICS_
482 // Store the W component into a 32 bit float location in memory.
483 XMFINLINE VOID XMVectorGetWPtr(FLOAT *w,FXMVECTOR V)
486 #if defined(_XM_NO_INTRINSICS_)
487 *w = V.vector4_f32[3];
488 #elif defined(_XM_SSE_INTRINSICS_)
489 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
490 _mm_store_ss(w,vResult);
491 #else // _XM_VMX128_INTRINSICS_
492 #endif // _XM_VMX128_INTRINSICS_
495 //------------------------------------------------------------------------------
497 // Return an integer value via an index. This is not a recommended
498 // function to use due to performance loss.
499 XMFINLINE UINT XMVectorGetIntByIndex(FXMVECTOR V, UINT i)
502 #if defined(_XM_NO_INTRINSICS_)
503 return V.vector4_u32[i];
504 #elif defined(_XM_SSE_INTRINSICS_)
505 #if defined(_MSC_VER) && (_MSC_VER<1400)
510 return V.m128_u32[i];
512 #else // _XM_VMX128_INTRINSICS_
513 #endif // _XM_VMX128_INTRINSICS_
516 //------------------------------------------------------------------------------
518 // Return the X component in an integer register.
519 // This causes Load/Hit/Store on VMX targets
520 XMFINLINE UINT XMVectorGetIntX(FXMVECTOR V)
522 #if defined(_XM_NO_INTRINSICS_)
523 return V.vector4_u32[0];
524 #elif defined(_XM_SSE_INTRINSICS_)
525 return static_cast<UINT>(_mm_cvtsi128_si32(reinterpret_cast<const __m128i *>(&V)[0]));
526 #else // _XM_VMX128_INTRINSICS_
527 #endif // _XM_VMX128_INTRINSICS_
530 // Return the Y component in an integer register.
531 // This causes Load/Hit/Store on VMX targets
532 XMFINLINE UINT XMVectorGetIntY(FXMVECTOR V)
534 #if defined(_XM_NO_INTRINSICS_)
535 return V.vector4_u32[1];
536 #elif defined(_XM_SSE_INTRINSICS_)
537 __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(1,1,1,1));
538 return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
539 #else // _XM_VMX128_INTRINSICS_
540 #endif // _XM_VMX128_INTRINSICS_
543 // Return the Z component in an integer register.
544 // This causes Load/Hit/Store on VMX targets
545 XMFINLINE UINT XMVectorGetIntZ(FXMVECTOR V)
547 #if defined(_XM_NO_INTRINSICS_)
548 return V.vector4_u32[2];
549 #elif defined(_XM_SSE_INTRINSICS_)
550 __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(2,2,2,2));
551 return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
552 #else // _XM_VMX128_INTRINSICS_
553 #endif // _XM_VMX128_INTRINSICS_
556 // Return the W component in an integer register.
557 // This causes Load/Hit/Store on VMX targets
558 XMFINLINE UINT XMVectorGetIntW(FXMVECTOR V)
560 #if defined(_XM_NO_INTRINSICS_)
561 return V.vector4_u32[3];
562 #elif defined(_XM_SSE_INTRINSICS_)
563 __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(3,3,3,3));
564 return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
565 #else // _XM_VMX128_INTRINSICS_
566 #endif // _XM_VMX128_INTRINSICS_
569 //------------------------------------------------------------------------------
571 // Store a component indexed by i into a 32 bit integer location in memory.
572 // This causes Load/Hit/Store on VMX targets
573 XMFINLINE VOID XMVectorGetIntByIndexPtr(UINT *x,FXMVECTOR V,UINT i)
577 #if defined(_XM_NO_INTRINSICS_)
578 *x = V.vector4_u32[i];
579 #elif defined(_XM_SSE_INTRINSICS_)
580 #if defined(_MSC_VER) && (_MSC_VER<1400)
587 #else // _XM_VMX128_INTRINSICS_
588 #endif // _XM_VMX128_INTRINSICS_
591 //------------------------------------------------------------------------------
593 // Store the X component into a 32 bit integer location in memory.
594 XMFINLINE VOID XMVectorGetIntXPtr(UINT *x,FXMVECTOR V)
597 #if defined(_XM_NO_INTRINSICS_)
598 *x = V.vector4_u32[0];
599 #elif defined(_XM_SSE_INTRINSICS_)
600 _mm_store_ss(reinterpret_cast<float *>(x),V);
601 #else // _XM_VMX128_INTRINSICS_
602 #endif // _XM_VMX128_INTRINSICS_
605 // Store the Y component into a 32 bit integer location in memory.
606 XMFINLINE VOID XMVectorGetIntYPtr(UINT *y,FXMVECTOR V)
609 #if defined(_XM_NO_INTRINSICS_)
610 *y = V.vector4_u32[1];
611 #elif defined(_XM_SSE_INTRINSICS_)
612 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
613 _mm_store_ss(reinterpret_cast<float *>(y),vResult);
614 #else // _XM_VMX128_INTRINSICS_
615 #endif // _XM_VMX128_INTRINSICS_
618 // Store the Z component into a 32 bit integer locaCantion in memory.
619 XMFINLINE VOID XMVectorGetIntZPtr(UINT *z,FXMVECTOR V)
622 #if defined(_XM_NO_INTRINSICS_)
623 *z = V.vector4_u32[2];
624 #elif defined(_XM_SSE_INTRINSICS_)
625 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
626 _mm_store_ss(reinterpret_cast<float *>(z),vResult);
627 #else // _XM_VMX128_INTRINSICS_
628 #endif // _XM_VMX128_INTRINSICS_
631 // Store the W component into a 32 bit integer location in memory.
632 XMFINLINE VOID XMVectorGetIntWPtr(UINT *w,FXMVECTOR V)
635 #if defined(_XM_NO_INTRINSICS_)
636 *w = V.vector4_u32[3];
637 #elif defined(_XM_SSE_INTRINSICS_)
638 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
639 _mm_store_ss(reinterpret_cast<float *>(w),vResult);
640 #else // _XM_VMX128_INTRINSICS_
641 #endif // _XM_VMX128_INTRINSICS_
644 //------------------------------------------------------------------------------
646 // Set a single indexed floating point component
647 // This causes Load/Hit/Store on VMX targets
648 XMFINLINE XMVECTOR XMVectorSetByIndex(FXMVECTOR V, FLOAT f,UINT i)
650 #if defined(_XM_NO_INTRINSICS_)
654 U.vector4_f32[i] = f;
656 #elif defined(_XM_SSE_INTRINSICS_)
661 #else // _XM_VMX128_INTRINSICS_
662 #endif // _XM_VMX128_INTRINSICS_
665 //------------------------------------------------------------------------------
667 // Sets the X component of a vector to a passed floating point value
668 // This causes Load/Hit/Store on VMX targets
669 XMFINLINE XMVECTOR XMVectorSetX(FXMVECTOR V, FLOAT x)
671 #if defined(_XM_NO_INTRINSICS_)
673 U.vector4_f32[0] = x;
674 U.vector4_f32[1] = V.vector4_f32[1];
675 U.vector4_f32[2] = V.vector4_f32[2];
676 U.vector4_f32[3] = V.vector4_f32[3];
678 #elif defined(_XM_SSE_INTRINSICS_)
679 #if defined(_XM_ISVS2005_)
680 XMVECTOR vResult = V;
681 vResult.m128_f32[0] = x;
684 XMVECTOR vResult = _mm_set_ss(x);
685 vResult = _mm_move_ss(V,vResult);
687 #endif // _XM_ISVS2005_
688 #else // _XM_VMX128_INTRINSICS_
689 #endif // _XM_VMX128_INTRINSICS_
692 // Sets the Y component of a vector to a passed floating point value
693 // This causes Load/Hit/Store on VMX targets
694 XMFINLINE XMVECTOR XMVectorSetY(FXMVECTOR V, FLOAT y)
696 #if defined(_XM_NO_INTRINSICS_)
698 U.vector4_f32[0] = V.vector4_f32[0];
699 U.vector4_f32[1] = y;
700 U.vector4_f32[2] = V.vector4_f32[2];
701 U.vector4_f32[3] = V.vector4_f32[3];
703 #elif defined(_XM_SSE_INTRINSICS_)
704 #if defined(_XM_ISVS2005_)
705 XMVECTOR vResult = V;
706 vResult.m128_f32[1] = y;
710 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
711 // Convert input to vector
712 XMVECTOR vTemp = _mm_set_ss(y);
713 // Replace the x component
714 vResult = _mm_move_ss(vResult,vTemp);
715 // Swap y and x again
716 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
718 #endif // _XM_ISVS2005_
719 #else // _XM_VMX128_INTRINSICS_
720 #endif // _XM_VMX128_INTRINSICS_
722 // Sets the Z component of a vector to a passed floating point value
723 // This causes Load/Hit/Store on VMX targets
724 XMFINLINE XMVECTOR XMVectorSetZ(FXMVECTOR V, FLOAT z)
726 #if defined(_XM_NO_INTRINSICS_)
728 U.vector4_f32[0] = V.vector4_f32[0];
729 U.vector4_f32[1] = V.vector4_f32[1];
730 U.vector4_f32[2] = z;
731 U.vector4_f32[3] = V.vector4_f32[3];
733 #elif defined(_XM_SSE_INTRINSICS_)
734 #if defined(_XM_ISVS2005_)
735 XMVECTOR vResult = V;
736 vResult.m128_f32[2] = z;
740 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
741 // Convert input to vector
742 XMVECTOR vTemp = _mm_set_ss(z);
743 // Replace the x component
744 vResult = _mm_move_ss(vResult,vTemp);
745 // Swap z and x again
746 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
748 #endif // _XM_ISVS2005_
749 #else // _XM_VMX128_INTRINSICS_
750 #endif // _XM_VMX128_INTRINSICS_
753 // Sets the W component of a vector to a passed floating point value
754 // This causes Load/Hit/Store on VMX targets
755 XMFINLINE XMVECTOR XMVectorSetW(FXMVECTOR V, FLOAT w)
757 #if defined(_XM_NO_INTRINSICS_)
759 U.vector4_f32[0] = V.vector4_f32[0];
760 U.vector4_f32[1] = V.vector4_f32[1];
761 U.vector4_f32[2] = V.vector4_f32[2];
762 U.vector4_f32[3] = w;
764 #elif defined(_XM_SSE_INTRINSICS_)
765 #if defined(_XM_ISVS2005_)
766 XMVECTOR vResult = V;
767 vResult.m128_f32[3] = w;
771 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
772 // Convert input to vector
773 XMVECTOR vTemp = _mm_set_ss(w);
774 // Replace the x component
775 vResult = _mm_move_ss(vResult,vTemp);
776 // Swap w and x again
777 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
779 #endif // _XM_ISVS2005_
780 #else // _XM_VMX128_INTRINSICS_
781 #endif // _XM_VMX128_INTRINSICS_
784 //------------------------------------------------------------------------------
786 // Sets a component of a vector to a floating point value passed by pointer
787 // This causes Load/Hit/Store on VMX targets
788 XMFINLINE XMVECTOR XMVectorSetByIndexPtr(FXMVECTOR V,CONST FLOAT *f,UINT i)
790 #if defined(_XM_NO_INTRINSICS_)
795 U.vector4_f32[i] = *f;
797 #elif defined(_XM_SSE_INTRINSICS_)
803 #else // _XM_VMX128_INTRINSICS_
804 #endif // _XM_VMX128_INTRINSICS_
807 //------------------------------------------------------------------------------
809 // Sets the X component of a vector to a floating point value passed by pointer
810 XMFINLINE XMVECTOR XMVectorSetXPtr(FXMVECTOR V,CONST FLOAT *x)
812 #if defined(_XM_NO_INTRINSICS_)
815 U.vector4_f32[0] = *x;
816 U.vector4_f32[1] = V.vector4_f32[1];
817 U.vector4_f32[2] = V.vector4_f32[2];
818 U.vector4_f32[3] = V.vector4_f32[3];
820 #elif defined(_XM_SSE_INTRINSICS_)
822 XMVECTOR vResult = _mm_load_ss(x);
823 vResult = _mm_move_ss(V,vResult);
825 #else // _XM_VMX128_INTRINSICS_
826 #endif // _XM_VMX128_INTRINSICS_
829 // Sets the Y component of a vector to a floating point value passed by pointer
830 XMFINLINE XMVECTOR XMVectorSetYPtr(FXMVECTOR V,CONST FLOAT *y)
832 #if defined(_XM_NO_INTRINSICS_)
835 U.vector4_f32[0] = V.vector4_f32[0];
836 U.vector4_f32[1] = *y;
837 U.vector4_f32[2] = V.vector4_f32[2];
838 U.vector4_f32[3] = V.vector4_f32[3];
840 #elif defined(_XM_SSE_INTRINSICS_)
843 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
844 // Convert input to vector
845 XMVECTOR vTemp = _mm_load_ss(y);
846 // Replace the x component
847 vResult = _mm_move_ss(vResult,vTemp);
848 // Swap y and x again
849 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
851 #else // _XM_VMX128_INTRINSICS_
852 #endif // _XM_VMX128_INTRINSICS_
855 // Sets the Z component of a vector to a floating point value passed by pointer
856 XMFINLINE XMVECTOR XMVectorSetZPtr(FXMVECTOR V,CONST FLOAT *z)
858 #if defined(_XM_NO_INTRINSICS_)
861 U.vector4_f32[0] = V.vector4_f32[0];
862 U.vector4_f32[1] = V.vector4_f32[1];
863 U.vector4_f32[2] = *z;
864 U.vector4_f32[3] = V.vector4_f32[3];
866 #elif defined(_XM_SSE_INTRINSICS_)
869 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
870 // Convert input to vector
871 XMVECTOR vTemp = _mm_load_ss(z);
872 // Replace the x component
873 vResult = _mm_move_ss(vResult,vTemp);
874 // Swap z and x again
875 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
877 #else // _XM_VMX128_INTRINSICS_
878 #endif // _XM_VMX128_INTRINSICS_
881 // Sets the W component of a vector to a floating point value passed by pointer
882 XMFINLINE XMVECTOR XMVectorSetWPtr(FXMVECTOR V,CONST FLOAT *w)
884 #if defined(_XM_NO_INTRINSICS_)
887 U.vector4_f32[0] = V.vector4_f32[0];
888 U.vector4_f32[1] = V.vector4_f32[1];
889 U.vector4_f32[2] = V.vector4_f32[2];
890 U.vector4_f32[3] = *w;
892 #elif defined(_XM_SSE_INTRINSICS_)
895 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
896 // Convert input to vector
897 XMVECTOR vTemp = _mm_load_ss(w);
898 // Replace the x component
899 vResult = _mm_move_ss(vResult,vTemp);
900 // Swap w and x again
901 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
903 #else // _XM_VMX128_INTRINSICS_
904 #endif // _XM_VMX128_INTRINSICS_
907 //------------------------------------------------------------------------------
909 // Sets a component of a vector to an integer passed by value
910 // This causes Load/Hit/Store on VMX targets
911 XMFINLINE XMVECTOR XMVectorSetIntByIndex(FXMVECTOR V, UINT x, UINT i)
913 #if defined(_XM_NO_INTRINSICS_)
917 U.vector4_u32[i] = x;
919 #elif defined(_XM_SSE_INTRINSICS_)
925 #else // _XM_VMX128_INTRINSICS_
926 #endif // _XM_VMX128_INTRINSICS_
929 //------------------------------------------------------------------------------
931 // Sets the X component of a vector to an integer passed by value
932 // This causes Load/Hit/Store on VMX targets
933 XMFINLINE XMVECTOR XMVectorSetIntX(FXMVECTOR V, UINT x)
935 #if defined(_XM_NO_INTRINSICS_)
937 U.vector4_u32[0] = x;
938 U.vector4_u32[1] = V.vector4_u32[1];
939 U.vector4_u32[2] = V.vector4_u32[2];
940 U.vector4_u32[3] = V.vector4_u32[3];
942 #elif defined(_XM_SSE_INTRINSICS_)
943 #if defined(_XM_ISVS2005_)
944 XMVECTOR vResult = V;
945 vResult.m128_i32[0] = x;
948 __m128i vTemp = _mm_cvtsi32_si128(x);
949 XMVECTOR vResult = _mm_move_ss(V,reinterpret_cast<const __m128 *>(&vTemp)[0]);
951 #endif // _XM_ISVS2005_
952 #else // _XM_VMX128_INTRINSICS_
953 #endif // _XM_VMX128_INTRINSICS_
956 // Sets the Y component of a vector to an integer passed by value
957 // This causes Load/Hit/Store on VMX targets
958 XMFINLINE XMVECTOR XMVectorSetIntY(FXMVECTOR V, UINT y)
960 #if defined(_XM_NO_INTRINSICS_)
962 U.vector4_u32[0] = V.vector4_u32[0];
963 U.vector4_u32[1] = y;
964 U.vector4_u32[2] = V.vector4_u32[2];
965 U.vector4_u32[3] = V.vector4_u32[3];
967 #elif defined(_XM_SSE_INTRINSICS_)
968 #if defined(_XM_ISVS2005_)
969 XMVECTOR vResult = V;
970 vResult.m128_i32[1] = y;
972 #else // Swap y and x
973 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
974 // Convert input to vector
975 __m128i vTemp = _mm_cvtsi32_si128(y);
976 // Replace the x component
977 vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
978 // Swap y and x again
979 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
981 #endif // _XM_ISVS2005_
982 #else // _XM_VMX128_INTRINSICS_
983 #endif // _XM_VMX128_INTRINSICS_
986 // Sets the Z component of a vector to an integer passed by value
987 // This causes Load/Hit/Store on VMX targets
988 XMFINLINE XMVECTOR XMVectorSetIntZ(FXMVECTOR V, UINT z)
990 #if defined(_XM_NO_INTRINSICS_)
992 U.vector4_u32[0] = V.vector4_u32[0];
993 U.vector4_u32[1] = V.vector4_u32[1];
994 U.vector4_u32[2] = z;
995 U.vector4_u32[3] = V.vector4_u32[3];
997 #elif defined(_XM_SSE_INTRINSICS_)
998 #if defined(_XM_ISVS2005_)
999 XMVECTOR vResult = V;
1000 vResult.m128_i32[2] = z;
1004 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
1005 // Convert input to vector
1006 __m128i vTemp = _mm_cvtsi32_si128(z);
1007 // Replace the x component
1008 vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
1009 // Swap z and x again
1010 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
1012 #endif // _XM_ISVS2005_
1013 #else // _XM_VMX128_INTRINSICS_
1014 #endif // _XM_VMX128_INTRINSICS_
1017 // Sets the W component of a vector to an integer passed by value
1018 // This causes Load/Hit/Store on VMX targets
1019 XMFINLINE XMVECTOR XMVectorSetIntW(FXMVECTOR V, UINT w)
1021 #if defined(_XM_NO_INTRINSICS_)
1023 U.vector4_u32[0] = V.vector4_u32[0];
1024 U.vector4_u32[1] = V.vector4_u32[1];
1025 U.vector4_u32[2] = V.vector4_u32[2];
1026 U.vector4_u32[3] = w;
1028 #elif defined(_XM_SSE_INTRINSICS_)
1029 #if defined(_XM_ISVS2005_)
1030 XMVECTOR vResult = V;
1031 vResult.m128_i32[3] = w;
1035 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
1036 // Convert input to vector
1037 __m128i vTemp = _mm_cvtsi32_si128(w);
1038 // Replace the x component
1039 vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
1040 // Swap w and x again
1041 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
1043 #endif // _XM_ISVS2005_
1044 #else // _XM_VMX128_INTRINSICS_
1045 #endif // _XM_VMX128_INTRINSICS_
1048 //------------------------------------------------------------------------------
1050 // Sets a component of a vector to an integer value passed by pointer
1051 // This causes Load/Hit/Store on VMX targets
1052 XMFINLINE XMVECTOR XMVectorSetIntByIndexPtr(FXMVECTOR V, CONST UINT *x,UINT i)
1054 #if defined(_XM_NO_INTRINSICS_)
1059 U.vector4_u32[i] = *x;
1061 #elif defined(_XM_SSE_INTRINSICS_)
1068 #else // _XM_VMX128_INTRINSICS_
1069 #endif // _XM_VMX128_INTRINSICS_
1072 //------------------------------------------------------------------------------
1074 // Sets the X component of a vector to an integer value passed by pointer
1075 XMFINLINE XMVECTOR XMVectorSetIntXPtr(FXMVECTOR V,CONST UINT *x)
1077 #if defined(_XM_NO_INTRINSICS_)
1080 U.vector4_u32[0] = *x;
1081 U.vector4_u32[1] = V.vector4_u32[1];
1082 U.vector4_u32[2] = V.vector4_u32[2];
1083 U.vector4_u32[3] = V.vector4_u32[3];
1085 #elif defined(_XM_SSE_INTRINSICS_)
1087 XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(x));
1088 XMVECTOR vResult = _mm_move_ss(V,vTemp);
1090 #else // _XM_VMX128_INTRINSICS_
1091 #endif // _XM_VMX128_INTRINSICS_
1094 // Sets the Y component of a vector to an integer value passed by pointer
1095 XMFINLINE XMVECTOR XMVectorSetIntYPtr(FXMVECTOR V,CONST UINT *y)
1097 #if defined(_XM_NO_INTRINSICS_)
1100 U.vector4_u32[0] = V.vector4_u32[0];
1101 U.vector4_u32[1] = *y;
1102 U.vector4_u32[2] = V.vector4_u32[2];
1103 U.vector4_u32[3] = V.vector4_u32[3];
1105 #elif defined(_XM_SSE_INTRINSICS_)
1108 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
1109 // Convert input to vector
1110 XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(y));
1111 // Replace the x component
1112 vResult = _mm_move_ss(vResult,vTemp);
1113 // Swap y and x again
1114 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
1116 #else // _XM_VMX128_INTRINSICS_
1117 #endif // _XM_VMX128_INTRINSICS_
1120 // Sets the Z component of a vector to an integer value passed by pointer
1121 XMFINLINE XMVECTOR XMVectorSetIntZPtr(FXMVECTOR V,CONST UINT *z)
1123 #if defined(_XM_NO_INTRINSICS_)
// Copy x/y/w from V; only lane 2 takes the new integer value *z.
1126 U.vector4_u32[0] = V.vector4_u32[0];
1127 U.vector4_u32[1] = V.vector4_u32[1];
1128 U.vector4_u32[2] = *z;
1129 U.vector4_u32[3] = V.vector4_u32[3];
1131 #elif defined(_XM_SSE_INTRINSICS_)
// Swap x and z (_MM_SHUFFLE(3,0,1,2)) so _mm_move_ss can patch lane 0.
1134 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
1135 // Convert input to vector
1136 XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(z));
1137 // Replace the x component
1138 vResult = _mm_move_ss(vResult,vTemp);
1139 // Swap z and x again
1140 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
// NOTE(review): partial dump — the return statement is omitted here.
1142 #else // _XM_VMX128_INTRINSICS_
1143 #endif // _XM_VMX128_INTRINSICS_
1146 // Sets the W component of a vector to an integer value passed by pointer
1147 XMFINLINE XMVECTOR XMVectorSetIntWPtr(FXMVECTOR V,CONST UINT *w)
1149 #if defined(_XM_NO_INTRINSICS_)
// Copy x/y/z from V; only lane 3 takes the new integer value *w.
1152 U.vector4_u32[0] = V.vector4_u32[0];
1153 U.vector4_u32[1] = V.vector4_u32[1];
1154 U.vector4_u32[2] = V.vector4_u32[2];
1155 U.vector4_u32[3] = *w;
1157 #elif defined(_XM_SSE_INTRINSICS_)
// Swap x and w (_MM_SHUFFLE(0,2,1,3)) so _mm_move_ss can patch lane 0.
1160 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
1161 // Convert input to vector
1162 XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(w));
1163 // Replace the x component
1164 vResult = _mm_move_ss(vResult,vTemp);
1165 // Swap w and x again
1166 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
// NOTE(review): partial dump — the return statement is omitted here.
1168 #else // _XM_VMX128_INTRINSICS_
1169 #endif // _XM_VMX128_INTRINSICS_
1172 //------------------------------------------------------------------------------
1173 // Define a control vector to be used in XMVectorPermute
1174 // operations. Visualize the two vectors V1 and V2 given
1175 // in a permute as arranged back to back in a linear fashion,
1176 // such that they form an array of 8 floating point values.
1177 // The four integers specified in XMVectorPermuteControl
1178 // will serve as indices into the array to select components
1179 // from the two vectors. ElementIndex0 is used to select
1180 // an element from the vectors to be placed in the first
1181 // component of the resulting vector, ElementIndex1 is used
1182 // to select an element for the second component, etc.
1184 XMFINLINE XMVECTOR XMVectorPermuteControl
// NOTE(review): partial dump — the parameter list (four element indices) and the
// function's opening brace are omitted between the lines above and below.
1192 #if defined(_XM_SSE_INTRINSICS_) || defined(_XM_NO_INTRINSICS_)
1193 XMVECTORU32 vControl;
// Lookup table mapping an index 0-7 to the library's permute constant.
// NOTE(review): the table's initializer entries are omitted from this dump.
1194 static CONST UINT ControlElement[] = {
1204 XMASSERT(ElementIndex0 < 8);
1205 XMASSERT(ElementIndex1 < 8);
1206 XMASSERT(ElementIndex2 < 8);
1207 XMASSERT(ElementIndex3 < 8);
// Each output lane of the control vector records which of the 8 source
// elements should land in that lane.
1209 vControl.u[0] = ControlElement[ElementIndex0];
1210 vControl.u[1] = ControlElement[ElementIndex1];
1211 vControl.u[2] = ControlElement[ElementIndex2];
1212 vControl.u[3] = ControlElement[ElementIndex3];
// NOTE(review): partial dump — the return statement is omitted here.
1218 //------------------------------------------------------------------------------
1220 // Using a control vector made up of 16 bytes from 0-31, remap V1 and V2's byte
1221 // entries into a single 16 byte vector and return it. Index 0-15 = V1,
1223 XMFINLINE XMVECTOR XMVectorPermute
1230 #if defined(_XM_NO_INTRINSICS_)
1231 const BYTE *aByte[2];
1233 UINT i, uIndex, VectorIndex;
1234 const BYTE *pControl;
1237 // Indices must be in range from 0 to 31
1238 XMASSERT((Control.vector4_u32[0] & 0xE0E0E0E0) == 0);
1239 XMASSERT((Control.vector4_u32[1] & 0xE0E0E0E0) == 0);
1240 XMASSERT((Control.vector4_u32[2] & 0xE0E0E0E0) == 0);
1241 XMASSERT((Control.vector4_u32[3] & 0xE0E0E0E0) == 0);
1243 // 0-15 = V1, 16-31 = V2
1244 aByte[0] = (const BYTE*)(&V1);
1245 aByte[1] = (const BYTE*)(&V2);
1247 pControl = (const BYTE *)(&Control);
1248 pWork = (BYTE *)(&Result);
// NOTE(review): partial dump — only the iteration for control byte 0 survives
// below; the loop over all 16 output bytes is omitted.
1250 // Get the byte to map from
1251 uIndex = pControl[0];
// Bit 4 of the index selects the source vector (0 = V1, 1 = V2).
1253 VectorIndex = (uIndex>>4)&1;
1255 #if defined(_XM_LITTLEENDIAN_)
1256 uIndex ^= 3; // Swap byte ordering on little endian machines
1258 pWork[0] = aByte[VectorIndex][uIndex];
1262 #elif defined(_XM_SSE_INTRINSICS_)
1263 #if defined(_PREFAST_) || defined(XMDEBUG)
1264 // Indices must be in range from 0 to 31
// NOTE(review): "PremuteTest" [sic] is a long-standing misspelling of
// "PermuteTest" in the original source; renaming is a code change, not done here.
1265 static const XMVECTORI32 PremuteTest = {0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0};
1266 XMVECTOR vAssert = _mm_and_ps(Control,PremuteTest);
1267 __m128i vAsserti = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&vAssert)[0],g_XMZero);
1268 XMASSERT(_mm_movemask_ps(*reinterpret_cast<const __m128 *>(&vAsserti)) == 0xf);
1270 // Store the vectors onto local memory on the stack
// NOTE(review): partial dump — the declarations of the stacked input Array and
// the output XMVECTORU8 vResult are omitted here.
1274 // Output vector, on the stack
1276 // Get pointer to the two vectors on the stack
1277 const BYTE *pInput = reinterpret_cast<const BYTE *>(Array);
1278 // Store the Control vector on the stack to access the bytes
1279 // don't use Control, it can cause a register variable to spill on the stack.
1280 XMVECTORU8 vControl;
1281 vControl.v = Control; // Write to memory
// NOTE(review): the for-loop header over the 16 bytes is omitted from this dump.
1284 UINT ComponentIndex = vControl.u[i] & 0x1FU;
1285 ComponentIndex ^= 3; // Swap byte ordering
1286 vResult.u[i] = pInput[ComponentIndex];
1289 #else // _XM_SSE_INTRINSICS_
1290 #endif // _XM_VMX128_INTRINSICS_
1293 //------------------------------------------------------------------------------
1294 // Define a control vector to be used in XMVectorSelect
1295 // operations. The four integers specified in XMVectorSelectControl
1296 // serve as indices to select between components in two vectors.
1297 // The first index controls selection for the first component of
1298 // the vectors involved in a select operation, the second index
1299 // controls selection for the second component etc. A value of
1300 // zero for an index causes the corresponding component from the first
1301 // vector to be selected whereas a one causes the component from the
1302 // second vector to be selected instead.
1304 XMFINLINE XMVECTOR XMVectorSelectControl
1312 #if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
1313 // x=Index0,y=Index1,z=Index2,w=Index3
1314 __m128i vTemp = _mm_set_epi32(VectorIndex3,VectorIndex2,VectorIndex1,VectorIndex0);
1315 // Any non-zero entries become 0xFFFFFFFF else 0
1316 vTemp = _mm_cmpgt_epi32(vTemp,g_XMZero)
1317 return reinterpret_cast<__m128 *>(&vTemp)[0];
1319 XMVECTOR ControlVector;
// Maps index 0 -> select-V1 mask, index 1 -> select-V2 mask.
// NOTE(review): partial dump — the table's initializer entries are omitted here.
1320 CONST UINT ControlElement[] =
1326 XMASSERT(VectorIndex0 < 2);
1327 XMASSERT(VectorIndex1 < 2);
1328 XMASSERT(VectorIndex2 < 2);
1329 XMASSERT(VectorIndex3 < 2);
1331 ControlVector.vector4_u32[0] = ControlElement[VectorIndex0];
1332 ControlVector.vector4_u32[1] = ControlElement[VectorIndex1];
1333 ControlVector.vector4_u32[2] = ControlElement[VectorIndex2];
1334 ControlVector.vector4_u32[3] = ControlElement[VectorIndex3];
1336 return ControlVector;
1341 //------------------------------------------------------------------------------
// Per-component blend: wherever a Control bit is 0 the bit comes from V1,
// wherever it is 1 the bit comes from V2 — i.e. (V1 & ~C) | (V2 & C).
1343 XMFINLINE XMVECTOR XMVectorSelect
1350 #if defined(_XM_NO_INTRINSICS_)
1354 Result.vector4_u32[0] = (V1.vector4_u32[0] & ~Control.vector4_u32[0]) | (V2.vector4_u32[0] & Control.vector4_u32[0]);
1355 Result.vector4_u32[1] = (V1.vector4_u32[1] & ~Control.vector4_u32[1]) | (V2.vector4_u32[1] & Control.vector4_u32[1]);
1356 Result.vector4_u32[2] = (V1.vector4_u32[2] & ~Control.vector4_u32[2]) | (V2.vector4_u32[2] & Control.vector4_u32[2]);
1357 Result.vector4_u32[3] = (V1.vector4_u32[3] & ~Control.vector4_u32[3]) | (V2.vector4_u32[3] & Control.vector4_u32[3]);
1361 #elif defined(_XM_SSE_INTRINSICS_)
// andnot computes (~Control & V1); OR-ing with (V2 & Control) completes the blend.
1362 XMVECTOR vTemp1 = _mm_andnot_ps(Control,V1);
1363 XMVECTOR vTemp2 = _mm_and_ps(V2,Control);
1364 return _mm_or_ps(vTemp1,vTemp2);
1365 #else // _XM_VMX128_INTRINSICS_
1366 #endif // _XM_VMX128_INTRINSICS_
1369 //------------------------------------------------------------------------------
// Interleave the low halves of two vectors: {V1.x, V2.x, V1.y, V2.y}.
1371 XMFINLINE XMVECTOR XMVectorMergeXY
1377 #if defined(_XM_NO_INTRINSICS_)
1381 Result.vector4_u32[0] = V1.vector4_u32[0];
1382 Result.vector4_u32[1] = V2.vector4_u32[0];
1383 Result.vector4_u32[2] = V1.vector4_u32[1];
1384 Result.vector4_u32[3] = V2.vector4_u32[1];
1388 #elif defined(_XM_SSE_INTRINSICS_)
1389 return _mm_unpacklo_ps( V1, V2 );
1390 #else // _XM_VMX128_INTRINSICS_
1391 #endif // _XM_VMX128_INTRINSICS_
1394 //------------------------------------------------------------------------------
// Interleave the high halves of two vectors: {V1.z, V2.z, V1.w, V2.w}.
1396 XMFINLINE XMVECTOR XMVectorMergeZW
1402 #if defined(_XM_NO_INTRINSICS_)
1406 Result.vector4_u32[0] = V1.vector4_u32[2];
1407 Result.vector4_u32[1] = V2.vector4_u32[2];
1408 Result.vector4_u32[2] = V1.vector4_u32[3];
1409 Result.vector4_u32[3] = V2.vector4_u32[3];
1413 #elif defined(_XM_SSE_INTRINSICS_)
1414 return _mm_unpackhi_ps( V1, V2 );
1415 #else // _XM_VMX128_INTRINSICS_
1416 #endif // _XM_VMX128_INTRINSICS_
1419 //------------------------------------------------------------------------------
1420 // Comparison operations
1421 //------------------------------------------------------------------------------
1423 //------------------------------------------------------------------------------
// Per-component float compare: each result lane is 0xFFFFFFFF where equal, else 0.
1425 XMFINLINE XMVECTOR XMVectorEqual
1431 #if defined(_XM_NO_INTRINSICS_)
1435 Control.vector4_u32[0] = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
1436 Control.vector4_u32[1] = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
1437 Control.vector4_u32[2] = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
1438 Control.vector4_u32[3] = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
1442 #elif defined(_XM_SSE_INTRINSICS_)
1443 return _mm_cmpeq_ps( V1, V2 );
1444 #else // _XM_VMX128_INTRINSICS_
1445 #endif // _XM_VMX128_INTRINSICS_
1448 //------------------------------------------------------------------------------
// Same as XMVectorEqual, but additionally reports a CR6-style summary
// (all-true / all-false flags) through an output parameter.
1450 XMFINLINE XMVECTOR XMVectorEqualR
1457 #if defined(_XM_NO_INTRINSICS_)
1458 UINT ux, uy, uz, uw, CR;
1463 ux = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
1464 uy = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
1465 uz = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
1466 uw = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
// NOTE(review): partial dump — the `if (ux&uy&uz&uw)` test, the CR=0 default and
// the store of CR through the output pointer are omitted in this listing.
1470 // All elements are greater
1471 CR = XM_CRMASK_CR6TRUE;
1473 else if (!(ux|uy|uz|uw))
1475 // All elements are not greater
1476 CR = XM_CRMASK_CR6FALSE;
1479 Control.vector4_u32[0] = ux;
1480 Control.vector4_u32[1] = uy;
1481 Control.vector4_u32[2] = uz;
1482 Control.vector4_u32[3] = uw;
1485 #elif defined(_XM_SSE_INTRINSICS_)
1487 XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
// movemask packs the four lane sign bits into bits 0-3; 0xf means all lanes true.
1489 int iTest = _mm_movemask_ps(vTemp);
// NOTE(review): partial dump — the surrounding if/else on iTest and the final
// CR store / return of vTemp are omitted here.
1492 CR = XM_CRMASK_CR6TRUE;
1496 // All elements are not greater
1497 CR = XM_CRMASK_CR6FALSE;
1501 #else // _XM_VMX128_INTRINSICS_
1502 #endif // _XM_VMX128_INTRINSICS_
1505 //------------------------------------------------------------------------------
1506 // Treat the components of the vectors as unsigned integers and
1507 // compare individual bits between the two. This is useful for
1508 // comparing control vectors and result vectors returned from
1509 // other comparison operations.
1511 XMFINLINE XMVECTOR XMVectorEqualInt
1517 #if defined(_XM_NO_INTRINSICS_)
1521 Control.vector4_u32[0] = (V1.vector4_u32[0] == V2.vector4_u32[0]) ? 0xFFFFFFFF : 0;
1522 Control.vector4_u32[1] = (V1.vector4_u32[1] == V2.vector4_u32[1]) ? 0xFFFFFFFF : 0;
1523 Control.vector4_u32[2] = (V1.vector4_u32[2] == V2.vector4_u32[2]) ? 0xFFFFFFFF : 0;
1524 Control.vector4_u32[3] = (V1.vector4_u32[3] == V2.vector4_u32[3]) ? 0xFFFFFFFF : 0;
1528 #elif defined(_XM_SSE_INTRINSICS_)
// Reinterpret the float registers as integer lanes for an exact bitwise compare.
1529 __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
1530 return reinterpret_cast<__m128 *>(&V)[0];
1531 #else // _XM_VMX128_INTRINSICS_
1532 #endif // _XM_VMX128_INTRINSICS_
1535 //------------------------------------------------------------------------------
// Integer (bitwise) equality compare that also reports a CR6-style summary
// through *pCR: all lanes equal, or no lanes equal.
1537 XMFINLINE XMVECTOR XMVectorEqualIntR
1544 #if defined(_XM_NO_INTRINSICS_)
1550 Control = XMVectorEqualInt(V1, V2);
// NOTE(review): partial dump — the initial *pCR = 0 reset is omitted above.
1554 if (XMVector4EqualInt(Control, XMVectorTrueInt()))
1556 // All elements are equal
1557 *pCR |= XM_CRMASK_CR6TRUE;
1559 else if (XMVector4EqualInt(Control, XMVectorFalseInt()))
1561 // All elements are not equal
1562 *pCR |= XM_CRMASK_CR6FALSE;
1567 #elif defined(_XM_SSE_INTRINSICS_)
1569 __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
// movemask: bits 0-3 set per equal lane; 0xF = all equal, 0 = none equal.
1570 int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128*>(&V)[0]);
// NOTE(review): partial dump — the if/else on iTemp and the *pCR store are
// omitted between the two CR assignments below.
1574 CR = XM_CRMASK_CR6TRUE;
1578 CR = XM_CRMASK_CR6FALSE;
1581 return reinterpret_cast<__m128 *>(&V)[0];
1582 #else // _XM_VMX128_INTRINSICS_
1583 #endif // _XM_VMX128_INTRINSICS_
1586 //------------------------------------------------------------------------------
// Per-component "close enough" compare: lane is 0xFFFFFFFF when
// |V1 - V2| <= Epsilon for that component.
1588 XMFINLINE XMVECTOR XMVectorNearEqual
1595 #if defined(_XM_NO_INTRINSICS_)
1597 FLOAT fDeltax, fDeltay, fDeltaz, fDeltaw;
1600 fDeltax = V1.vector4_f32[0]-V2.vector4_f32[0];
1601 fDeltay = V1.vector4_f32[1]-V2.vector4_f32[1];
1602 fDeltaz = V1.vector4_f32[2]-V2.vector4_f32[2];
1603 fDeltaw = V1.vector4_f32[3]-V2.vector4_f32[3];
1605 fDeltax = fabsf(fDeltax);
1606 fDeltay = fabsf(fDeltay);
1607 fDeltaz = fabsf(fDeltaz);
1608 fDeltaw = fabsf(fDeltaw);
1610 Control.vector4_u32[0] = (fDeltax <= Epsilon.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
1611 Control.vector4_u32[1] = (fDeltay <= Epsilon.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
1612 Control.vector4_u32[2] = (fDeltaz <= Epsilon.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
1613 Control.vector4_u32[3] = (fDeltaw <= Epsilon.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
1617 #elif defined(_XM_SSE_INTRINSICS_)
1618 // Get the difference
1619 XMVECTOR vDelta = _mm_sub_ps(V1,V2);
1620 // Get the absolute value of the difference
// abs(delta) computed branch-free as max(delta, -delta).
1621 XMVECTOR vTemp = _mm_setzero_ps();
1622 vTemp = _mm_sub_ps(vTemp,vDelta);
1623 vTemp = _mm_max_ps(vTemp,vDelta);
1624 vTemp = _mm_cmple_ps(vTemp,Epsilon);
// NOTE(review): partial dump — the return of vTemp is omitted here.
1626 #else // _XM_VMX128_INTRINSICS_
1627 #endif // _XM_VMX128_INTRINSICS_
1630 //------------------------------------------------------------------------------
// Per-component float inequality: lane is 0xFFFFFFFF where components differ.
1632 XMFINLINE XMVECTOR XMVectorNotEqual
1638 #if defined(_XM_NO_INTRINSICS_)
1641 Control.vector4_u32[0] = (V1.vector4_f32[0] != V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
1642 Control.vector4_u32[1] = (V1.vector4_f32[1] != V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
1643 Control.vector4_u32[2] = (V1.vector4_f32[2] != V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
1644 Control.vector4_u32[3] = (V1.vector4_f32[3] != V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
1647 #elif defined(_XM_SSE_INTRINSICS_)
1648 return _mm_cmpneq_ps( V1, V2 );
1649 #else // _XM_VMX128_INTRINSICS_
1650 #endif // _XM_VMX128_INTRINSICS_
1653 //------------------------------------------------------------------------------
// Per-component bitwise inequality: lane is 0xFFFFFFFF where the 32-bit
// integer lanes differ.
1655 XMFINLINE XMVECTOR XMVectorNotEqualInt
1661 #if defined(_XM_NO_INTRINSICS_)
1664 Control.vector4_u32[0] = (V1.vector4_u32[0] != V2.vector4_u32[0]) ? 0xFFFFFFFFU : 0;
1665 Control.vector4_u32[1] = (V1.vector4_u32[1] != V2.vector4_u32[1]) ? 0xFFFFFFFFU : 0;
1666 Control.vector4_u32[2] = (V1.vector4_u32[2] != V2.vector4_u32[2]) ? 0xFFFFFFFFU : 0;
1667 Control.vector4_u32[3] = (V1.vector4_u32[3] != V2.vector4_u32[3]) ? 0xFFFFFFFFU : 0;
1670 #elif defined(_XM_SSE_INTRINSICS_)
// SSE2 has no integer "not equal": compare equal, then invert the mask via XOR
// with all-ones (g_XMNegOneMask).
1671 __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
1672 return _mm_xor_ps(reinterpret_cast<__m128 *>(&V)[0],g_XMNegOneMask);
1673 #else // _XM_VMX128_INTRINSICS_
1674 #endif // _XM_VMX128_INTRINSICS_
1677 //------------------------------------------------------------------------------
// Per-component float compare: lane is 0xFFFFFFFF where V1 > V2.
1679 XMFINLINE XMVECTOR XMVectorGreater
1685 #if defined(_XM_NO_INTRINSICS_)
1688 Control.vector4_u32[0] = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
1689 Control.vector4_u32[1] = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
1690 Control.vector4_u32[2] = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
1691 Control.vector4_u32[3] = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
1694 #elif defined(_XM_SSE_INTRINSICS_)
1695 return _mm_cmpgt_ps( V1, V2 );
1696 #else // _XM_VMX128_INTRINSICS_
1697 #endif // _XM_VMX128_INTRINSICS_
1700 //------------------------------------------------------------------------------
// Per-component V1 > V2 compare that also reports a CR6-style summary
// (all lanes greater / no lanes greater) through an output parameter.
1702 XMFINLINE XMVECTOR XMVectorGreaterR
1709 #if defined(_XM_NO_INTRINSICS_)
1710 UINT ux, uy, uz, uw, CR;
1715 ux = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
1716 uy = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
1717 uz = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
1718 uw = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
// NOTE(review): partial dump — the `if (ux&uy&uz&uw)` test, the CR=0 default and
// the store of CR through the output pointer are omitted in this listing.
1722 // All elements are greater
1723 CR = XM_CRMASK_CR6TRUE;
1725 else if (!(ux|uy|uz|uw))
1727 // All elements are not greater
1728 CR = XM_CRMASK_CR6FALSE;
1731 Control.vector4_u32[0] = ux;
1732 Control.vector4_u32[1] = uy;
1733 Control.vector4_u32[2] = uz;
1734 Control.vector4_u32[3] = uw;
1737 #elif defined(_XM_SSE_INTRINSICS_)
1739 XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
// movemask packs lane sign bits into bits 0-3; 0xf means every lane compared true.
1741 int iTest = _mm_movemask_ps(vTemp);
// NOTE(review): partial dump — the if/else on iTest, the *pCR store and the
// return of vTemp are omitted around the CR assignments below.
1744 CR = XM_CRMASK_CR6TRUE;
1748 // All elements are not greater
1749 CR = XM_CRMASK_CR6FALSE;
1753 #else // _XM_VMX128_INTRINSICS_
1754 #endif // _XM_VMX128_INTRINSICS_
1757 //------------------------------------------------------------------------------
// Per-component float compare: lane is 0xFFFFFFFF where V1 >= V2.
1759 XMFINLINE XMVECTOR XMVectorGreaterOrEqual
1765 #if defined(_XM_NO_INTRINSICS_)
1768 Control.vector4_u32[0] = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
1769 Control.vector4_u32[1] = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
1770 Control.vector4_u32[2] = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
1771 Control.vector4_u32[3] = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
1774 #elif defined(_XM_SSE_INTRINSICS_)
1775 return _mm_cmpge_ps( V1, V2 );
1776 #else // _XM_VMX128_INTRINSICS_
1777 #endif // _XM_VMX128_INTRINSICS_
1780 //------------------------------------------------------------------------------
// Per-component V1 >= V2 compare that also reports a CR6-style summary
// through an output parameter.
1782 XMFINLINE XMVECTOR XMVectorGreaterOrEqualR
1789 #if defined(_XM_NO_INTRINSICS_)
1790 UINT ux, uy, uz, uw, CR;
1795 ux = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
1796 uy = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
1797 uz = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
1798 uw = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
// NOTE(review): partial dump — the `if (ux&uy&uz&uw)` test, the CR=0 default and
// the store of CR through the output pointer are omitted in this listing.
1802 // All elements are greater
1803 CR = XM_CRMASK_CR6TRUE;
1805 else if (!(ux|uy|uz|uw))
1807 // All elements are not greater
1808 CR = XM_CRMASK_CR6FALSE;
1811 Control.vector4_u32[0] = ux;
1812 Control.vector4_u32[1] = uy;
1813 Control.vector4_u32[2] = uz;
1814 Control.vector4_u32[3] = uw;
1817 #elif defined(_XM_SSE_INTRINSICS_)
1819 XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
// 0xf from movemask means all four lanes compared true.
1821 int iTest = _mm_movemask_ps(vTemp);
// NOTE(review): partial dump — the if/else on iTest, the *pCR store and the
// return of vTemp are omitted around the CR assignments below.
1824 CR = XM_CRMASK_CR6TRUE;
1828 // All elements are not greater
1829 CR = XM_CRMASK_CR6FALSE;
1833 #else // _XM_VMX128_INTRINSICS_
1834 #endif // _XM_VMX128_INTRINSICS_
1837 //------------------------------------------------------------------------------
// Per-component float compare: lane is 0xFFFFFFFF where V1 < V2.
1839 XMFINLINE XMVECTOR XMVectorLess
1845 #if defined(_XM_NO_INTRINSICS_)
1848 Control.vector4_u32[0] = (V1.vector4_f32[0] < V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
1849 Control.vector4_u32[1] = (V1.vector4_f32[1] < V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
1850 Control.vector4_u32[2] = (V1.vector4_f32[2] < V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
1851 Control.vector4_u32[3] = (V1.vector4_f32[3] < V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
1854 #elif defined(_XM_SSE_INTRINSICS_)
1855 return _mm_cmplt_ps( V1, V2 );
1856 #else // _XM_VMX128_INTRINSICS_
1857 #endif // _XM_VMX128_INTRINSICS_
1860 //------------------------------------------------------------------------------
// Per-component float compare: lane is 0xFFFFFFFF where V1 <= V2.
1862 XMFINLINE XMVECTOR XMVectorLessOrEqual
1868 #if defined(_XM_NO_INTRINSICS_)
1871 Control.vector4_u32[0] = (V1.vector4_f32[0] <= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
1872 Control.vector4_u32[1] = (V1.vector4_f32[1] <= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
1873 Control.vector4_u32[2] = (V1.vector4_f32[2] <= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
1874 Control.vector4_u32[3] = (V1.vector4_f32[3] <= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
1877 #elif defined(_XM_SSE_INTRINSICS_)
1878 return _mm_cmple_ps( V1, V2 );
1879 #else // _XM_VMX128_INTRINSICS_
1880 #endif // _XM_VMX128_INTRINSICS_
1883 //------------------------------------------------------------------------------
// Per-component bounds test: lane is 0xFFFFFFFF when the component lies in
// the closed interval [-Bounds, +Bounds].
1885 XMFINLINE XMVECTOR XMVectorInBounds
1891 #if defined(_XM_NO_INTRINSICS_)
1894 Control.vector4_u32[0] = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFF : 0;
1895 Control.vector4_u32[1] = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFF : 0;
1896 Control.vector4_u32[2] = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFF : 0;
1897 Control.vector4_u32[3] = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFF : 0;
1900 #elif defined(_XM_SSE_INTRINSICS_)
1901 // Test if less than or equal
1902 XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
1903 // Negate the bounds
1904 XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
1905 // Test if greater or equal (Reversed)
1906 vTemp2 = _mm_cmple_ps(vTemp2,V)
// AND the two half-tests: lane true only if inside both limits.
1908 vTemp1 = _mm_and_ps(vTemp1,vTemp2);
// NOTE(review): partial dump — the return of vTemp1 is omitted here.
1910 #else // _XM_VMX128_INTRINSICS_
1911 #endif // _XM_VMX128_INTRINSICS_
1914 //------------------------------------------------------------------------------
// Bounds test (see XMVectorInBounds) that additionally reports a CR6 "all in
// bounds" summary through *pCR.
1916 XMFINLINE XMVECTOR XMVectorInBoundsR
1923 #if defined(_XM_NO_INTRINSICS_)
1924 UINT ux, uy, uz, uw, CR;
1927 XMASSERT( pCR != 0 );
1929 ux = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
1930 uy = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
1931 uz = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
1932 uw = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
// NOTE(review): partial dump — the CR=0 default, the `if (ux&uy&uz&uw)` test and
// the store of CR through pCR are omitted in this listing.
1938 // All elements are in bounds
1939 CR = XM_CRMASK_CR6BOUNDS;
1942 Control.vector4_u32[0] = ux;
1943 Control.vector4_u32[1] = uy;
1944 Control.vector4_u32[2] = uz;
1945 Control.vector4_u32[3] = uw;
1948 #elif defined(_XM_SSE_INTRINSICS_)
1949 XMASSERT( pCR != 0 );
1950 // Test if less than or equal
1951 XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
1952 // Negate the bounds
1953 XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
1954 // Test if greater or equal (Reversed)
1955 vTemp2 = _mm_cmple_ps(vTemp2,V)
// AND the two half-tests: lane true only if inside both limits.
1957 vTemp1 = _mm_and_ps(vTemp1,vTemp2);
// NOTE(review): partial dump — the CR=0 default before this test, the *pCR
// store and the return of vTemp1 are omitted here.
1960 if (_mm_movemask_ps(vTemp1)==0xf) {
1961 // All elements are in bounds
1962 CR = XM_CRMASK_CR6BOUNDS;
1966 #else // _XM_VMX128_INTRINSICS_
1967 #endif // _XM_VMX128_INTRINSICS_
1970 //------------------------------------------------------------------------------
// Per-component NaN test: lane is 0xFFFFFFFF when the float is any NaN
// (exponent all ones and mantissa non-zero — see the XMISNAN macro).
1972 XMFINLINE XMVECTOR XMVectorIsNaN
1977 #if defined(_XM_NO_INTRINSICS_)
1980 Control.vector4_u32[0] = XMISNAN(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
1981 Control.vector4_u32[1] = XMISNAN(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
1982 Control.vector4_u32[2] = XMISNAN(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
1983 Control.vector4_u32[3] = XMISNAN(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
1986 #elif defined(_XM_SSE_INTRINSICS_)
1987 // Mask off the exponent
1988 __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
1989 // Mask off the mantissa
1990 __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
1991 // Are any of the exponents == 0x7F800000?
1992 vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
1993 // Are any of the mantissa's zero? (SSE2 doesn't have a neq test)
1994 vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
1995 // Perform a not on the NaN test to be true on NON-zero mantissas
1996 vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
1997 // If any are NaN, the signs are true after the merge above
1998 return reinterpret_cast<const XMVECTOR *>(&vTempNan)[0];
1999 #else // _XM_VMX128_INTRINSICS_
2000 #endif // _XM_VMX128_INTRINSICS_
2003 //------------------------------------------------------------------------------
// Per-component infinity test: lane is 0xFFFFFFFF when the float is +inf or
// -inf (see the XMISINF macro).
2005 XMFINLINE XMVECTOR XMVectorIsInfinite
2010 #if defined(_XM_NO_INTRINSICS_)
2013 Control.vector4_u32[0] = XMISINF(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
2014 Control.vector4_u32[1] = XMISINF(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
2015 Control.vector4_u32[2] = XMISINF(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
2016 Control.vector4_u32[3] = XMISINF(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
2019 #elif defined(_XM_SSE_INTRINSICS_)
2020 // Mask off the sign bit
2021 __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
2022 // Compare to infinity
2023 vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
2024 // If any are infinity, the signs are true.
// NOTE(review): partial dump — the return of vTemp is omitted here.
2026 #else // _XM_VMX128_INTRINSICS_
2027 #endif // _XM_VMX128_INTRINSICS_
2030 //------------------------------------------------------------------------------
2031 // Rounding and clamping operations
2032 //------------------------------------------------------------------------------
2034 //------------------------------------------------------------------------------
// Per-component minimum of two vectors.
2036 XMFINLINE XMVECTOR XMVectorMin
2042 #if defined(_XM_NO_INTRINSICS_)
2045 Result.vector4_f32[0] = (V1.vector4_f32[0] < V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0];
2046 Result.vector4_f32[1] = (V1.vector4_f32[1] < V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1];
2047 Result.vector4_f32[2] = (V1.vector4_f32[2] < V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2];
2048 Result.vector4_f32[3] = (V1.vector4_f32[3] < V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3];
2051 #elif defined(_XM_SSE_INTRINSICS_)
2052 return _mm_min_ps( V1, V2 );
2053 #else // _XM_VMX128_INTRINSICS_
2054 #endif // _XM_VMX128_INTRINSICS_
2057 //------------------------------------------------------------------------------
// Per-component maximum of two vectors.
2059 XMFINLINE XMVECTOR XMVectorMax
2065 #if defined(_XM_NO_INTRINSICS_)
2068 Result.vector4_f32[0] = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0];
2069 Result.vector4_f32[1] = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1];
2070 Result.vector4_f32[2] = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2];
2071 Result.vector4_f32[3] = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3];
2074 #elif defined(_XM_SSE_INTRINSICS_)
2075 return _mm_max_ps( V1, V2 );
2076 #else // _XM_VMX128_INTRINSICS_
2077 #endif // _XM_VMX128_INTRINSICS_
2080 //------------------------------------------------------------------------------
// Round each component to the nearest integer (half-away-from-zero in the
// scalar path: bias by +/-0.5 then truncate).
2082 XMFINLINE XMVECTOR XMVectorRound
2087 #if defined(_XM_NO_INTRINSICS_)
2091 CONST XMVECTOR Zero = XMVectorZero();
2092 CONST XMVECTOR BiasPos = XMVectorReplicate(0.5f);
2093 CONST XMVECTOR BiasNeg = XMVectorReplicate(-0.5f);
// Pick +0.5 for non-negative lanes, -0.5 for negative lanes, add, truncate.
2095 Bias = XMVectorLess(V, Zero);
2096 Bias = XMVectorSelect(BiasPos, BiasNeg, Bias);
2097 Result = XMVectorAdd(V, Bias);
2098 Result = XMVectorTruncate(Result);
2102 #elif defined(_XM_SSE_INTRINSICS_)
2103 // To handle NAN, INF and numbers greater than 8388608, use masking
2104 // Get the abs value
2105 __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
2106 // Test for greater than 8388608 (all floats with NO fractionals, NAN and INF)
2107 vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
2108 // Convert to int and back to float for rounding
2109 __m128i vInt = _mm_cvtps_epi32(V);
2110 // Convert back to floats
2111 XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
2112 // All numbers less than 8388608 will use the round to int
2113 vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
2114 // All others, use the ORIGINAL value
2115 vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
2116 vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
// NOTE(review): partial dump — the return of vResult is omitted here.
2118 #else // _XM_VMX128_INTRINSICS_
2119 #endif // _XM_VMX128_INTRINSICS_
2122 //------------------------------------------------------------------------------
// Round each component toward zero. In the scalar path NaN inputs become the
// quiet NaN bit pattern 0x7FC00000, and values with magnitude >= 2^23 (which
// have no fractional part) pass through unchanged.
2124 XMFINLINE XMVECTOR XMVectorTruncate
2129 #if defined(_XM_NO_INTRINSICS_)
2134 Result.vector4_f32[0] = 0.0f;
2136 for (i = 0; i < 4; i++)
2138 if (XMISNAN(V.vector4_f32[i]))
2140 Result.vector4_u32[i] = 0x7FC00000;
2142 else if (fabsf(V.vector4_f32[i]) < 8388608.0f)
2144 Result.vector4_f32[i] = (FLOAT)((INT)V.vector4_f32[i]);
2148 Result.vector4_f32[i] = V.vector4_f32[i];
2153 #elif defined(_XM_SSE_INTRINSICS_)
2154 // To handle NAN, INF and numbers greater than 8388608, use masking
2155 // Get the abs value
2156 __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
2157 // Test for greater than 8388608 (all floats with NO fractionals, NAN and INF)
2158 vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
2159 // Convert to int and back to float for rounding with truncation
2160 __m128i vInt = _mm_cvttps_epi32(V);
2161 // Convert back to floats
2162 XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
2163 // All numbers less than 8388608 will use the round to int
2164 vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
2165 // All others, use the ORIGINAL value
2166 vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
2167 vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
// NOTE(review): partial dump — the return of vResult is omitted here.
2169 #else // _XM_VMX128_INTRINSICS_
2170 #endif // _XM_VMX128_INTRINSICS_
2173 //------------------------------------------------------------------------------
// Per-component floor (round toward negative infinity).
2175 XMFINLINE XMVECTOR XMVectorFloor
2180 #if defined(_XM_NO_INTRINSICS_)
2182 XMVECTOR vResult = {
2183 floorf(V.vector4_f32[0]),
2184 floorf(V.vector4_f32[1]),
2185 floorf(V.vector4_f32[2]),
2186 floorf(V.vector4_f32[3])
2190 #elif defined(_XM_SSE_INTRINSICS_)
2191 // To handle NAN, INF and numbers greater than 8388608, use masking
2192 // Get the abs value
2193 __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
2194 // Test for greater than 8388608 (all floats with NO fractionals, NAN and INF)
2195 vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
2196 // Convert to int and back to float for rounding
// Bias down by (0.5 - epsilon) so round-to-nearest in cvtps_epi32 acts as floor.
2197 XMVECTOR vResult = _mm_sub_ps(V,g_XMOneHalfMinusEpsilon);
2198 __m128i vInt = _mm_cvtps_epi32(vResult);
2199 // Convert back to floats
2200 vResult = _mm_cvtepi32_ps(vInt);
2201 // All numbers less than 8388608 will use the round to int
2202 vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
2203 // All others, use the ORIGINAL value
2204 vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
2205 vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
// NOTE(review): partial dump — the return of vResult is omitted here.
2207 #else // _XM_VMX128_INTRINSICS_
2208 #endif // _XM_VMX128_INTRINSICS_
2211 //------------------------------------------------------------------------------
// Per-component ceiling (round toward positive infinity).
2213 XMFINLINE XMVECTOR XMVectorCeiling
2218 #if defined(_XM_NO_INTRINSICS_)
2219 XMVECTOR vResult = {
2220 ceilf(V.vector4_f32[0]),
2221 ceilf(V.vector4_f32[1]),
2222 ceilf(V.vector4_f32[2]),
2223 ceilf(V.vector4_f32[3])
2227 #elif defined(_XM_SSE_INTRINSICS_)
2228 // To handle NAN, INF and numbers greater than 8388608, use masking
2229 // Get the abs value
2230 __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
2231 // Test for greater than 8388608 (all floats with NO fractionals, NAN and INF)
2232 vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
2233 // Convert to int and back to float for rounding
// Bias up by (0.5 - epsilon) so round-to-nearest in cvtps_epi32 acts as ceiling.
2234 XMVECTOR vResult = _mm_add_ps(V,g_XMOneHalfMinusEpsilon);
2235 __m128i vInt = _mm_cvtps_epi32(vResult);
2236 // Convert back to floats
2237 vResult = _mm_cvtepi32_ps(vInt);
2238 // All numbers less than 8388608 will use the round to int
2239 vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
2240 // All others, use the ORIGINAL value
2241 vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
2242 vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
// NOTE(review): partial dump — the return of vResult is omitted here.
2244 #else // _XM_VMX128_INTRINSICS_
2245 #endif // _XM_VMX128_INTRINSICS_
2248 //------------------------------------------------------------------------------
// Per-component clamp of V into [Min, Max]; asserts Min <= Max componentwise.
2250 XMFINLINE XMVECTOR XMVectorClamp
2257 #if defined(_XM_NO_INTRINSICS_)
2261 XMASSERT(XMVector4LessOrEqual(Min, Max));
// max() then min() implements the clamp without branches.
2263 Result = XMVectorMax(Min, V);
2264 Result = XMVectorMin(Max, Result);
2268 #elif defined(_XM_SSE_INTRINSICS_)
2270 XMASSERT(XMVector4LessOrEqual(Min, Max));
2271 vResult = _mm_max_ps(Min,V);
2272 vResult = _mm_min_ps(vResult,Max);
// NOTE(review): partial dump — the declaration and return of vResult are omitted.
2274 #else // _XM_VMX128_INTRINSICS_
2275 #endif // _XM_VMX128_INTRINSICS_
2278 //------------------------------------------------------------------------------
2280 XMFINLINE XMVECTOR XMVectorSaturate
2285 #if defined(_XM_NO_INTRINSICS_)
2287 CONST XMVECTOR Zero = XMVectorZero();
2289 return XMVectorClamp(V, Zero, g_XMOne.v);
2291 #elif defined(_XM_SSE_INTRINSICS_)
2293 XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
2295 return _mm_min_ps(vResult,g_XMOne);
2296 #else // _XM_VMX128_INTRINSICS_
2297 #endif // _XM_VMX128_INTRINSICS_
2300 //------------------------------------------------------------------------------
2301 // Bitwise logical operations
2302 //------------------------------------------------------------------------------
2304 XMFINLINE XMVECTOR XMVectorAndInt
2310 #if defined(_XM_NO_INTRINSICS_)
2314 Result.vector4_u32[0] = V1.vector4_u32[0] & V2.vector4_u32[0];
2315 Result.vector4_u32[1] = V1.vector4_u32[1] & V2.vector4_u32[1];
2316 Result.vector4_u32[2] = V1.vector4_u32[2] & V2.vector4_u32[2];
2317 Result.vector4_u32[3] = V1.vector4_u32[3] & V2.vector4_u32[3];
2320 #elif defined(_XM_SSE_INTRINSICS_)
2321 return _mm_and_ps(V1,V2);
2322 #else // _XM_VMX128_INTRINSICS_
2323 #endif // _XM_VMX128_INTRINSICS_
2326 //------------------------------------------------------------------------------
2328 XMFINLINE XMVECTOR XMVectorAndCInt
2334 #if defined(_XM_NO_INTRINSICS_)
2338 Result.vector4_u32[0] = V1.vector4_u32[0] & ~V2.vector4_u32[0];
2339 Result.vector4_u32[1] = V1.vector4_u32[1] & ~V2.vector4_u32[1];
2340 Result.vector4_u32[2] = V1.vector4_u32[2] & ~V2.vector4_u32[2];
2341 Result.vector4_u32[3] = V1.vector4_u32[3] & ~V2.vector4_u32[3];
2345 #elif defined(_XM_SSE_INTRINSICS_)
2346 __m128i V = _mm_andnot_si128( reinterpret_cast<const __m128i *>(&V2)[0], reinterpret_cast<const __m128i *>(&V1)[0] );
2347 return reinterpret_cast<__m128 *>(&V)[0];
2348 #else // _XM_VMX128_INTRINSICS_
2349 #endif // _XM_VMX128_INTRINSICS_
2352 //------------------------------------------------------------------------------
2354 XMFINLINE XMVECTOR XMVectorOrInt
2360 #if defined(_XM_NO_INTRINSICS_)
2364 Result.vector4_u32[0] = V1.vector4_u32[0] | V2.vector4_u32[0];
2365 Result.vector4_u32[1] = V1.vector4_u32[1] | V2.vector4_u32[1];
2366 Result.vector4_u32[2] = V1.vector4_u32[2] | V2.vector4_u32[2];
2367 Result.vector4_u32[3] = V1.vector4_u32[3] | V2.vector4_u32[3];
2371 #elif defined(_XM_SSE_INTRINSICS_)
2372 __m128i V = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
2373 return reinterpret_cast<__m128 *>(&V)[0];
2374 #else // _XM_VMX128_INTRINSICS_
2375 #endif // _XM_VMX128_INTRINSICS_
2378 //------------------------------------------------------------------------------
2380 XMFINLINE XMVECTOR XMVectorNorInt
2386 #if defined(_XM_NO_INTRINSICS_)
2390 Result.vector4_u32[0] = ~(V1.vector4_u32[0] | V2.vector4_u32[0]);
2391 Result.vector4_u32[1] = ~(V1.vector4_u32[1] | V2.vector4_u32[1]);
2392 Result.vector4_u32[2] = ~(V1.vector4_u32[2] | V2.vector4_u32[2]);
2393 Result.vector4_u32[3] = ~(V1.vector4_u32[3] | V2.vector4_u32[3]);
2397 #elif defined(_XM_SSE_INTRINSICS_)
2399 Result = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
2400 Result = _mm_andnot_si128( Result,g_XMNegOneMask);
2401 return reinterpret_cast<__m128 *>(&Result)[0];
2402 #else // _XM_VMX128_INTRINSICS_
2403 #endif // _XM_VMX128_INTRINSICS_
2406 //------------------------------------------------------------------------------
2408 XMFINLINE XMVECTOR XMVectorXorInt
2414 #if defined(_XM_NO_INTRINSICS_)
2418 Result.vector4_u32[0] = V1.vector4_u32[0] ^ V2.vector4_u32[0];
2419 Result.vector4_u32[1] = V1.vector4_u32[1] ^ V2.vector4_u32[1];
2420 Result.vector4_u32[2] = V1.vector4_u32[2] ^ V2.vector4_u32[2];
2421 Result.vector4_u32[3] = V1.vector4_u32[3] ^ V2.vector4_u32[3];
2425 #elif defined(_XM_SSE_INTRINSICS_)
2426 __m128i V = _mm_xor_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
2427 return reinterpret_cast<__m128 *>(&V)[0];
2428 #else // _XM_VMX128_INTRINSICS_
2429 #endif // _XM_VMX128_INTRINSICS_
2432 //------------------------------------------------------------------------------
2433 // Computation operations
2434 //------------------------------------------------------------------------------
2436 //------------------------------------------------------------------------------
2438 XMFINLINE XMVECTOR XMVectorNegate
2443 #if defined(_XM_NO_INTRINSICS_)
2447 Result.vector4_f32[0] = -V.vector4_f32[0];
2448 Result.vector4_f32[1] = -V.vector4_f32[1];
2449 Result.vector4_f32[2] = -V.vector4_f32[2];
2450 Result.vector4_f32[3] = -V.vector4_f32[3];
2454 #elif defined(_XM_SSE_INTRINSICS_)
2457 Z = _mm_setzero_ps();
2459 return _mm_sub_ps( Z, V );
2460 #else // _XM_VMX128_INTRINSICS_
2461 #endif // _XM_VMX128_INTRINSICS_
2464 //------------------------------------------------------------------------------
2466 XMFINLINE XMVECTOR XMVectorAdd
2472 #if defined(_XM_NO_INTRINSICS_)
2476 Result.vector4_f32[0] = V1.vector4_f32[0] + V2.vector4_f32[0];
2477 Result.vector4_f32[1] = V1.vector4_f32[1] + V2.vector4_f32[1];
2478 Result.vector4_f32[2] = V1.vector4_f32[2] + V2.vector4_f32[2];
2479 Result.vector4_f32[3] = V1.vector4_f32[3] + V2.vector4_f32[3];
2483 #elif defined(_XM_SSE_INTRINSICS_)
2484 return _mm_add_ps( V1, V2 );
2485 #else // _XM_VMX128_INTRINSICS_
2486 #endif // _XM_VMX128_INTRINSICS_
2489 //------------------------------------------------------------------------------
2491 XMFINLINE XMVECTOR XMVectorAddAngles
2497 #if defined(_XM_NO_INTRINSICS_)
2502 CONST XMVECTOR Zero = XMVectorZero();
2504 // Add the given angles together. If the range of V1 is such
2505 // that -Pi <= V1 < Pi and the range of V2 is such that
2506 // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
2507 // will be -Pi <= Result < Pi.
2508 Result = XMVectorAdd(V1, V2);
2510 Mask = XMVectorLess(Result, g_XMNegativePi.v);
2511 Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);
2513 Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
2514 Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);
2516 Result = XMVectorAdd(Result, Offset);
2520 #elif defined(_XM_SSE_INTRINSICS_)
2521 // Adjust the angles
2522 XMVECTOR vResult = _mm_add_ps(V1,V2);
2524 XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
2525 vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
2526 // Add 2Pi to all entries less than -Pi
2527 vResult = _mm_add_ps(vResult,vOffset);
2528 // Greater than or equal to Pi?
2529 vOffset = _mm_cmpge_ps(vResult,g_XMPi);
2530 vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
2531 // Sub 2Pi to all entries greater than Pi
2532 vResult = _mm_sub_ps(vResult,vOffset);
2534 #else // _XM_VMX128_INTRINSICS_
2535 #endif // _XM_VMX128_INTRINSICS_
2538 //------------------------------------------------------------------------------
2540 XMFINLINE XMVECTOR XMVectorSubtract
2546 #if defined(_XM_NO_INTRINSICS_)
2550 Result.vector4_f32[0] = V1.vector4_f32[0] - V2.vector4_f32[0];
2551 Result.vector4_f32[1] = V1.vector4_f32[1] - V2.vector4_f32[1];
2552 Result.vector4_f32[2] = V1.vector4_f32[2] - V2.vector4_f32[2];
2553 Result.vector4_f32[3] = V1.vector4_f32[3] - V2.vector4_f32[3];
2557 #elif defined(_XM_SSE_INTRINSICS_)
2558 return _mm_sub_ps( V1, V2 );
2559 #else // _XM_VMX128_INTRINSICS_
2560 #endif // _XM_VMX128_INTRINSICS_
2563 //------------------------------------------------------------------------------
2565 XMFINLINE XMVECTOR XMVectorSubtractAngles
2571 #if defined(_XM_NO_INTRINSICS_)
2576 CONST XMVECTOR Zero = XMVectorZero();
2578 // Subtract the given angles. If the range of V1 is such
2579 // that -Pi <= V1 < Pi and the range of V2 is such that
2580 // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
2581 // will be -Pi <= Result < Pi.
2582 Result = XMVectorSubtract(V1, V2);
2584 Mask = XMVectorLess(Result, g_XMNegativePi.v);
2585 Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);
2587 Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
2588 Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);
2590 Result = XMVectorAdd(Result, Offset);
2594 #elif defined(_XM_SSE_INTRINSICS_)
2595 // Adjust the angles
2596 XMVECTOR vResult = _mm_sub_ps(V1,V2);
2598 XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
2599 vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
2600 // Add 2Pi to all entries less than -Pi
2601 vResult = _mm_add_ps(vResult,vOffset);
2602 // Greater than or equal to Pi?
2603 vOffset = _mm_cmpge_ps(vResult,g_XMPi);
2604 vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
2605 // Sub 2Pi to all entries greater than Pi
2606 vResult = _mm_sub_ps(vResult,vOffset);
2608 #else // _XM_VMX128_INTRINSICS_
2609 #endif // _XM_VMX128_INTRINSICS_
2612 //------------------------------------------------------------------------------
2614 XMFINLINE XMVECTOR XMVectorMultiply
2620 #if defined(_XM_NO_INTRINSICS_)
2622 V1.vector4_f32[0] * V2.vector4_f32[0],
2623 V1.vector4_f32[1] * V2.vector4_f32[1],
2624 V1.vector4_f32[2] * V2.vector4_f32[2],
2625 V1.vector4_f32[3] * V2.vector4_f32[3]
2628 #elif defined(_XM_SSE_INTRINSICS_)
2629 return _mm_mul_ps( V1, V2 );
2630 #else // _XM_VMX128_INTRINSICS_
2631 #endif // _XM_VMX128_INTRINSICS_
2634 //------------------------------------------------------------------------------
2636 XMFINLINE XMVECTOR XMVectorMultiplyAdd
2643 #if defined(_XM_NO_INTRINSICS_)
2644 XMVECTOR vResult = {
2645 (V1.vector4_f32[0] * V2.vector4_f32[0]) + V3.vector4_f32[0],
2646 (V1.vector4_f32[1] * V2.vector4_f32[1]) + V3.vector4_f32[1],
2647 (V1.vector4_f32[2] * V2.vector4_f32[2]) + V3.vector4_f32[2],
2648 (V1.vector4_f32[3] * V2.vector4_f32[3]) + V3.vector4_f32[3]
2652 #elif defined(_XM_SSE_INTRINSICS_)
2653 XMVECTOR vResult = _mm_mul_ps( V1, V2 );
2654 return _mm_add_ps(vResult, V3 );
2655 #else // _XM_VMX128_INTRINSICS_
2656 #endif // _XM_VMX128_INTRINSICS_
2659 //------------------------------------------------------------------------------
2661 XMFINLINE XMVECTOR XMVectorDivide
2667 #if defined(_XM_NO_INTRINSICS_)
2669 Result.vector4_f32[0] = V1.vector4_f32[0] / V2.vector4_f32[0];
2670 Result.vector4_f32[1] = V1.vector4_f32[1] / V2.vector4_f32[1];
2671 Result.vector4_f32[2] = V1.vector4_f32[2] / V2.vector4_f32[2];
2672 Result.vector4_f32[3] = V1.vector4_f32[3] / V2.vector4_f32[3];
2674 #elif defined(_XM_SSE_INTRINSICS_)
2675 return _mm_div_ps( V1, V2 );
2676 #else // _XM_VMX128_INTRINSICS_
2677 #endif // _XM_VMX128_INTRINSICS_
2680 //------------------------------------------------------------------------------
2682 XMFINLINE XMVECTOR XMVectorNegativeMultiplySubtract
2689 #if defined(_XM_NO_INTRINSICS_)
2691 XMVECTOR vResult = {
2692 V3.vector4_f32[0] - (V1.vector4_f32[0] * V2.vector4_f32[0]),
2693 V3.vector4_f32[1] - (V1.vector4_f32[1] * V2.vector4_f32[1]),
2694 V3.vector4_f32[2] - (V1.vector4_f32[2] * V2.vector4_f32[2]),
2695 V3.vector4_f32[3] - (V1.vector4_f32[3] * V2.vector4_f32[3])
2699 #elif defined(_XM_SSE_INTRINSICS_)
2700 XMVECTOR R = _mm_mul_ps( V1, V2 );
2701 return _mm_sub_ps( V3, R );
2702 #else // _XM_VMX128_INTRINSICS_
2703 #endif // _XM_VMX128_INTRINSICS_
2706 //------------------------------------------------------------------------------
2708 XMFINLINE XMVECTOR XMVectorScale
2714 #if defined(_XM_NO_INTRINSICS_)
2715 XMVECTOR vResult = {
2716 V.vector4_f32[0] * ScaleFactor,
2717 V.vector4_f32[1] * ScaleFactor,
2718 V.vector4_f32[2] * ScaleFactor,
2719 V.vector4_f32[3] * ScaleFactor
2723 #elif defined(_XM_SSE_INTRINSICS_)
2724 XMVECTOR vResult = _mm_set_ps1(ScaleFactor);
2725 return _mm_mul_ps(vResult,V);
2726 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
2727 #endif // _XM_VMX128_INTRINSICS_
2730 //------------------------------------------------------------------------------
2732 XMFINLINE XMVECTOR XMVectorReciprocalEst
2737 #if defined(_XM_NO_INTRINSICS_)
2742 Result.vector4_f32[0] = 0.0f;
2744 for (i = 0; i < 4; i++)
2746 if (XMISNAN(V.vector4_f32[i]))
2748 Result.vector4_u32[i] = 0x7FC00000;
2750 else if (V.vector4_f32[i] == 0.0f || V.vector4_f32[i] == -0.0f)
2752 Result.vector4_u32[i] = 0x7F800000 | (V.vector4_u32[i] & 0x80000000);
2756 Result.vector4_f32[i] = 1.f / V.vector4_f32[i];
2761 #elif defined(_XM_SSE_INTRINSICS_)
2762 return _mm_rcp_ps(V);
2763 #else // _XM_VMX128_INTRINSICS_
2764 #endif // _XM_VMX128_INTRINSICS_
2767 //------------------------------------------------------------------------------
2769 XMFINLINE XMVECTOR XMVectorReciprocal
2774 #if defined(_XM_NO_INTRINSICS_)
2775 return XMVectorReciprocalEst(V);
2777 #elif defined(_XM_SSE_INTRINSICS_)
2778 return _mm_div_ps(g_XMOne,V);
2779 #else // _XM_VMX128_INTRINSICS_
2780 #endif // _XM_VMX128_INTRINSICS_
2783 //------------------------------------------------------------------------------
2784 // Return an estimated square root
2785 XMFINLINE XMVECTOR XMVectorSqrtEst
2790 #if defined(_XM_NO_INTRINSICS_)
2793 // if (x == +Infinity) sqrt(x) = +Infinity
2794 // if (x == +0.0f) sqrt(x) = +0.0f
2795 // if (x == -0.0f) sqrt(x) = -0.0f
2796 // if (x < 0.0f) sqrt(x) = QNaN
2798 XMVECTOR Result = XMVectorReciprocalSqrtEst(V);
2799 XMVECTOR Zero = XMVectorZero();
2800 XMVECTOR VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
2801 XMVECTOR VEqualsZero = XMVectorEqual(V, Zero);
2802 Result = XMVectorMultiply(V, Result);
2803 Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
2804 Result = XMVectorSelect(V, Result, Select);
2807 #elif defined(_XM_SSE_INTRINSICS_)
2808 return _mm_sqrt_ps(V);
2809 #else // _XM_VMX128_INTRINSICS_
2810 #endif // _XM_VMX128_INTRINSICS_
2813 //------------------------------------------------------------------------------
2815 XMFINLINE XMVECTOR XMVectorSqrt
2820 #if defined(_XM_NO_INTRINSICS_)
2823 XMVECTOR VEqualsInfinity, VEqualsZero;
2827 // if (x == +Infinity) sqrt(x) = +Infinity
2828 // if (x == +0.0f) sqrt(x) = +0.0f
2829 // if (x == -0.0f) sqrt(x) = -0.0f
2830 // if (x < 0.0f) sqrt(x) = QNaN
2832 Result = XMVectorReciprocalSqrt(V);
2833 Zero = XMVectorZero();
2834 VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
2835 VEqualsZero = XMVectorEqual(V, Zero);
2836 Result = XMVectorMultiply(V, Result);
2837 Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
2838 Result = XMVectorSelect(V, Result, Select);
2842 #elif defined(_XM_SSE_INTRINSICS_)
2843 return _mm_sqrt_ps(V);
2844 #else // _XM_VMX128_INTRINSICS_
2845 #endif // _XM_VMX128_INTRINSICS_
2848 //------------------------------------------------------------------------------
2850 XMFINLINE XMVECTOR XMVectorReciprocalSqrtEst
2855 #if defined(_XM_NO_INTRINSICS_)
2857 // if (x == +Infinity) rsqrt(x) = 0
2858 // if (x == +0.0f) rsqrt(x) = +Infinity
2859 // if (x == -0.0f) rsqrt(x) = -Infinity
2860 // if (x < 0.0f) rsqrt(x) = QNaN
2866 Result.vector4_f32[0] = 0.0f;
2868 for (i = 0; i < 4; i++)
2870 if (XMISNAN(V.vector4_f32[i]))
2872 Result.vector4_u32[i] = 0x7FC00000;
2874 else if (V.vector4_f32[i] == 0.0f || V.vector4_f32[i] == -0.0f)
2876 Result.vector4_u32[i] = 0x7F800000 | (V.vector4_u32[i] & 0x80000000);
2878 else if (V.vector4_f32[i] < 0.0f)
2880 Result.vector4_u32[i] = 0x7FFFFFFF;
2882 else if (XMISINF(V.vector4_f32[i]))
2884 Result.vector4_f32[i] = 0.0f;
2888 Result.vector4_f32[i] = 1.0f / sqrtf(V.vector4_f32[i]);
2894 #elif defined(_XM_SSE_INTRINSICS_)
2895 return _mm_rsqrt_ps(V);
2896 #else // _XM_VMX128_INTRINSICS_
2897 #endif // _XM_VMX128_INTRINSICS_
2900 //------------------------------------------------------------------------------
2902 XMFINLINE XMVECTOR XMVectorReciprocalSqrt
2907 #if defined(_XM_NO_INTRINSICS_)
2909 return XMVectorReciprocalSqrtEst(V);
2911 #elif defined(_XM_SSE_INTRINSICS_)
2912 XMVECTOR vResult = _mm_sqrt_ps(V);
2913 vResult = _mm_div_ps(g_XMOne,vResult);
2915 #else // _XM_VMX128_INTRINSICS_
2916 #endif // _XM_VMX128_INTRINSICS_
2919 //------------------------------------------------------------------------------
2921 XMFINLINE XMVECTOR XMVectorExpEst
2926 #if defined(_XM_NO_INTRINSICS_)
2929 Result.vector4_f32[0] = powf(2.0f, V.vector4_f32[0]);
2930 Result.vector4_f32[1] = powf(2.0f, V.vector4_f32[1]);
2931 Result.vector4_f32[2] = powf(2.0f, V.vector4_f32[2]);
2932 Result.vector4_f32[3] = powf(2.0f, V.vector4_f32[3]);
2935 #elif defined(_XM_SSE_INTRINSICS_)
2936 XMVECTOR vResult = _mm_setr_ps(
2937 powf(2.0f,XMVectorGetX(V)),
2938 powf(2.0f,XMVectorGetY(V)),
2939 powf(2.0f,XMVectorGetZ(V)),
2940 powf(2.0f,XMVectorGetW(V)));
2942 #else // _XM_VMX128_INTRINSICS_
2943 #endif // _XM_VMX128_INTRINSICS_
2946 //------------------------------------------------------------------------------
2948 XMINLINE XMVECTOR XMVectorExp
2953 #if defined(_XM_NO_INTRINSICS_)
2956 XMVECTOR R, R2, R3, R4;
2958 XMVECTOR C0X, C0Y, C0Z, C0W;
2959 XMVECTOR C1X, C1Y, C1Z, C1W;
2961 static CONST XMVECTOR C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f};
2962 static CONST XMVECTOR C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f};
2964 R = XMVectorFloor(V);
2965 E = XMVectorExpEst(R);
2966 R = XMVectorSubtract(V, R);
2967 R2 = XMVectorMultiply(R, R);
2968 R3 = XMVectorMultiply(R, R2);
2969 R4 = XMVectorMultiply(R2, R2);
2971 C0X = XMVectorSplatX(C0);
2972 C0Y = XMVectorSplatY(C0);
2973 C0Z = XMVectorSplatZ(C0);
2974 C0W = XMVectorSplatW(C0);
2976 C1X = XMVectorSplatX(C1);
2977 C1Y = XMVectorSplatY(C1);
2978 C1Z = XMVectorSplatZ(C1);
2979 C1W = XMVectorSplatW(C1);
2981 V0 = XMVectorMultiplyAdd(R, C0Y, C0X);
2982 V0 = XMVectorMultiplyAdd(R2, C0Z, V0);
2983 V0 = XMVectorMultiplyAdd(R3, C0W, V0);
2985 V1 = XMVectorMultiplyAdd(R, C1Y, C1X);
2986 V1 = XMVectorMultiplyAdd(R2, C1Z, V1);
2987 V1 = XMVectorMultiplyAdd(R3, C1W, V1);
2989 S = XMVectorMultiplyAdd(R4, V1, V0);
2991 S = XMVectorReciprocal(S);
2992 Result = XMVectorMultiply(E, S);
2996 #elif defined(_XM_SSE_INTRINSICS_)
2997 static CONST XMVECTORF32 C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f};
2998 static CONST XMVECTORF32 C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f};
3000 // Get the integer of the input
3001 XMVECTOR R = XMVectorFloor(V);
3002 // Get the exponent estimate
3003 XMVECTOR E = XMVectorExpEst(R);
3004 // Get the fractional only
3005 R = _mm_sub_ps(V,R);
3007 XMVECTOR R2 = _mm_mul_ps(R,R);
3009 XMVECTOR R3 = _mm_mul_ps(R,R2);
3011 XMVECTOR V0 = _mm_load_ps1(&C0.f[1]);
3012 V0 = _mm_mul_ps(V0,R);
3013 XMVECTOR vConstants = _mm_load_ps1(&C0.f[0]);
3014 V0 = _mm_add_ps(V0,vConstants);
3015 vConstants = _mm_load_ps1(&C0.f[2]);
3016 vConstants = _mm_mul_ps(vConstants,R2);
3017 V0 = _mm_add_ps(V0,vConstants);
3018 vConstants = _mm_load_ps1(&C0.f[3]);
3019 vConstants = _mm_mul_ps(vConstants,R3);
3020 V0 = _mm_add_ps(V0,vConstants);
3022 XMVECTOR V1 = _mm_load_ps1(&C1.f[1]);
3023 V1 = _mm_mul_ps(V1,R);
3024 vConstants = _mm_load_ps1(&C1.f[0]);
3025 V1 = _mm_add_ps(V1,vConstants);
3026 vConstants = _mm_load_ps1(&C1.f[2]);
3027 vConstants = _mm_mul_ps(vConstants,R2);
3028 V1 = _mm_add_ps(V1,vConstants);
3029 vConstants = _mm_load_ps1(&C1.f[3]);
3030 vConstants = _mm_mul_ps(vConstants,R3);
3031 V1 = _mm_add_ps(V1,vConstants);
3033 R2 = _mm_mul_ps(R2,R2);
3034 R2 = _mm_mul_ps(R2,V1);
3035 R2 = _mm_add_ps(R2,V0);
3036 E = _mm_div_ps(E,R2);
3038 #else // _XM_VMX128_INTRINSICS_
3039 #endif // _XM_VMX128_INTRINSICS_
3042 //------------------------------------------------------------------------------
3044 XMFINLINE XMVECTOR XMVectorLogEst
3049 #if defined(_XM_NO_INTRINSICS_)
3051 FLOAT fScale = (1.0f / logf(2.0f));
3054 Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale;
3055 Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale;
3056 Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale;
3057 Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale;
3060 #elif defined(_XM_SSE_INTRINSICS_)
3061 XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f));
3062 XMVECTOR vResult = _mm_setr_ps(
3063 logf(XMVectorGetX(V)),
3064 logf(XMVectorGetY(V)),
3065 logf(XMVectorGetZ(V)),
3066 logf(XMVectorGetW(V)));
3067 vResult = _mm_mul_ps(vResult,vScale);
3069 #else // _XM_VMX128_INTRINSICS_
3070 #endif // _XM_VMX128_INTRINSICS_
3073 //------------------------------------------------------------------------------
3075 XMINLINE XMVECTOR XMVectorLog
3080 #if defined(_XM_NO_INTRINSICS_)
3081 FLOAT fScale = (1.0f / logf(2.0f));
3084 Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale;
3085 Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale;
3086 Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale;
3087 Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale;
3090 #elif defined(_XM_SSE_INTRINSICS_)
3091 XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f));
3092 XMVECTOR vResult = _mm_setr_ps(
3093 logf(XMVectorGetX(V)),
3094 logf(XMVectorGetY(V)),
3095 logf(XMVectorGetZ(V)),
3096 logf(XMVectorGetW(V)));
3097 vResult = _mm_mul_ps(vResult,vScale);
3099 #else // _XM_VMX128_INTRINSICS_
3100 #endif // _XM_VMX128_INTRINSICS_
3103 //------------------------------------------------------------------------------
3105 XMFINLINE XMVECTOR XMVectorPowEst
3111 #if defined(_XM_NO_INTRINSICS_)
3115 Result.vector4_f32[0] = powf(V1.vector4_f32[0], V2.vector4_f32[0]);
3116 Result.vector4_f32[1] = powf(V1.vector4_f32[1], V2.vector4_f32[1]);
3117 Result.vector4_f32[2] = powf(V1.vector4_f32[2], V2.vector4_f32[2]);
3118 Result.vector4_f32[3] = powf(V1.vector4_f32[3], V2.vector4_f32[3]);
3122 #elif defined(_XM_SSE_INTRINSICS_)
3123 XMVECTOR vResult = _mm_setr_ps(
3124 powf(XMVectorGetX(V1),XMVectorGetX(V2)),
3125 powf(XMVectorGetY(V1),XMVectorGetY(V2)),
3126 powf(XMVectorGetZ(V1),XMVectorGetZ(V2)),
3127 powf(XMVectorGetW(V1),XMVectorGetW(V2)));
3129 #else // _XM_VMX128_INTRINSICS_
3130 #endif // _XM_VMX128_INTRINSICS_
3133 //------------------------------------------------------------------------------
3135 XMFINLINE XMVECTOR XMVectorPow
3141 #if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)
3143 return XMVectorPowEst(V1, V2);
3145 #else // _XM_VMX128_INTRINSICS_
3146 #endif // _XM_VMX128_INTRINSICS_
3149 //------------------------------------------------------------------------------
3151 XMFINLINE XMVECTOR XMVectorAbs
3156 #if defined(_XM_NO_INTRINSICS_)
3157 XMVECTOR vResult = {
3158 fabsf(V.vector4_f32[0]),
3159 fabsf(V.vector4_f32[1]),
3160 fabsf(V.vector4_f32[2]),
3161 fabsf(V.vector4_f32[3])
3165 #elif defined(_XM_SSE_INTRINSICS_)
3166 XMVECTOR vResult = _mm_setzero_ps();
3167 vResult = _mm_sub_ps(vResult,V);
3168 vResult = _mm_max_ps(vResult,V);
3170 #else // _XM_VMX128_INTRINSICS_
3171 #endif // _XM_VMX128_INTRINSICS_
3174 //------------------------------------------------------------------------------
3176 XMFINLINE XMVECTOR XMVectorMod
3182 #if defined(_XM_NO_INTRINSICS_)
3184 XMVECTOR Reciprocal;
3188 // V1 % V2 = V1 - V2 * truncate(V1 / V2)
3189 Reciprocal = XMVectorReciprocal(V2);
3190 Quotient = XMVectorMultiply(V1, Reciprocal);
3191 Quotient = XMVectorTruncate(Quotient);
3192 Result = XMVectorNegativeMultiplySubtract(V2, Quotient, V1);
3196 #elif defined(_XM_SSE_INTRINSICS_)
3197 XMVECTOR vResult = _mm_div_ps(V1, V2);
3198 vResult = XMVectorTruncate(vResult);
3199 vResult = _mm_mul_ps(vResult,V2);
3200 vResult = _mm_sub_ps(V1,vResult);
3202 #else // _XM_VMX128_INTRINSICS_
3203 #endif // _XM_VMX128_INTRINSICS_
3206 //------------------------------------------------------------------------------
3208 XMFINLINE XMVECTOR XMVectorModAngles
3213 #if defined(_XM_NO_INTRINSICS_)
3218 // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
3219 V = XMVectorMultiply(Angles, g_XMReciprocalTwoPi.v);
3220 V = XMVectorRound(V);
3221 Result = XMVectorNegativeMultiplySubtract(g_XMTwoPi.v, V, Angles);
3225 #elif defined(_XM_SSE_INTRINSICS_)
3226 // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
3227 XMVECTOR vResult = _mm_mul_ps(Angles,g_XMReciprocalTwoPi);
3228 // Use the inline function due to complexity for rounding
3229 vResult = XMVectorRound(vResult);
3230 vResult = _mm_mul_ps(vResult,g_XMTwoPi);
3231 vResult = _mm_sub_ps(Angles,vResult);
3233 #else // _XM_VMX128_INTRINSICS_
3234 #endif // _XM_VMX128_INTRINSICS_
3237 //------------------------------------------------------------------------------
3239 XMINLINE XMVECTOR XMVectorSin
3245 #if defined(_XM_NO_INTRINSICS_)
3247 XMVECTOR V1, V2, V3, V5, V7, V9, V11, V13, V15, V17, V19, V21, V23;
3248 XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
3251 V1 = XMVectorModAngles(V);
3253 // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
3254 // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
3255 V2 = XMVectorMultiply(V1, V1);
3256 V3 = XMVectorMultiply(V2, V1);
3257 V5 = XMVectorMultiply(V3, V2);
3258 V7 = XMVectorMultiply(V5, V2);
3259 V9 = XMVectorMultiply(V7, V2);
3260 V11 = XMVectorMultiply(V9, V2);
3261 V13 = XMVectorMultiply(V11, V2);
3262 V15 = XMVectorMultiply(V13, V2);
3263 V17 = XMVectorMultiply(V15, V2);
3264 V19 = XMVectorMultiply(V17, V2);
3265 V21 = XMVectorMultiply(V19, V2);
3266 V23 = XMVectorMultiply(V21, V2);
3268 S1 = XMVectorSplatY(g_XMSinCoefficients0.v);
3269 S2 = XMVectorSplatZ(g_XMSinCoefficients0.v);
3270 S3 = XMVectorSplatW(g_XMSinCoefficients0.v);
3271 S4 = XMVectorSplatX(g_XMSinCoefficients1.v);
3272 S5 = XMVectorSplatY(g_XMSinCoefficients1.v);
3273 S6 = XMVectorSplatZ(g_XMSinCoefficients1.v);
3274 S7 = XMVectorSplatW(g_XMSinCoefficients1.v);
3275 S8 = XMVectorSplatX(g_XMSinCoefficients2.v);
3276 S9 = XMVectorSplatY(g_XMSinCoefficients2.v);
3277 S10 = XMVectorSplatZ(g_XMSinCoefficients2.v);
3278 S11 = XMVectorSplatW(g_XMSinCoefficients2.v);
3280 Result = XMVectorMultiplyAdd(S1, V3, V1);
3281 Result = XMVectorMultiplyAdd(S2, V5, Result);
3282 Result = XMVectorMultiplyAdd(S3, V7, Result);
3283 Result = XMVectorMultiplyAdd(S4, V9, Result);
3284 Result = XMVectorMultiplyAdd(S5, V11, Result);
3285 Result = XMVectorMultiplyAdd(S6, V13, Result);
3286 Result = XMVectorMultiplyAdd(S7, V15, Result);
3287 Result = XMVectorMultiplyAdd(S8, V17, Result);
3288 Result = XMVectorMultiplyAdd(S9, V19, Result);
3289 Result = XMVectorMultiplyAdd(S10, V21, Result);
3290 Result = XMVectorMultiplyAdd(S11, V23, Result);
3294 #elif defined(_XM_SSE_INTRINSICS_)
3295 // Force the value within the bounds of pi
3296 XMVECTOR vResult = XMVectorModAngles(V);
3297 // Each on is V to the "num" power
3299 XMVECTOR V2 = _mm_mul_ps(vResult,vResult);
3301 XMVECTOR vPower = _mm_mul_ps(vResult,V2);
3302 XMVECTOR vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[1]);
3303 vConstants = _mm_mul_ps(vConstants,vPower);
3304 vResult = _mm_add_ps(vResult,vConstants);
3307 vPower = _mm_mul_ps(vPower,V2);
3308 vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[2]);
3309 vConstants = _mm_mul_ps(vConstants,vPower);
3310 vResult = _mm_add_ps(vResult,vConstants);
3313 vPower = _mm_mul_ps(vPower,V2);
3314 vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[3]);
3315 vConstants = _mm_mul_ps(vConstants,vPower);
3316 vResult = _mm_add_ps(vResult,vConstants);
3319 vPower = _mm_mul_ps(vPower,V2);
3320 vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[0]);
3321 vConstants = _mm_mul_ps(vConstants,vPower);
3322 vResult = _mm_add_ps(vResult,vConstants);
3325 vPower = _mm_mul_ps(vPower,V2);
3326 vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[1]);
3327 vConstants = _mm_mul_ps(vConstants,vPower);
3328 vResult = _mm_add_ps(vResult,vConstants);
3331 vPower = _mm_mul_ps(vPower,V2);
3332 vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[2]);
3333 vConstants = _mm_mul_ps(vConstants,vPower);
3334 vResult = _mm_add_ps(vResult,vConstants);
3337 vPower = _mm_mul_ps(vPower,V2);
3338 vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[3]);
3339 vConstants = _mm_mul_ps(vConstants,vPower);
3340 vResult = _mm_add_ps(vResult,vConstants);
3343 vPower = _mm_mul_ps(vPower,V2);
3344 vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[0]);
3345 vConstants = _mm_mul_ps(vConstants,vPower);
3346 vResult = _mm_add_ps(vResult,vConstants);
3349 vPower = _mm_mul_ps(vPower,V2);
3350 vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[1]);
3351 vConstants = _mm_mul_ps(vConstants,vPower);
3352 vResult = _mm_add_ps(vResult,vConstants);
3355 vPower = _mm_mul_ps(vPower,V2);
3356 vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[2]);
3357 vConstants = _mm_mul_ps(vConstants,vPower);
3358 vResult = _mm_add_ps(vResult,vConstants);
3361 vPower = _mm_mul_ps(vPower,V2);
3362 vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[3]);
3363 vConstants = _mm_mul_ps(vConstants,vPower);
3364 vResult = _mm_add_ps(vResult,vConstants);
3366 #else // _XM_VMX128_INTRINSICS_
3367 #endif // _XM_VMX128_INTRINSICS_
3370 //------------------------------------------------------------------------------
3372 XMINLINE XMVECTOR XMVectorCos
3377 #if defined(_XM_NO_INTRINSICS_)
3379 XMVECTOR V1, V2, V4, V6, V8, V10, V12, V14, V16, V18, V20, V22;
3380 XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
3383 V1 = XMVectorModAngles(V);
3385 // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
3386 // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
3387 V2 = XMVectorMultiply(V1, V1);
3388 V4 = XMVectorMultiply(V2, V2);
3389 V6 = XMVectorMultiply(V4, V2);
3390 V8 = XMVectorMultiply(V4, V4);
3391 V10 = XMVectorMultiply(V6, V4);
3392 V12 = XMVectorMultiply(V6, V6);
3393 V14 = XMVectorMultiply(V8, V6);
3394 V16 = XMVectorMultiply(V8, V8);
3395 V18 = XMVectorMultiply(V10, V8);
3396 V20 = XMVectorMultiply(V10, V10);
3397 V22 = XMVectorMultiply(V12, V10);
3399 C1 = XMVectorSplatY(g_XMCosCoefficients0.v);
3400 C2 = XMVectorSplatZ(g_XMCosCoefficients0.v);
3401 C3 = XMVectorSplatW(g_XMCosCoefficients0.v);
3402 C4 = XMVectorSplatX(g_XMCosCoefficients1.v);
3403 C5 = XMVectorSplatY(g_XMCosCoefficients1.v);
3404 C6 = XMVectorSplatZ(g_XMCosCoefficients1.v);
3405 C7 = XMVectorSplatW(g_XMCosCoefficients1.v);
3406 C8 = XMVectorSplatX(g_XMCosCoefficients2.v);
3407 C9 = XMVectorSplatY(g_XMCosCoefficients2.v);
3408 C10 = XMVectorSplatZ(g_XMCosCoefficients2.v);
3409 C11 = XMVectorSplatW(g_XMCosCoefficients2.v);
3411 Result = XMVectorMultiplyAdd(C1, V2, g_XMOne.v);
3412 Result = XMVectorMultiplyAdd(C2, V4, Result);
3413 Result = XMVectorMultiplyAdd(C3, V6, Result);
3414 Result = XMVectorMultiplyAdd(C4, V8, Result);
3415 Result = XMVectorMultiplyAdd(C5, V10, Result);
3416 Result = XMVectorMultiplyAdd(C6, V12, Result);
3417 Result = XMVectorMultiplyAdd(C7, V14, Result);
3418 Result = XMVectorMultiplyAdd(C8, V16, Result);
3419 Result = XMVectorMultiplyAdd(C9, V18, Result);
3420 Result = XMVectorMultiplyAdd(C10, V20, Result);
3421 Result = XMVectorMultiplyAdd(C11, V22, Result);
3425 #elif defined(_XM_SSE_INTRINSICS_)
3426 // Force the value within the bounds of pi
3427 XMVECTOR V2 = XMVectorModAngles(V);
3428 // Each on is V to the "num" power
3430 V2 = _mm_mul_ps(V2,V2);
3432 XMVECTOR vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[1]);
3433 vConstants = _mm_mul_ps(vConstants,V2);
3434 XMVECTOR vResult = _mm_add_ps(vConstants,g_XMOne);
3437 XMVECTOR vPower = _mm_mul_ps(V2,V2);
3438 vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[2]);
3439 vConstants = _mm_mul_ps(vConstants,vPower);
3440 vResult = _mm_add_ps(vResult,vConstants);
3443 vPower = _mm_mul_ps(vPower,V2);
3444 vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[3]);
3445 vConstants = _mm_mul_ps(vConstants,vPower);
3446 vResult = _mm_add_ps(vResult,vConstants);
3449 vPower = _mm_mul_ps(vPower,V2);
3450 vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[0]);
3451 vConstants = _mm_mul_ps(vConstants,vPower);
3452 vResult = _mm_add_ps(vResult,vConstants);
3455 vPower = _mm_mul_ps(vPower,V2);
3456 vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[1]);
3457 vConstants = _mm_mul_ps(vConstants,vPower);
3458 vResult = _mm_add_ps(vResult,vConstants);
3461 vPower = _mm_mul_ps(vPower,V2);
3462 vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[2]);
3463 vConstants = _mm_mul_ps(vConstants,vPower);
3464 vResult = _mm_add_ps(vResult,vConstants);
3467 vPower = _mm_mul_ps(vPower,V2);
3468 vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[3]);
3469 vConstants = _mm_mul_ps(vConstants,vPower);
3470 vResult = _mm_add_ps(vResult,vConstants);
3473 vPower = _mm_mul_ps(vPower,V2);
3474 vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[0]);
3475 vConstants = _mm_mul_ps(vConstants,vPower);
3476 vResult = _mm_add_ps(vResult,vConstants);
3479 vPower = _mm_mul_ps(vPower,V2);
3480 vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[1]);
3481 vConstants = _mm_mul_ps(vConstants,vPower);
3482 vResult = _mm_add_ps(vResult,vConstants);
3485 vPower = _mm_mul_ps(vPower,V2);
3486 vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[2]);
3487 vConstants = _mm_mul_ps(vConstants,vPower);
3488 vResult = _mm_add_ps(vResult,vConstants);
3491 vPower = _mm_mul_ps(vPower,V2);
3492 vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[3]);
3493 vConstants = _mm_mul_ps(vConstants,vPower);
3494 vResult = _mm_add_ps(vResult,vConstants);
3496 #else // _XM_VMX128_INTRINSICS_
3497 #endif // _XM_VMX128_INTRINSICS_
3500 //------------------------------------------------------------------------------
// Computes the per-component sine AND cosine of the angles in V in one pass,
// sharing a single ladder of powers between the two 11-term Taylor series.
// NOTE(review): the parameter-list lines are elided from this listing;
// presumably (XMVECTOR* pSin, XMVECTOR* pCos, FXMVECTOR V) with the results
// built in Sin/Cos below -- confirm against the full source.
3502 XMINLINE VOID XMVectorSinCos
3509 #if defined(_XM_NO_INTRINSICS_)
3511 XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13;
3512 XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23;
3513 XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
3514 XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
// Reduce the input angles into the -Pi..Pi range required by the series below.
3520 V1 = XMVectorModAngles(V);
3522 // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
3523 // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
3524 // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
3525 // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
// Build V^2 through V^23 once: the odd powers feed the sine polynomial and the
// even powers feed the cosine polynomial, so both series share this ladder.
3527 V2 = XMVectorMultiply(V1, V1);
3528 V3 = XMVectorMultiply(V2, V1);
3529 V4 = XMVectorMultiply(V2, V2);
3530 V5 = XMVectorMultiply(V3, V2);
3531 V6 = XMVectorMultiply(V3, V3);
3532 V7 = XMVectorMultiply(V4, V3);
3533 V8 = XMVectorMultiply(V4, V4);
3534 V9 = XMVectorMultiply(V5, V4);
3535 V10 = XMVectorMultiply(V5, V5);
3536 V11 = XMVectorMultiply(V6, V5);
3537 V12 = XMVectorMultiply(V6, V6);
3538 V13 = XMVectorMultiply(V7, V6);
3539 V14 = XMVectorMultiply(V7, V7);
3540 V15 = XMVectorMultiply(V8, V7);
3541 V16 = XMVectorMultiply(V8, V8);
3542 V17 = XMVectorMultiply(V9, V8);
3543 V18 = XMVectorMultiply(V9, V9);
3544 V19 = XMVectorMultiply(V10, V9);
3545 V20 = XMVectorMultiply(V10, V10);
3546 V21 = XMVectorMultiply(V11, V10);
3547 V22 = XMVectorMultiply(V11, V11);
3548 V23 = XMVectorMultiply(V12, V11);
// Broadcast each sine-series coefficient (packed four per vector in the
// g_XMSinCoefficients0..2 tables) into its own full vector.
3550 S1 = XMVectorSplatY(g_XMSinCoefficients0.v);
3551 S2 = XMVectorSplatZ(g_XMSinCoefficients0.v);
3552 S3 = XMVectorSplatW(g_XMSinCoefficients0.v);
3553 S4 = XMVectorSplatX(g_XMSinCoefficients1.v);
3554 S5 = XMVectorSplatY(g_XMSinCoefficients1.v);
3555 S6 = XMVectorSplatZ(g_XMSinCoefficients1.v);
3556 S7 = XMVectorSplatW(g_XMSinCoefficients1.v);
3557 S8 = XMVectorSplatX(g_XMSinCoefficients2.v);
3558 S9 = XMVectorSplatY(g_XMSinCoefficients2.v);
3559 S10 = XMVectorSplatZ(g_XMSinCoefficients2.v);
3560 S11 = XMVectorSplatW(g_XMSinCoefficients2.v);
// Likewise for the cosine-series coefficients.
3562 C1 = XMVectorSplatY(g_XMCosCoefficients0.v);
3563 C2 = XMVectorSplatZ(g_XMCosCoefficients0.v);
3564 C3 = XMVectorSplatW(g_XMCosCoefficients0.v);
3565 C4 = XMVectorSplatX(g_XMCosCoefficients1.v);
3566 C5 = XMVectorSplatY(g_XMCosCoefficients1.v);
3567 C6 = XMVectorSplatZ(g_XMCosCoefficients1.v);
3568 C7 = XMVectorSplatW(g_XMCosCoefficients1.v);
3569 C8 = XMVectorSplatX(g_XMCosCoefficients2.v);
3570 C9 = XMVectorSplatY(g_XMCosCoefficients2.v);
3571 C10 = XMVectorSplatZ(g_XMCosCoefficients2.v);
3572 C11 = XMVectorSplatW(g_XMCosCoefficients2.v);
// Accumulate the sine polynomial: Sin = V1 + S1*V^3 + S2*V^5 + ... + S11*V^23.
3574 Sin = XMVectorMultiplyAdd(S1, V3, V1);
3575 Sin = XMVectorMultiplyAdd(S2, V5, Sin);
3576 Sin = XMVectorMultiplyAdd(S3, V7, Sin);
3577 Sin = XMVectorMultiplyAdd(S4, V9, Sin);
3578 Sin = XMVectorMultiplyAdd(S5, V11, Sin);
3579 Sin = XMVectorMultiplyAdd(S6, V13, Sin);
3580 Sin = XMVectorMultiplyAdd(S7, V15, Sin);
3581 Sin = XMVectorMultiplyAdd(S8, V17, Sin);
3582 Sin = XMVectorMultiplyAdd(S9, V19, Sin);
3583 Sin = XMVectorMultiplyAdd(S10, V21, Sin);
3584 Sin = XMVectorMultiplyAdd(S11, V23, Sin);
// Accumulate the cosine polynomial: Cos = 1 + C1*V^2 + C2*V^4 + ... + C11*V^22.
3586 Cos = XMVectorMultiplyAdd(C1, V2, g_XMOne.v);
3587 Cos = XMVectorMultiplyAdd(C2, V4, Cos);
3588 Cos = XMVectorMultiplyAdd(C3, V6, Cos);
3589 Cos = XMVectorMultiplyAdd(C4, V8, Cos);
3590 Cos = XMVectorMultiplyAdd(C5, V10, Cos);
3591 Cos = XMVectorMultiplyAdd(C6, V12, Cos);
3592 Cos = XMVectorMultiplyAdd(C7, V14, Cos);
3593 Cos = XMVectorMultiplyAdd(C8, V16, Cos);
3594 Cos = XMVectorMultiplyAdd(C9, V18, Cos);
3595 Cos = XMVectorMultiplyAdd(C10, V20, Cos);
3596 Cos = XMVectorMultiplyAdd(C11, V22, Cos);
// SSE path: identical math; coefficients are loaded with _mm_load_ps1 instead
// of splats, and the first term of each series is written as an explicit
// multiply/add pair rather than XMVectorMultiplyAdd.
3601 #elif defined(_XM_SSE_INTRINSICS_)
3604 XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13;
3605 XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23;
3606 XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
3607 XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
3610 V1 = XMVectorModAngles(V);
3612 // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
3613 // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
3614 // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
3615 // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
3617 V2 = XMVectorMultiply(V1, V1);
3618 V3 = XMVectorMultiply(V2, V1);
3619 V4 = XMVectorMultiply(V2, V2);
3620 V5 = XMVectorMultiply(V3, V2);
3621 V6 = XMVectorMultiply(V3, V3);
3622 V7 = XMVectorMultiply(V4, V3);
3623 V8 = XMVectorMultiply(V4, V4);
3624 V9 = XMVectorMultiply(V5, V4);
3625 V10 = XMVectorMultiply(V5, V5);
3626 V11 = XMVectorMultiply(V6, V5);
3627 V12 = XMVectorMultiply(V6, V6);
3628 V13 = XMVectorMultiply(V7, V6);
3629 V14 = XMVectorMultiply(V7, V7);
3630 V15 = XMVectorMultiply(V8, V7);
3631 V16 = XMVectorMultiply(V8, V8);
3632 V17 = XMVectorMultiply(V9, V8);
3633 V18 = XMVectorMultiply(V9, V9);
3634 V19 = XMVectorMultiply(V10, V9);
3635 V20 = XMVectorMultiply(V10, V10);
3636 V21 = XMVectorMultiply(V11, V10);
3637 V22 = XMVectorMultiply(V11, V11);
3638 V23 = XMVectorMultiply(V12, V11);
3640 S1 = _mm_load_ps1(&g_XMSinCoefficients0.f[1]);
3641 S2 = _mm_load_ps1(&g_XMSinCoefficients0.f[2]);
3642 S3 = _mm_load_ps1(&g_XMSinCoefficients0.f[3]);
3643 S4 = _mm_load_ps1(&g_XMSinCoefficients1.f[0]);
3644 S5 = _mm_load_ps1(&g_XMSinCoefficients1.f[1]);
3645 S6 = _mm_load_ps1(&g_XMSinCoefficients1.f[2]);
3646 S7 = _mm_load_ps1(&g_XMSinCoefficients1.f[3]);
3647 S8 = _mm_load_ps1(&g_XMSinCoefficients2.f[0]);
3648 S9 = _mm_load_ps1(&g_XMSinCoefficients2.f[1]);
3649 S10 = _mm_load_ps1(&g_XMSinCoefficients2.f[2]);
3650 S11 = _mm_load_ps1(&g_XMSinCoefficients2.f[3]);
3652 C1 = _mm_load_ps1(&g_XMCosCoefficients0.f[1]);
3653 C2 = _mm_load_ps1(&g_XMCosCoefficients0.f[2]);
3654 C3 = _mm_load_ps1(&g_XMCosCoefficients0.f[3]);
3655 C4 = _mm_load_ps1(&g_XMCosCoefficients1.f[0]);
3656 C5 = _mm_load_ps1(&g_XMCosCoefficients1.f[1]);
3657 C6 = _mm_load_ps1(&g_XMCosCoefficients1.f[2]);
3658 C7 = _mm_load_ps1(&g_XMCosCoefficients1.f[3]);
3659 C8 = _mm_load_ps1(&g_XMCosCoefficients2.f[0]);
3660 C9 = _mm_load_ps1(&g_XMCosCoefficients2.f[1]);
3661 C10 = _mm_load_ps1(&g_XMCosCoefficients2.f[2]);
3662 C11 = _mm_load_ps1(&g_XMCosCoefficients2.f[3]);
// Sin = V1 + S1*V^3, then fold in the remaining odd-power terms.
3664 S1 = _mm_mul_ps(S1,V3);
3665 Sin = _mm_add_ps(S1,V1);
3666 Sin = XMVectorMultiplyAdd(S2, V5, Sin);
3667 Sin = XMVectorMultiplyAdd(S3, V7, Sin);
3668 Sin = XMVectorMultiplyAdd(S4, V9, Sin);
3669 Sin = XMVectorMultiplyAdd(S5, V11, Sin);
3670 Sin = XMVectorMultiplyAdd(S6, V13, Sin);
3671 Sin = XMVectorMultiplyAdd(S7, V15, Sin);
3672 Sin = XMVectorMultiplyAdd(S8, V17, Sin);
3673 Sin = XMVectorMultiplyAdd(S9, V19, Sin);
3674 Sin = XMVectorMultiplyAdd(S10, V21, Sin);
3675 Sin = XMVectorMultiplyAdd(S11, V23, Sin);
// Cos = 1 + C1*V^2, then fold in the remaining even-power terms.
3677 Cos = _mm_mul_ps(C1,V2);
3678 Cos = _mm_add_ps(Cos,g_XMOne);
3679 Cos = XMVectorMultiplyAdd(C2, V4, Cos);
3680 Cos = XMVectorMultiplyAdd(C3, V6, Cos);
3681 Cos = XMVectorMultiplyAdd(C4, V8, Cos);
3682 Cos = XMVectorMultiplyAdd(C5, V10, Cos);
3683 Cos = XMVectorMultiplyAdd(C6, V12, Cos);
3684 Cos = XMVectorMultiplyAdd(C7, V14, Cos);
3685 Cos = XMVectorMultiplyAdd(C8, V16, Cos);
3686 Cos = XMVectorMultiplyAdd(C9, V18, Cos);
3687 Cos = XMVectorMultiplyAdd(C10, V20, Cos);
3688 Cos = XMVectorMultiplyAdd(C11, V22, Cos);
3692 #else // _XM_VMX128_INTRINSICS_
3693 #endif // _XM_VMX128_INTRINSICS_
3696 //------------------------------------------------------------------------------
// Computes the per-component tangent of the angles in V using Cody and Waite
// argument reduction (remainder modulo Pi/2 split into a high and a low part)
// followed by a rational N/D minimax approximation.
// NOTE(review): the parameter-list lines are elided from this listing;
// presumably (FXMVECTOR V), returning Result -- confirm against the full source.
3698 XMINLINE XMVECTOR XMVectorTan
3703 #if defined(_XM_NO_INTRINSICS_)
3705 // Cody and Waite algorithm to compute tangent.
3707 XMVECTOR VA, VB, VC, VC2;
3708 XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7;
3709 XMVECTOR C0, C1, TwoDivPi, Epsilon;
3712 XMVECTOR VIsZero, VCNearZero, VBIsEven;
3716 static CONST XMVECTOR TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
3717 static CONST XMVECTOR TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
// TanConstants: <Pi/2 high part, Pi/2 low-order correction, epsilon, 2/Pi>.
3718 static CONST XMVECTOR TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI};
3719 static CONST XMVECTORU32 Mask = {0x1, 0x1, 0x1, 0x1};
3721 TwoDivPi = XMVectorSplatW(TanConstants);
3723 Zero = XMVectorZero();
3725 C0 = XMVectorSplatX(TanConstants);
3726 C1 = XMVectorSplatY(TanConstants);
3727 Epsilon = XMVectorSplatZ(TanConstants);
// VA = round(V * 2/Pi): the number of quarter-turns nearest to each angle.
3729 VA = XMVectorMultiply(V, TwoDivPi);
3731 VA = XMVectorRound(VA);
// VC = V - VA*(Pi/2), computed in two steps (C0 then the C1 correction term)
// so the remainder keeps more precision than a single subtraction would.
3733 VC = XMVectorNegativeMultiplySubtract(VA, C0, V);
3735 VB = XMVectorAbs(VA);
3737 VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);
// Truncate |VA| to integers so the quadrant parity can be tested below.
3739 for (i = 0; i < 4; i++)
3741 VB.vector4_u32[i] = (UINT)VB.vector4_f32[i];
3744 VC2 = XMVectorMultiply(VC, VC);
3746 T7 = XMVectorSplatW(TanCoefficients1);
3747 T6 = XMVectorSplatZ(TanCoefficients1);
3748 T4 = XMVectorSplatX(TanCoefficients1);
3749 T3 = XMVectorSplatW(TanCoefficients0);
3750 T5 = XMVectorSplatY(TanCoefficients1);
3751 T2 = XMVectorSplatZ(TanCoefficients0);
3752 T1 = XMVectorSplatY(TanCoefficients0);
3753 T0 = XMVectorSplatX(TanCoefficients0);
// Even quarter-turn counts use tan = N/D; odd counts use the -D/N form below.
3755 VBIsEven = XMVectorAndInt(VB, Mask.v);
3756 VBIsEven = XMVectorEqualInt(VBIsEven, Zero);
// Evaluate numerator N and denominator D of the rational approximation in VC2.
3758 N = XMVectorMultiplyAdd(VC2, T7, T6);
3759 D = XMVectorMultiplyAdd(VC2, T4, T3);
3760 N = XMVectorMultiplyAdd(VC2, N, T5);
3761 D = XMVectorMultiplyAdd(VC2, D, T2);
3762 N = XMVectorMultiply(VC2, N);
3763 D = XMVectorMultiplyAdd(VC2, D, T1);
3764 N = XMVectorMultiplyAdd(VC, N, VC);
3765 VCNearZero = XMVectorInBounds(VC, Epsilon);
3766 D = XMVectorMultiplyAdd(VC2, D, T0);
// Near zero the rational form loses precision; fall back to tan(x) ~= x / 1.
3768 N = XMVectorSelect(N, VC, VCNearZero);
3769 D = XMVectorSelect(D, g_XMOne.v, VCNearZero);
// R1 = N/D (tangent); R0 = D/(-N), the negated cotangent for odd quadrants.
3771 R0 = XMVectorNegate(N);
3772 R1 = XMVectorReciprocal(D);
3773 R0 = XMVectorReciprocal(R0);
3774 R1 = XMVectorMultiply(N, R1);
3775 R0 = XMVectorMultiply(D, R0);
3777 VIsZero = XMVectorEqual(V, Zero);
3779 Result = XMVectorSelect(R0, R1, VBIsEven);
// Force tan(0) to exactly zero regardless of the path taken above.
3781 Result = XMVectorSelect(Result, Zero, VIsZero);
// SSE path: same reduction and rational form; the integer truncation is done
// with _mm_cvttps_epi32 and the reciprocals are replaced by true divides.
3785 #elif defined(_XM_SSE_INTRINSICS_)
3786 // Cody and Waite algorithm to compute tangent.
3788 XMVECTOR VA, VB, VC, VC2;
3789 XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7;
3790 XMVECTOR C0, C1, TwoDivPi, Epsilon;
3793 XMVECTOR VIsZero, VCNearZero, VBIsEven;
3796 static CONST XMVECTORF32 TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
3797 static CONST XMVECTORF32 TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
3798 static CONST XMVECTORF32 TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI};
3799 static CONST XMVECTORI32 Mask = {0x1, 0x1, 0x1, 0x1};
3801 TwoDivPi = XMVectorSplatW(TanConstants);
3803 Zero = XMVectorZero();
3805 C0 = XMVectorSplatX(TanConstants);
3806 C1 = XMVectorSplatY(TanConstants);
3807 Epsilon = XMVectorSplatZ(TanConstants);
3809 VA = XMVectorMultiply(V, TwoDivPi);
3811 VA = XMVectorRound(VA);
3813 VC = XMVectorNegativeMultiplySubtract(VA, C0, V);
3815 VB = XMVectorAbs(VA);
3817 VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);
// Reinterpret VB's lanes as integers after truncation for the parity test.
3819 reinterpret_cast<__m128i *>(&VB)[0] = _mm_cvttps_epi32(VB);
3821 VC2 = XMVectorMultiply(VC, VC);
3823 T7 = XMVectorSplatW(TanCoefficients1);
3824 T6 = XMVectorSplatZ(TanCoefficients1);
3825 T4 = XMVectorSplatX(TanCoefficients1);
3826 T3 = XMVectorSplatW(TanCoefficients0);
3827 T5 = XMVectorSplatY(TanCoefficients1);
3828 T2 = XMVectorSplatZ(TanCoefficients0);
3829 T1 = XMVectorSplatY(TanCoefficients0);
3830 T0 = XMVectorSplatX(TanCoefficients0);
3832 VBIsEven = XMVectorAndInt(VB,Mask);
3833 VBIsEven = XMVectorEqualInt(VBIsEven, Zero);
3835 N = XMVectorMultiplyAdd(VC2, T7, T6);
3836 D = XMVectorMultiplyAdd(VC2, T4, T3);
3837 N = XMVectorMultiplyAdd(VC2, N, T5);
3838 D = XMVectorMultiplyAdd(VC2, D, T2);
3839 N = XMVectorMultiply(VC2, N);
3840 D = XMVectorMultiplyAdd(VC2, D, T1);
3841 N = XMVectorMultiplyAdd(VC, N, VC);
3842 VCNearZero = XMVectorInBounds(VC, Epsilon);
3843 D = XMVectorMultiplyAdd(VC2, D, T0);
3845 N = XMVectorSelect(N, VC, VCNearZero);
3846 D = XMVectorSelect(D, g_XMOne, VCNearZero);
3847 R0 = XMVectorNegate(N);
// R1 = tan = N/D; R0 = -cot = D/(-N), selected by quadrant parity below.
3848 R1 = _mm_div_ps(N,D);
3849 R0 = _mm_div_ps(D,R0);
3850 VIsZero = XMVectorEqual(V, Zero);
3851 Result = XMVectorSelect(R0, R1, VBIsEven);
3852 Result = XMVectorSelect(Result, Zero, VIsZero);
3856 #else // _XM_VMX128_INTRINSICS_
3857 #endif // _XM_VMX128_INTRINSICS_
3860 //------------------------------------------------------------------------------
// Computes the per-component hyperbolic sine: sinh(V) = (e^V - e^-V) / 2.
// NOTE(review): this relies on XMVectorExp being base-2 -- Scale = 1/ln(2)
// converts the exponent to base 2 and the -1 bias divides each term by 2,
// which only works out for 2^x semantics. Confirm against XMVectorExp.
3862 XMINLINE XMVECTOR XMVectorSinH
3867 #if defined(_XM_NO_INTRINSICS_)
3872 static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
// V1 = V/ln(2) - 1 and V2 = -V/ln(2) - 1, so Exp yields e^V/2 and e^-V/2.
3874 V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v);
3875 V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v);
3877 E1 = XMVectorExp(V1);
3878 E2 = XMVectorExp(V2);
// sinh(V) = e^V/2 - e^-V/2.
3880 Result = XMVectorSubtract(E1, E2);
// SSE path: identical math with the multiply-add pairs written out explicitly.
3884 #elif defined(_XM_SSE_INTRINSICS_)
3888 static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
3890 V1 = _mm_mul_ps(V, Scale);
3891 V1 = _mm_add_ps(V1,g_XMNegativeOne);
3892 V2 = _mm_mul_ps(V, Scale);
3893 V2 = _mm_sub_ps(g_XMNegativeOne,V2);
3894 E1 = XMVectorExp(V1);
3895 E2 = XMVectorExp(V2);
3897 Result = _mm_sub_ps(E1, E2);
3900 #else // _XM_VMX128_INTRINSICS_
3901 #endif // _XM_VMX128_INTRINSICS_
3904 //------------------------------------------------------------------------------
// Computes the per-component hyperbolic cosine: cosh(V) = (e^V + e^-V) / 2.
// Same base-2 trick as XMVectorSinH: the -1 bias on the scaled exponent halves
// each exponential, so only an add is needed at the end.
// NOTE(review): unlike XMVectorSinH, this no-intrinsics path declares Scale as
// a raw XMVECTOR and passes it without .v -- presumably equivalent, but it is
// inconsistent with the sibling functions; confirm it compiles the same way.
3906 XMINLINE XMVECTOR XMVectorCosH
3911 #if defined(_XM_NO_INTRINSICS_)
3916 static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
// V1 = V/ln(2) - 1 and V2 = -V/ln(2) - 1, so Exp yields e^V/2 and e^-V/2.
3918 V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v);
3919 V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v);
3921 E1 = XMVectorExp(V1);
3922 E2 = XMVectorExp(V2);
// cosh(V) = e^V/2 + e^-V/2.
3924 Result = XMVectorAdd(E1, E2);
3928 #elif defined(_XM_SSE_INTRINSICS_)
3932 static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
3934 V1 = _mm_mul_ps(V,Scale);
3935 V1 = _mm_add_ps(V1,g_XMNegativeOne);
3936 V2 = _mm_mul_ps(V, Scale);
3937 V2 = _mm_sub_ps(g_XMNegativeOne,V2);
3938 E1 = XMVectorExp(V1);
3939 E2 = XMVectorExp(V2);
3940 Result = _mm_add_ps(E1, E2);
3942 #else // _XM_VMX128_INTRINSICS_
3943 #endif // _XM_VMX128_INTRINSICS_
3946 //------------------------------------------------------------------------------
// Computes the per-component hyperbolic tangent using the single-exponential
// identity tanh(V) = 1 - 2 / (e^(2V) + 1).
// NOTE(review): assumes XMVectorExp is base-2, so Scale = 2/ln(2) makes
// E = 2^(V*Scale) = e^(2V) -- confirm against XMVectorExp.
3948 XMINLINE XMVECTOR XMVectorTanH
3953 #if defined(_XM_NO_INTRINSICS_)
3957 static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
3959 E = XMVectorMultiply(V, Scale.v);
// E = e^(2V); then E*0.5 + 0.5 = (e^(2V) + 1)/2, and its reciprocal is
// 2/(e^(2V) + 1), the term subtracted from 1 below.
3961 E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v);
3962 E = XMVectorReciprocal(E);
3964 Result = XMVectorSubtract(g_XMOne.v, E);
3968 #elif defined(_XM_SSE_INTRINSICS_)
3969 static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
3971 XMVECTOR E = _mm_mul_ps(V, Scale);
// Same identity as above, with the multiply-add expanded into two steps.
3973 E = _mm_mul_ps(E,g_XMOneHalf);
3974 E = _mm_add_ps(E,g_XMOneHalf);
3975 E = XMVectorReciprocal(E);
3976 E = _mm_sub_ps(g_XMOne, E);
3978 #else // _XM_VMX128_INTRINSICS_
3979 #endif // _XM_VMX128_INTRINSICS_
3982 //------------------------------------------------------------------------------
// Computes the per-component arcsine with a pair of degree-5 polynomials in
// |V|: one multiplied by V directly, the other scaled by sqrt-like terms so the
// infinite derivative at |V| = 1 is represented accurately.
3984 XMINLINE XMVECTOR XMVectorASin
3989 #if defined(_XM_NO_INTRINSICS_)
3991 XMVECTOR V2, V3, AbsV;
3992 XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
3993 XMVECTOR R0, R1, R2, R3, R4;
3994 XMVECTOR OneMinusAbsV;
// The epsilon above 1.0 keeps (1 - |V|) positive when |V| == 1 exactly.
3997 static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
3999 // asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
4000 // V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)
4002 AbsV = XMVectorAbs(V);
4004 V2 = XMVectorMultiply(V, V);
4005 V3 = XMVectorMultiply(V2, AbsV);
// R4 = V - V*|V|, the signed (1 - |V|) factor of the second polynomial.
4007 R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);
4009 OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
4010 Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);
4012 C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
4013 C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
4014 C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
4015 C3 = XMVectorSplatW(g_XMASinCoefficients0.v);
4017 C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
4018 C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
4019 C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
4020 C7 = XMVectorSplatW(g_XMASinCoefficients1.v);
4022 C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
4023 C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
4024 C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
4025 C11 = XMVectorSplatW(g_XMASinCoefficients2.v);
// Evaluate the four coefficient pairs in parallel, then combine with V3 so the
// high- and low-order halves of each polynomial are summed separately for
// precision before the final merge.
4027 R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
4028 R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
4029 R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
4030 R3 = XMVectorMultiplyAdd(C0, AbsV, C4);
4032 R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
4033 R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
4034 R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
4035 R3 = XMVectorMultiplyAdd(R3, AbsV, C8);
4037 R0 = XMVectorMultiplyAdd(R2, V3, R0);
4038 R1 = XMVectorMultiplyAdd(R3, V3, R1);
4040 R0 = XMVectorMultiply(V, R0);
4041 R1 = XMVectorMultiply(R4, R1);
4043 Result = XMVectorMultiplyAdd(R1, Rsq, R0);
4047 #elif defined(_XM_SSE_INTRINSICS_)
4048 static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
4050 // asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
4051 // V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)
// vAbsV = max(-V, V) = |V|, computed without a constant load.
4053 XMVECTOR vAbsV = _mm_setzero_ps();
4054 vAbsV = _mm_sub_ps(vAbsV,V);
4055 vAbsV = _mm_max_ps(vAbsV,V);
4057 XMVECTOR R0 = vAbsV;
4058 XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
4059 R0 = _mm_mul_ps(R0,vConstants);
4060 vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
4061 R0 = _mm_add_ps(R0,vConstants);
4063 XMVECTOR R1 = vAbsV;
4064 vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
4065 R1 = _mm_mul_ps(R1,vConstants);
4066 vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
4067 R1 = _mm_add_ps(R1, vConstants);
4069 XMVECTOR R2 = vAbsV;
4070 vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
4071 R2 = _mm_mul_ps(R2,vConstants);
4072 vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
4073 R2 = _mm_add_ps(R2, vConstants);
4075 XMVECTOR R3 = vAbsV;
4076 vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
4077 R3 = _mm_mul_ps(R3,vConstants);
4078 vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
4079 R3 = _mm_add_ps(R3, vConstants);
4081 vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
4082 R0 = _mm_mul_ps(R0,vAbsV);
4083 R0 = _mm_add_ps(R0,vConstants);
4085 vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
4086 R1 = _mm_mul_ps(R1,vAbsV);
4087 R1 = _mm_add_ps(R1,vConstants);
4089 vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
4090 R2 = _mm_mul_ps(R2,vAbsV);
4091 R2 = _mm_add_ps(R2,vConstants);
4093 vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
4094 R3 = _mm_mul_ps(R3,vAbsV);
4095 R3 = _mm_add_ps(R3,vConstants);
// vConstants = V^2 * |V| = |V|^3 with sign handled via the abs factor.
4098 vConstants = _mm_mul_ps(V,V);
4099 vConstants = _mm_mul_ps(vConstants, vAbsV);
4101 R2 = _mm_mul_ps(R2,vConstants);
4102 R3 = _mm_mul_ps(R3,vConstants);
4103 // Merge the results
4104 R0 = _mm_add_ps(R0,R2);
4105 R1 = _mm_add_ps(R1,R3);
4107 R0 = _mm_mul_ps(R0,V);
4108 // vConstants = V-(V^2 retaining sign)
4109 vConstants = _mm_mul_ps(vAbsV, V);
4110 vConstants = _mm_sub_ps(V,vConstants);
4111 R1 = _mm_mul_ps(R1,vConstants);
// Epsilon above 1.0 keeps the radicand positive when |V| == 1 exactly.
4112 vConstants = _mm_sub_ps(OnePlusEpsilon,vAbsV);
4113 // Do NOT use rsqrt/mul. This needs the precision
4114 vConstants = _mm_sqrt_ps(vConstants);
4115 R1 = _mm_div_ps(R1,vConstants);
4116 R0 = _mm_add_ps(R0,R1);
4118 #else // _XM_VMX128_INTRINSICS_
4119 #endif // _XM_VMX128_INTRINSICS_
4122 //------------------------------------------------------------------------------
// Computes the per-component arccosine via the identity acos(V) = Pi/2 -
// asin(V); the body mirrors XMVectorASin and subtracts from Pi/2 at the end.
4124 XMINLINE XMVECTOR XMVectorACos
4129 #if defined(_XM_NO_INTRINSICS_)
4131 XMVECTOR V2, V3, AbsV;
4132 XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
4133 XMVECTOR R0, R1, R2, R3, R4;
4134 XMVECTOR OneMinusAbsV;
// The epsilon above 1.0 keeps (1 - |V|) positive when |V| == 1 exactly.
4137 static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
4139 // acos(V) = PI / 2 - asin(V)
4141 AbsV = XMVectorAbs(V);
4143 V2 = XMVectorMultiply(V, V);
4144 V3 = XMVectorMultiply(V2, AbsV);
// R4 = V - V*|V|, the signed (1 - |V|) factor of the second polynomial.
4146 R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);
4148 OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
4149 Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);
4151 C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
4152 C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
4153 C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
4154 C3 = XMVectorSplatW(g_XMASinCoefficients0.v);
4156 C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
4157 C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
4158 C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
4159 C7 = XMVectorSplatW(g_XMASinCoefficients1.v);
4161 C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
4162 C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
4163 C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
4164 C11 = XMVectorSplatW(g_XMASinCoefficients2.v);
// Evaluate the asin polynomial pair exactly as in XMVectorASin.
4166 R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
4167 R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
4168 R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
4169 R3 = XMVectorMultiplyAdd(C0, AbsV, C4);
4171 R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
4172 R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
4173 R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
4174 R3 = XMVectorMultiplyAdd(R3, AbsV, C8);
4176 R0 = XMVectorMultiplyAdd(R2, V3, R0);
4177 R1 = XMVectorMultiplyAdd(R3, V3, R1);
4179 R0 = XMVectorMultiply(V, R0);
4180 R1 = XMVectorMultiply(R4, R1);
4182 Result = XMVectorMultiplyAdd(R1, Rsq, R0);
// Convert the asin result to acos.
4184 Result = XMVectorSubtract(g_XMHalfPi.v, Result);
4188 #elif defined(_XM_SSE_INTRINSICS_)
4189 static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
4190 // Uses only 6 registers for good code on x86 targets
4191 // acos(V) = PI / 2 - asin(V)
// vAbsV = max(-V, V) = |V|, computed without a constant load.
4193 XMVECTOR vAbsV = _mm_setzero_ps();
4194 vAbsV = _mm_sub_ps(vAbsV,V);
4195 vAbsV = _mm_max_ps(vAbsV,V);
4196 // Perform the series in precision groups to
4197 // retain precision across 20 bits. (3 bits of imprecision due to operations)
4198 XMVECTOR R0 = vAbsV;
4199 XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
4200 R0 = _mm_mul_ps(R0,vConstants);
4201 vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
4202 R0 = _mm_add_ps(R0,vConstants);
4203 R0 = _mm_mul_ps(R0,vAbsV);
4204 vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
4205 R0 = _mm_add_ps(R0,vConstants);
4207 XMVECTOR R1 = vAbsV;
4208 vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
4209 R1 = _mm_mul_ps(R1,vConstants);
4210 vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
4211 R1 = _mm_add_ps(R1,vConstants);
4212 R1 = _mm_mul_ps(R1, vAbsV);
4213 vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
4214 R1 = _mm_add_ps(R1,vConstants);
4216 XMVECTOR R2 = vAbsV;
4217 vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
4218 R2 = _mm_mul_ps(R2,vConstants);
4219 vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
4220 R2 = _mm_add_ps(R2,vConstants);
4221 R2 = _mm_mul_ps(R2, vAbsV);
4222 vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
4223 R2 = _mm_add_ps(R2,vConstants);
4225 XMVECTOR R3 = vAbsV;
4226 vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
4227 R3 = _mm_mul_ps(R3,vConstants);
4228 vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
4229 R3 = _mm_add_ps(R3,vConstants);
4230 R3 = _mm_mul_ps(R3, vAbsV);
4231 vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
4232 R3 = _mm_add_ps(R3,vConstants);
// vConstants = V^2 * |V| = |V|^3.
4235 vConstants = _mm_mul_ps(V,V);
4236 vConstants = _mm_mul_ps(vConstants,vAbsV);
4237 R2 = _mm_mul_ps(R2,vConstants);
4238 R3 = _mm_mul_ps(R3,vConstants);
4239 // Add the pair of values together here to retain
4240 // as much precision as possible
4241 R0 = _mm_add_ps(R0,R2);
4242 R1 = _mm_add_ps(R1,R3);
4244 R0 = _mm_mul_ps(R0,V);
4245 // vConstants = V-(V*abs(V))
4246 vConstants = _mm_mul_ps(V,vAbsV);
4247 vConstants = _mm_sub_ps(V,vConstants);
4248 R1 = _mm_mul_ps(R1,vConstants);
4249 // Epsilon exists to allow 1.0 as an answer
4250 vConstants = _mm_sub_ps(OnePlusEpsilon, vAbsV);
4251 // Use sqrt instead of rsqrt for precision
4252 vConstants = _mm_sqrt_ps(vConstants);
4253 R1 = _mm_div_ps(R1,vConstants);
4254 R1 = _mm_add_ps(R1,R0);
// acos = Pi/2 - asin.
4255 vConstants = _mm_sub_ps(g_XMHalfPi,R1);
4257 #else // _XM_VMX128_INTRINSICS_
4258 #endif // _XM_VMX128_INTRINSICS_
4261 //------------------------------------------------------------------------------
// Computes the per-component arctangent with Cody and Waite range reduction:
// |V| > 1 is folded through the reciprocal (atan(x) = Pi/2 - atan(1/x)), then
// values above 2 - sqrt(3) are folded through the sqrt(3) identity, so the core
// rational approximation only runs on a small interval near zero. The folded
// angle offsets (0, Pi/6, Pi/3, Pi/2) are added back and the sign restored at
// the end; very large inputs saturate to +/-Pi/2.
4263 XMINLINE XMVECTOR XMVectorATan
4268 #if defined(_XM_NO_INTRINSICS_)
4270 // Cody and Waite algorithm to compute inverse tangent.
4273 XMVECTOR VF, G, ReciprocalF, AbsF, FA, FB;
4274 XMVECTOR Sqrt3, Sqrt3MinusOne, TwoMinusSqrt3;
4275 XMVECTOR HalfPi, OneThirdPi, OneSixthPi, Epsilon, MinV, MaxV;
4277 XMVECTOR NegativeHalfPi;
4278 XMVECTOR Angle1, Angle2;
4279 XMVECTOR F_GT_One, F_GT_TwoMinusSqrt3, AbsF_LT_Epsilon, V_LT_Zero, V_GT_MaxV, V_LT_MinV;
4280 XMVECTOR NegativeResult, Result;
4281 XMVECTOR P0, P1, P2, P3, Q0, Q1, Q2, Q3;
4282 static CONST XMVECTOR ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
4283 static CONST XMVECTOR ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
4284 static CONST XMVECTOR ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
4285 static CONST XMVECTOR ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>
4287 Zero = XMVectorZero();
4289 P0 = XMVectorSplatX(ATanConstants0);
4290 P1 = XMVectorSplatY(ATanConstants0);
4291 P2 = XMVectorSplatZ(ATanConstants0);
4292 P3 = XMVectorSplatW(ATanConstants0);
4294 Q0 = XMVectorSplatX(ATanConstants1);
4295 Q1 = XMVectorSplatY(ATanConstants1);
4296 Q2 = XMVectorSplatZ(ATanConstants1);
4297 Q3 = XMVectorSplatW(ATanConstants1);
4299 Sqrt3 = XMVectorSplatX(ATanConstants2);
4300 Sqrt3MinusOne = XMVectorSplatY(ATanConstants2);
4301 TwoMinusSqrt3 = XMVectorSplatZ(ATanConstants2);
4302 Epsilon = XMVectorSplatW(ATanConstants2);
4304 HalfPi = XMVectorSplatX(ATanConstants3);
4305 OneThirdPi = XMVectorSplatY(ATanConstants3);
4306 OneSixthPi = XMVectorSplatZ(ATanConstants3);
4307 MaxV = XMVectorSplatW(ATanConstants3);
// First fold: work on |V|, and replace values above 1 with their reciprocal.
4309 VF = XMVectorAbs(V);
4310 ReciprocalF = XMVectorReciprocal(VF);
4312 F_GT_One = XMVectorGreater(VF, g_XMOne.v);
4314 VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
4315 Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
4316 Angle2 = XMVectorSelect(OneSixthPi, OneThirdPi, F_GT_One);
// Second fold: for VF > 2 - sqrt(3), map through the sqrt(3) identity
// FA = ((sqrt(3) - 1)*VF + VF - 1) / (VF + sqrt(3)), shifting by Pi/6 or Pi/3.
4318 F_GT_TwoMinusSqrt3 = XMVectorGreater(VF, TwoMinusSqrt3);
4320 FA = XMVectorMultiplyAdd(Sqrt3MinusOne, VF, VF);
4321 FA = XMVectorAdd(FA, g_XMNegativeOne.v);
4322 FB = XMVectorAdd(VF, Sqrt3);
4323 FB = XMVectorReciprocal(FB);
4324 FA = XMVectorMultiply(FA, FB);
4326 VF = XMVectorSelect(VF, FA, F_GT_TwoMinusSqrt3);
4327 Angle1 = XMVectorSelect(Angle1, Angle2, F_GT_TwoMinusSqrt3);
4329 AbsF = XMVectorAbs(VF);
4330 AbsF_LT_Epsilon = XMVectorLess(AbsF, Epsilon);
// Core rational approximation in G = VF^2: Result = VF + VF * (N(G)/D(G)).
4332 G = XMVectorMultiply(VF, VF);
4334 D = XMVectorAdd(G, Q3);
4335 D = XMVectorMultiplyAdd(D, G, Q2);
4336 D = XMVectorMultiplyAdd(D, G, Q1);
4337 D = XMVectorMultiplyAdd(D, G, Q0);
4338 D = XMVectorReciprocal(D);
4340 N = XMVectorMultiplyAdd(P3, G, P2);
4341 N = XMVectorMultiplyAdd(N, G, P1);
4342 N = XMVectorMultiplyAdd(N, G, P0);
4343 N = XMVectorMultiply(N, G);
4344 Result = XMVectorMultiply(N, D);
4346 Result = XMVectorMultiplyAdd(Result, VF, VF);
// Tiny reduced arguments use atan(x) ~= x directly.
4348 Result = XMVectorSelect(Result, VF, AbsF_LT_Epsilon);
// Undo the reciprocal fold (negate) and add the folded-angle offset back in.
4350 NegativeResult = XMVectorNegate(Result);
4351 Result = XMVectorSelect(Result, NegativeResult, F_GT_One);
4353 Result = XMVectorAdd(Result, Angle1);
// Restore the sign of the original input.
4355 V_LT_Zero = XMVectorLess(V, Zero);
4356 NegativeResult = XMVectorNegate(Result);
4357 Result = XMVectorSelect(Result, NegativeResult, V_LT_Zero);
// Saturate inputs beyond +/-MaxV (where the reciprocal underflows) to +/-Pi/2.
4359 MinV = XMVectorNegate(MaxV);
4360 NegativeHalfPi = XMVectorNegate(HalfPi);
4361 V_GT_MaxV = XMVectorGreater(V, MaxV);
4362 V_LT_MinV = XMVectorLess(V, MinV);
4363 Result = XMVectorSelect(Result, g_XMHalfPi.v, V_GT_MaxV);
4364 Result = XMVectorSelect(Result, NegativeHalfPi, V_LT_MinV);
// SSE path: same reduction; reciprocals become true divides and comparisons
// use the _mm_cmp* intrinsics directly.
4368 #elif defined(_XM_SSE_INTRINSICS_)
4369 static CONST XMVECTORF32 ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
4370 static CONST XMVECTORF32 ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
4371 static CONST XMVECTORF32 ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
4372 static CONST XMVECTORF32 ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>
4374 XMVECTOR VF = XMVectorAbs(V);
4375 XMVECTOR F_GT_One = _mm_cmpgt_ps(VF,g_XMOne);
4376 XMVECTOR ReciprocalF = XMVectorReciprocal(VF);
4377 VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
4378 XMVECTOR Zero = XMVectorZero();
4379 XMVECTOR HalfPi = _mm_load_ps1(&ATanConstants3.f[0]);
4380 XMVECTOR Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
4382 XMVECTOR vConstants = _mm_load_ps1(&ATanConstants3.f[1]);
4384 XMVECTOR Angle2 = _mm_load_ps1(&ATanConstants3.f[2]);
4385 Angle2 = XMVectorSelect(Angle2, vConstants, F_GT_One);
// FA = ((sqrt(3) - 1)*VF + VF - 1) / (VF + sqrt(3)).
4388 XMVECTOR FA = _mm_load_ps1(&ATanConstants2.f[1]);
4389 FA = _mm_mul_ps(FA,VF);
4390 FA = _mm_add_ps(FA,VF);
4391 FA = _mm_add_ps(FA,g_XMNegativeOne);
4393 vConstants = _mm_load_ps1(&ATanConstants2.f[0]);
4394 vConstants = _mm_add_ps(vConstants,VF);
4395 FA = _mm_div_ps(FA,vConstants);
4398 vConstants = _mm_load_ps1(&ATanConstants2.f[2]);
4400 vConstants = _mm_cmpgt_ps(VF,vConstants);
4401 VF = XMVectorSelect(VF, FA, vConstants);
4402 Angle1 = XMVectorSelect(Angle1, Angle2, vConstants);
4404 XMVECTOR AbsF = XMVectorAbs(VF);
// Rational approximation: D and N are Horner evaluations in G = VF^2.
4406 XMVECTOR G = _mm_mul_ps(VF,VF);
4407 XMVECTOR D = _mm_load_ps1(&ATanConstants1.f[3]);
4408 D = _mm_add_ps(D,G);
4409 D = _mm_mul_ps(D,G);
4410 vConstants = _mm_load_ps1(&ATanConstants1.f[2]);
4411 D = _mm_add_ps(D,vConstants);
4412 D = _mm_mul_ps(D,G);
4413 vConstants = _mm_load_ps1(&ATanConstants1.f[1]);
4414 D = _mm_add_ps(D,vConstants);
4415 D = _mm_mul_ps(D,G);
4416 vConstants = _mm_load_ps1(&ATanConstants1.f[0]);
4417 D = _mm_add_ps(D,vConstants);
4419 XMVECTOR N = _mm_load_ps1(&ATanConstants0.f[3]);
4420 N = _mm_mul_ps(N,G);
4421 vConstants = _mm_load_ps1(&ATanConstants0.f[2]);
4422 N = _mm_add_ps(N,vConstants);
4423 N = _mm_mul_ps(N,G);
4424 vConstants = _mm_load_ps1(&ATanConstants0.f[1]);
4425 N = _mm_add_ps(N,vConstants);
4426 N = _mm_mul_ps(N,G);
4427 vConstants = _mm_load_ps1(&ATanConstants0.f[0]);
4428 N = _mm_add_ps(N,vConstants);
4429 N = _mm_mul_ps(N,G);
4430 XMVECTOR Result = _mm_div_ps(N,D);
4432 Result = _mm_mul_ps(Result,VF);
4433 Result = _mm_add_ps(Result,VF);
4435 vConstants = _mm_load_ps1(&ATanConstants2.f[3]);
4436 vConstants = _mm_cmpge_ps(vConstants,AbsF);
4437 Result = XMVectorSelect(Result,VF,vConstants);
4439 XMVECTOR NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
4440 Result = XMVectorSelect(Result,NegativeResult,F_GT_One);
4441 Result = _mm_add_ps(Result,Angle1);
// NOTE(review): this uses 0 >= V where the no-intrinsics path uses V < 0, so
// V == 0 takes the negate branch here (result may be -0.0f) -- confirm intent.
4443 Zero = _mm_cmpge_ps(Zero,V);
4444 NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
4445 Result = XMVectorSelect(Result,NegativeResult,Zero);
4447 XMVECTOR MaxV = _mm_load_ps1(&ATanConstants3.f[3]);
4448 XMVECTOR MinV = _mm_mul_ps(MaxV,g_XMNegativeOne);
// Saturate out-of-range inputs to +/-Pi/2.
4450 HalfPi = _mm_mul_ps(HalfPi,g_XMNegativeOne);
4451 MaxV = _mm_cmple_ps(MaxV,V);
4452 MinV = _mm_cmpge_ps(MinV,V);
4453 Result = XMVectorSelect(Result,g_XMHalfPi,MaxV);
4455 Result = XMVectorSelect(Result,HalfPi,MinV);
4457 #else // _XM_VMX128_INTRINSICS_
4458 #endif // _XM_VMX128_INTRINSICS_
4461 //------------------------------------------------------------------------------
// Per-component four-quadrant arctangent of Y/X, result in [-Pi, Pi].
// Special cases (zeros/infinities) follow the CRT atan2 conventions listed below.
// NOTE(review): parameter list elided in this view — presumably (FXMVECTOR Y, FXMVECTOR X); confirm against header.
4463 XMINLINE XMVECTOR XMVectorATan2
4469 #if defined(_XM_NO_INTRINSICS_)
4471 // Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions:
4473 // Y == 0 and X is Negative -> Pi with the sign of Y
4474 // y == 0 and x is positive -> 0 with the sign of y
4475 // Y != 0 and X == 0 -> Pi / 2 with the sign of Y
4476 // Y != 0 and X is Negative -> atan(y/x) + (PI with the sign of Y)
4477 // X == -Infinity and Finite Y -> Pi with the sign of Y
4478 // X == +Infinity and Finite Y -> 0 with the sign of Y
4479 // Y == Infinity and X is Finite -> Pi / 2 with the sign of Y
4480 // Y == Infinity and X == -Infinity -> 3Pi / 4 with the sign of Y
4481 // Y == Infinity and X == +Infinity -> Pi / 4 with the sign of Y
4483 XMVECTOR Reciprocal;
4486 XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
4487 XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity;
4488 XMVECTOR ATanResultValid;
4489 XMVECTOR R0, R1, R2, R3, R4, R5;
4492 static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
4494 Zero = XMVectorZero();
4495 ATanResultValid = XMVectorTrueInt(); // all-ones marks lanes that still need the real atan computation
4497 Pi = XMVectorSplatX(ATan2Constants);
4498 PiOverTwo = XMVectorSplatY(ATan2Constants);
4499 PiOverFour = XMVectorSplatZ(ATan2Constants);
4500 ThreePiOverFour = XMVectorSplatW(ATan2Constants);
4502 YEqualsZero = XMVectorEqual(Y, Zero);
4503 XEqualsZero = XMVectorEqual(X, Zero);
4504 XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v); // isolate sign bit of X
4505 XIsPositive = XMVectorEqualInt(XIsPositive, Zero); // mask where sign bit is clear (X >= +0)
4506 YEqualsInfinity = XMVectorIsInfinite(Y);
4507 XEqualsInfinity = XMVectorIsInfinite(X);
4509 YSign = XMVectorAndInt(Y, g_XMNegativeZero.v); // sign bit of Y, OR'd onto every special-case constant below
4510 Pi = XMVectorOrInt(Pi, YSign);
4511 PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
4512 PiOverFour = XMVectorOrInt(PiOverFour, YSign);
4513 ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);
4515 R1 = XMVectorSelect(Pi, YSign, XIsPositive);
4516 R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
4517 R3 = XMVectorSelect(R2, R1, YEqualsZero);
4518 R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
4519 R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
4520 Result = XMVectorSelect(R3, R5, YEqualsInfinity);
4521 ATanResultValid = XMVectorEqualInt(Result, ATanResultValid); // lanes still all-ones were not handled by a special case
4523 Reciprocal = XMVectorReciprocal(X);
4524 V = XMVectorMultiply(Y, Reciprocal);
4525 R0 = XMVectorATan(V);
4527 R1 = XMVectorSelect( Pi, Zero, XIsPositive ); // add Pi (with Y's sign) when X is negative
4528 R2 = XMVectorAdd(R0, R1);
4530 Result = XMVectorSelect(Result, R2, ATanResultValid);
4534 #elif defined(_XM_SSE_INTRINSICS_)
4535 static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
4537 // Mask of the lanes where Y is +/-infinity
4538 XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
4539 // Get the sign of (Y&0x80000000)
4540 XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
4541 // Get the sign bits of X
4542 XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
4543 // Change them to masks
4544 XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
4546 XMVECTOR Pi = _mm_load_ps1(&ATan2Constants.f[0]);
4547 // Copy the sign of Y
4548 Pi = _mm_or_ps(Pi,YSign);
4549 XMVECTOR R1 = XMVectorSelect(Pi,YSign,XIsPositive);
4551 XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
4552 // Get Pi/2 with the sign of Y
4553 XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
4554 PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
4555 XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
4557 vConstants = _mm_cmpeq_ps(Y,g_XMZero);
4558 R2 = XMVectorSelect(R2,R1,vConstants);
4559 // Get Pi/4 with sign of Y
4560 XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
4561 PiOverFour = _mm_or_ps(PiOverFour,YSign);
4562 // Get (Pi*3)/4 with sign of Y
4563 XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
4564 ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
4565 vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
4566 XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
4567 vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);
4569 XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
4570 vConstants = XMVectorSelect(R1,vResult,YEqualsInfinity);
4571 // At this point, any entry that's zero will get the result
4572 // from XMVectorATan(), otherwise, return the failsafe value
4573 vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
4574 // Any entries not 0xFFFFFFFF, are considered precalculated
4575 XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
4576 // Let's do the ATan2 function
4577 vConstants = _mm_div_ps(Y,X);
4578 vConstants = XMVectorATan(vConstants);
4579 // Discard entries that have been declared void
4581 XMVECTOR R3 = XMVectorSelect( Pi, g_XMZero, XIsPositive ); // quadrant fixup: add +/-Pi when X < 0
4582 vConstants = _mm_add_ps( vConstants, R3 );
4584 vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
4586 #else // _XM_VMX128_INTRINSICS_
4587 #endif // _XM_VMX128_INTRINSICS_
4590 //------------------------------------------------------------------------------
// Estimated per-component sine using a degree-7 odd minimax polynomial.
// Only accurate for inputs in [-Pi, Pi); no range reduction is performed here.
4592 XMFINLINE XMVECTOR XMVectorSinEst
4597 #if defined(_XM_NO_INTRINSICS_)
4599 XMVECTOR V2, V3, V5, V7;
4600 XMVECTOR S1, S2, S3;
4603 // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
4604 V2 = XMVectorMultiply(V, V);
4605 V3 = XMVectorMultiply(V2, V);
4606 V5 = XMVectorMultiply(V3, V2);
4607 V7 = XMVectorMultiply(V5, V2);
4609 S1 = XMVectorSplatY(g_XMSinEstCoefficients.v);
4610 S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v);
4611 S3 = XMVectorSplatW(g_XMSinEstCoefficients.v);
4613 Result = XMVectorMultiplyAdd(S1, V3, V);
4614 Result = XMVectorMultiplyAdd(S2, V5, Result);
4615 Result = XMVectorMultiplyAdd(S3, V7, Result);
4619 #elif defined(_XM_SSE_INTRINSICS_)
4620 // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
4621 XMVECTOR V2 = _mm_mul_ps(V,V);
4622 XMVECTOR V3 = _mm_mul_ps(V2,V);
4623 XMVECTOR vResult = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]);
4624 vResult = _mm_mul_ps(vResult,V3);
4625 vResult = _mm_add_ps(vResult,V);
4626 XMVECTOR vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]);
4628 V3 = _mm_mul_ps(V3,V2); // V3 now holds V^5
4629 vConstants = _mm_mul_ps(vConstants,V3);
4630 vResult = _mm_add_ps(vResult,vConstants);
4631 vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]);
4633 V3 = _mm_mul_ps(V3,V2); // V3 now holds V^7
4634 vConstants = _mm_mul_ps(vConstants,V3);
4635 vResult = _mm_add_ps(vResult,vConstants);
4637 #else // _XM_VMX128_INTRINSICS_
4638 #endif // _XM_VMX128_INTRINSICS_
4641 //------------------------------------------------------------------------------
// Estimated per-component cosine using a degree-6 even minimax polynomial.
// Like XMVectorSinEst, intended for inputs already reduced to [-Pi, Pi).
4643 XMFINLINE XMVECTOR XMVectorCosEst
4648 #if defined(_XM_NO_INTRINSICS_)
4650 XMVECTOR V2, V4, V6;
4651 XMVECTOR C0, C1, C2, C3;
4654 V2 = XMVectorMultiply(V, V);
4655 V4 = XMVectorMultiply(V2, V2);
4656 V6 = XMVectorMultiply(V4, V2);
4658 C0 = XMVectorSplatX(g_XMCosEstCoefficients.v);
4659 C1 = XMVectorSplatY(g_XMCosEstCoefficients.v);
4660 C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v);
4661 C3 = XMVectorSplatW(g_XMCosEstCoefficients.v);
4663 Result = XMVectorMultiplyAdd(C1, V2, C0);
4664 Result = XMVectorMultiplyAdd(C2, V4, Result);
4665 Result = XMVectorMultiplyAdd(C3, V6, Result);
4669 #elif defined(_XM_SSE_INTRINSICS_)
4671 XMVECTOR V2 = _mm_mul_ps(V,V);
4672 XMVECTOR vResult = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]);
4673 vResult = _mm_mul_ps(vResult,V2);
4674 XMVECTOR vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]);
4675 vResult = _mm_add_ps(vResult,vConstants);
4676 vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]);
4678 XMVECTOR V4 = _mm_mul_ps(V2, V2);
4679 vConstants = _mm_mul_ps(vConstants,V4);
4680 vResult = _mm_add_ps(vResult,vConstants);
4681 vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]);
4683 V4 = _mm_mul_ps(V4,V2); // V4 now holds V^6
4684 vConstants = _mm_mul_ps(vConstants,V4);
4685 vResult = _mm_add_ps(vResult,vConstants);
4687 #else // _XM_VMX128_INTRINSICS_
4688 #endif // _XM_VMX128_INTRINSICS_
4691 //------------------------------------------------------------------------------
// Computes estimated sine and cosine of each component in one pass, sharing
// the power-of-V terms between both polynomials (same coefficients as
// XMVectorSinEst / XMVectorCosEst; valid for -Pi <= V < Pi).
// NOTE(review): output-parameter declarations/copy-out are elided from this view;
// Sin/Cos below presumably feed *pSin / *pCos — confirm against the full source.
4693 XMFINLINE VOID XMVectorSinCosEst
4700 #if defined(_XM_NO_INTRINSICS_)
4702 XMVECTOR V2, V3, V4, V5, V6, V7;
4703 XMVECTOR S1, S2, S3;
4704 XMVECTOR C0, C1, C2, C3;
4710 // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
4711 // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
4712 V2 = XMVectorMultiply(V, V);
4713 V3 = XMVectorMultiply(V2, V);
4714 V4 = XMVectorMultiply(V2, V2);
4715 V5 = XMVectorMultiply(V3, V2);
4716 V6 = XMVectorMultiply(V3, V3);
4717 V7 = XMVectorMultiply(V4, V3);
4719 S1 = XMVectorSplatY(g_XMSinEstCoefficients.v);
4720 S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v);
4721 S3 = XMVectorSplatW(g_XMSinEstCoefficients.v);
4723 C0 = XMVectorSplatX(g_XMCosEstCoefficients.v);
4724 C1 = XMVectorSplatY(g_XMCosEstCoefficients.v);
4725 C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v);
4726 C3 = XMVectorSplatW(g_XMCosEstCoefficients.v);
4728 Sin = XMVectorMultiplyAdd(S1, V3, V);
4729 Sin = XMVectorMultiplyAdd(S2, V5, Sin);
4730 Sin = XMVectorMultiplyAdd(S3, V7, Sin);
4732 Cos = XMVectorMultiplyAdd(C1, V2, C0);
4733 Cos = XMVectorMultiplyAdd(C2, V4, Cos);
4734 Cos = XMVectorMultiplyAdd(C3, V6, Cos);
4739 #elif defined(_XM_SSE_INTRINSICS_)
4742 XMVECTOR V2, V3, V4, V5, V6, V7;
4743 XMVECTOR S1, S2, S3;
4744 XMVECTOR C0, C1, C2, C3;
4747 // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
4748 // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
4749 V2 = XMVectorMultiply(V, V);
4750 V3 = XMVectorMultiply(V2, V);
4751 V4 = XMVectorMultiply(V2, V2);
4752 V5 = XMVectorMultiply(V3, V2);
4753 V6 = XMVectorMultiply(V3, V3);
4754 V7 = XMVectorMultiply(V4, V3);
4756 S1 = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]);
4757 S2 = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]);
4758 S3 = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]);
4760 C0 = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]);
4761 C1 = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]);
4762 C2 = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]);
4763 C3 = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]);
4765 Sin = XMVectorMultiplyAdd(S1, V3, V);
4766 Sin = XMVectorMultiplyAdd(S2, V5, Sin);
4767 Sin = XMVectorMultiplyAdd(S3, V7, Sin);
4769 Cos = XMVectorMultiplyAdd(C1, V2, C0);
4770 Cos = XMVectorMultiplyAdd(C2, V4, Cos);
4771 Cos = XMVectorMultiplyAdd(C3, V6, Cos);
4775 #else // _XM_VMX128_INTRINSICS_
4776 #endif // _XM_VMX128_INTRINSICS_
4779 //------------------------------------------------------------------------------
// Estimated per-component tangent: reduces V to [-Pi/2, Pi/2] by subtracting
// the nearest multiple of Pi, then evaluates a rational approximation N/D.
4781 XMFINLINE XMVECTOR XMVectorTanEst
4786 #if defined(_XM_NO_INTRINSICS_)
4788 XMVECTOR V1, V2, V1T0, V1T1, V2T2;
4789 XMVECTOR T0, T1, T2;
4794 OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients.v);
4796 V1 = XMVectorMultiply(V, OneOverPi);
4797 V1 = XMVectorRound(V1); // nearest integer multiple of Pi
4799 V1 = XMVectorNegativeMultiplySubtract(g_XMPi.v, V1, V); // V1 = V - round(V/Pi)*Pi
4801 T0 = XMVectorSplatX(g_XMTanEstCoefficients.v);
4802 T1 = XMVectorSplatY(g_XMTanEstCoefficients.v);
4803 T2 = XMVectorSplatZ(g_XMTanEstCoefficients.v);
4805 V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2); // T2 - V1^2
4806 V2 = XMVectorMultiply(V1, V1);
4807 V1T0 = XMVectorMultiply(V1, T0);
4808 V1T1 = XMVectorMultiply(V1, T1);
4810 D = XMVectorReciprocalEst(V2T2);
4811 N = XMVectorMultiplyAdd(V2, V1T1, V1T0);
4813 Result = XMVectorMultiply(N, D);
4817 #elif defined(_XM_SSE_INTRINSICS_)
4818 XMVECTOR V1, V2, V1T0, V1T1, V2T2;
4819 XMVECTOR T0, T1, T2;
4824 OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients);
4826 V1 = XMVectorMultiply(V, OneOverPi);
4827 V1 = XMVectorRound(V1);
4829 V1 = XMVectorNegativeMultiplySubtract(g_XMPi, V1, V);
4831 T0 = XMVectorSplatX(g_XMTanEstCoefficients);
4832 T1 = XMVectorSplatY(g_XMTanEstCoefficients);
4833 T2 = XMVectorSplatZ(g_XMTanEstCoefficients);
4835 V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2);
4836 V2 = XMVectorMultiply(V1, V1);
4837 V1T0 = XMVectorMultiply(V1, T0);
4838 V1T1 = XMVectorMultiply(V1, T1);
4840 D = XMVectorReciprocalEst(V2T2);
4841 N = XMVectorMultiplyAdd(V2, V1T1, V1T0);
4843 Result = XMVectorMultiply(N, D);
4846 #else // _XM_VMX128_INTRINSICS_
4847 #endif // _XM_VMX128_INTRINSICS_
4850 //------------------------------------------------------------------------------
// Estimated hyperbolic sine: sinh(x) = (e^x - e^-x) / 2, computed as
// 2^(x/ln2 - 1) - 2^(-x/ln2 - 1) via XMVectorExpEst (base-2 exponential).
4852 XMFINLINE XMVECTOR XMVectorSinHEst
4857 #if defined(_XM_NO_INTRINSICS_)
4862 static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
4864 V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v); // x/ln2 - 1
4865 V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v); // -x/ln2 - 1
4867 E1 = XMVectorExpEst(V1);
4868 E2 = XMVectorExpEst(V2);
4870 Result = XMVectorSubtract(E1, E2);
4874 #elif defined(_XM_SSE_INTRINSICS_)
4878 static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
4880 V1 = _mm_mul_ps(V,Scale);
4881 V1 = _mm_add_ps(V1,g_XMNegativeOne);
4882 V2 = _mm_mul_ps(V,Scale);
4883 V2 = _mm_sub_ps(g_XMNegativeOne,V2);
4884 E1 = XMVectorExpEst(V1);
4885 E2 = XMVectorExpEst(V2);
4886 Result = _mm_sub_ps(E1, E2);
4888 #else // _XM_VMX128_INTRINSICS_
4889 #endif // _XM_VMX128_INTRINSICS_
4892 //------------------------------------------------------------------------------
// Estimated hyperbolic cosine: cosh(x) = (e^x + e^-x) / 2, computed as
// 2^(x/ln2 - 1) + 2^(-x/ln2 - 1) via XMVectorExpEst (base-2 exponential).
4894 XMFINLINE XMVECTOR XMVectorCosHEst
4899 #if defined(_XM_NO_INTRINSICS_)
4904 static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
4906 V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v); // x/ln2 - 1
4907 V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v); // -x/ln2 - 1
4909 E1 = XMVectorExpEst(V1);
4910 E2 = XMVectorExpEst(V2);
4912 Result = XMVectorAdd(E1, E2);
4916 #elif defined(_XM_SSE_INTRINSICS_)
4920 static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
4922 V1 = _mm_mul_ps(V,Scale);
4923 V1 = _mm_add_ps(V1,g_XMNegativeOne);
4924 V2 = _mm_mul_ps(V, Scale);
4925 V2 = _mm_sub_ps(g_XMNegativeOne,V2);
4926 E1 = XMVectorExpEst(V1);
4927 E2 = XMVectorExpEst(V2);
4928 Result = _mm_add_ps(E1, E2);
4930 #else // _XM_VMX128_INTRINSICS_
4931 #endif // _XM_VMX128_INTRINSICS_
4934 //------------------------------------------------------------------------------
// Estimated hyperbolic tangent using the identity
// tanh(x) = 1 - 2 / (e^(2x) + 1); e^(2x) is computed as 2^(2x/ln2).
4936 XMFINLINE XMVECTOR XMVectorTanHEst
4941 #if defined(_XM_NO_INTRINSICS_)
4945 static CONST XMVECTOR Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
4947 E = XMVectorMultiply(V, Scale);
4948 E = XMVectorExpEst(E); // E = e^(2x)
4949 E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v); // E = (e^(2x) + 1) / 2
4950 E = XMVectorReciprocalEst(E);
4952 Result = XMVectorSubtract(g_XMOne.v, E);
4956 #elif defined(_XM_SSE_INTRINSICS_)
4957 static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
4959 XMVECTOR E = _mm_mul_ps(V, Scale);
4960 E = XMVectorExpEst(E);
4961 E = _mm_mul_ps(E,g_XMOneHalf);
4962 E = _mm_add_ps(E,g_XMOneHalf);
4963 E = XMVectorReciprocalEst(E);
4964 E = _mm_sub_ps(g_XMOne, E);
4966 #else // _XM_VMX128_INTRINSICS_
4967 #endif // _XM_VMX128_INTRINSICS_
4970 //------------------------------------------------------------------------------
// Estimated per-component arcsine using the g_XMASinEst polynomial
// coefficients plus a sqrt((1+eps) - |V|) correction term.
4972 XMFINLINE XMVECTOR XMVectorASinEst
4977 #if defined(_XM_NO_INTRINSICS_)
4979 XMVECTOR AbsV, V2, VD, VC0, V2C3;
4980 XMVECTOR C0, C1, C2, C3;
4981 XMVECTOR D, Rsq, SqrtD;
4982 XMVECTOR OnePlusEps;
4985 AbsV = XMVectorAbs(V);
4987 OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);
4989 C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
4990 C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
4991 C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
4992 C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);
4994 D = XMVectorSubtract(OnePlusEps, AbsV);
4996 Rsq = XMVectorReciprocalSqrtEst(D);
4997 SqrtD = XMVectorMultiply(D, Rsq); // sqrt(D) = D * rsqrt(D)
4999 V2 = XMVectorMultiply(V, AbsV); // V^2 with the sign of V preserved
5000 V2C3 = XMVectorMultiply(V2, C3);
5001 VD = XMVectorMultiply(D, AbsV);
5002 VC0 = XMVectorMultiply(V, C0);
5004 Result = XMVectorMultiply(V, C1);
5005 Result = XMVectorMultiplyAdd(V2, C2, Result);
5006 Result = XMVectorMultiplyAdd(V2C3, VD, Result);
5007 Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);
5011 #elif defined(_XM_SSE_INTRINSICS_)
5013 XMVECTOR vAbsV = _mm_setzero_ps();
5014 vAbsV = _mm_sub_ps(vAbsV,V);
5015 vAbsV = _mm_max_ps(vAbsV,V); // abs(V) = max(-V, V)
5017 XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
5018 D = _mm_sub_ps(D,vAbsV);
5019 // Since this is an estimate, rsqrt is okay
5020 XMVECTOR vConstants = _mm_rsqrt_ps(D);
5021 XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
5022 // V2 = V^2 retaining sign
5023 XMVECTOR V2 = _mm_mul_ps(V,vAbsV);
5024 D = _mm_mul_ps(D,vAbsV);
5026 XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
5027 vResult = _mm_mul_ps(vResult,V);
5028 vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
5029 vConstants = _mm_mul_ps(vConstants,V2);
5030 vResult = _mm_add_ps(vResult,vConstants);
5032 vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
5033 vConstants = _mm_mul_ps(vConstants,V2);
5034 vConstants = _mm_mul_ps(vConstants,D);
5035 vResult = _mm_add_ps(vResult,vConstants);
5037 vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
5038 vConstants = _mm_mul_ps(vConstants,V);
5039 vConstants = _mm_mul_ps(vConstants,SqrtD);
5040 vResult = _mm_add_ps(vResult,vConstants);
5042 #else // _XM_VMX128_INTRINSICS_
5043 #endif // _XM_VMX128_INTRINSICS_
5046 //------------------------------------------------------------------------------
// Estimated per-component arccosine, derived from the arcsine estimate
// via the identity acos(V) = Pi/2 - asin(V).
5048 XMFINLINE XMVECTOR XMVectorACosEst
5053 #if defined(_XM_NO_INTRINSICS_)
5055 XMVECTOR AbsV, V2, VD, VC0, V2C3;
5056 XMVECTOR C0, C1, C2, C3;
5057 XMVECTOR D, Rsq, SqrtD;
5058 XMVECTOR OnePlusEps, HalfPi;
5061 // acos(V) = PI / 2 - asin(V)
5063 AbsV = XMVectorAbs(V);
5065 OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);
5066 HalfPi = XMVectorSplatY(g_XMASinEstConstants.v);
5068 C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
5069 C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
5070 C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
5071 C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);
5073 D = XMVectorSubtract(OnePlusEps, AbsV);
5075 Rsq = XMVectorReciprocalSqrtEst(D);
5076 SqrtD = XMVectorMultiply(D, Rsq); // sqrt(D) = D * rsqrt(D)
5078 V2 = XMVectorMultiply(V, AbsV); // V^2 with the sign of V preserved
5079 V2C3 = XMVectorMultiply(V2, C3);
5080 VD = XMVectorMultiply(D, AbsV);
5081 VC0 = XMVectorMultiply(V, C0);
5083 Result = XMVectorMultiply(V, C1);
5084 Result = XMVectorMultiplyAdd(V2, C2, Result);
5085 Result = XMVectorMultiplyAdd(V2C3, VD, Result);
5086 Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);
5087 Result = XMVectorSubtract(HalfPi, Result);
5091 #elif defined(_XM_SSE_INTRINSICS_)
5092 // acos(V) = PI / 2 - asin(V)
5094 XMVECTOR vAbsV = _mm_setzero_ps();
5095 vAbsV = _mm_sub_ps(vAbsV,V);
5096 vAbsV = _mm_max_ps(vAbsV,V); // abs(V) = max(-V, V)
5098 XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
5099 D = _mm_sub_ps(D,vAbsV);
5100 // SqrtD = sqrt(D) estimated, where D = Const - abs(V)
5101 XMVECTOR vConstants = _mm_rsqrt_ps(D);
5102 XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
5103 // V2 = V^2 while retaining sign
5104 XMVECTOR V2 = _mm_mul_ps(V, vAbsV);
5105 // Drop vAbsV here. D = (Const-abs(V))*abs(V)
5106 D = _mm_mul_ps(D, vAbsV);
5108 XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
5109 vResult = _mm_mul_ps(vResult,V);
5110 vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
5111 vConstants = _mm_mul_ps(vConstants,V2);
5112 vResult = _mm_add_ps(vResult,vConstants);
5114 vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
5115 vConstants = _mm_mul_ps(vConstants,V2);
5116 vConstants = _mm_mul_ps(vConstants,D);
5117 vResult = _mm_add_ps(vResult,vConstants);
5119 vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
5120 vConstants = _mm_mul_ps(vConstants,V);
5121 vConstants = _mm_mul_ps(vConstants,SqrtD);
5122 vResult = _mm_add_ps(vResult,vConstants);
5124 vConstants = _mm_load_ps1(&g_XMASinEstConstants.f[1]); // Pi/2
5125 vResult = _mm_sub_ps(vConstants,vResult);
5127 #else // _XM_VMX128_INTRINSICS_
5128 #endif // _XM_VMX128_INTRINSICS_
5131 //------------------------------------------------------------------------------
// Estimated per-component arctangent using a rational approximation
// N(V) / D(V) built from the g_XMATanEst coefficients.
5133 XMFINLINE XMVECTOR XMVectorATanEst
5138 #if defined(_XM_NO_INTRINSICS_)
5140 XMVECTOR AbsV, V2S2, N, D;
5141 XMVECTOR S0, S1, S2;
5145 S0 = XMVectorSplatX(g_XMATanEstCoefficients.v);
5146 S1 = XMVectorSplatY(g_XMATanEstCoefficients.v);
5147 S2 = XMVectorSplatZ(g_XMATanEstCoefficients.v);
5148 HalfPi = XMVectorSplatW(g_XMATanEstCoefficients.v);
5150 AbsV = XMVectorAbs(V);
5152 V2S2 = XMVectorMultiplyAdd(V, V, S2);
5153 N = XMVectorMultiplyAdd(AbsV, HalfPi, S0);
5154 D = XMVectorMultiplyAdd(AbsV, S1, V2S2);
5155 N = XMVectorMultiply(N, V);
5156 D = XMVectorReciprocalEst(D);
5158 Result = XMVectorMultiply(N, D);
5162 #elif defined(_XM_SSE_INTRINSICS_)
5164 XMVECTOR vAbsV = _mm_setzero_ps();
5165 vAbsV = _mm_sub_ps(vAbsV,V);
5166 vAbsV = _mm_max_ps(vAbsV,V); // abs(V) = max(-V, V)
5168 XMVECTOR vResult = _mm_load_ps1(&g_XMATanEstCoefficients.f[3]);
5169 vResult = _mm_mul_ps(vResult,vAbsV);
5170 XMVECTOR vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[0]);
5171 vResult = _mm_add_ps(vResult,vConstants);
5172 vResult = _mm_mul_ps(vResult,V); // numerator N = (HalfPi*|V| + S0) * V
5174 XMVECTOR D = _mm_mul_ps(V,V);
5175 vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[2]);
5176 D = _mm_add_ps(D,vConstants);
5177 vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[1]);
5178 vConstants = _mm_mul_ps(vConstants,vAbsV);
5179 D = _mm_add_ps(D,vConstants); // denominator D = V^2 + S2 + S1*|V|
5180 vResult = _mm_div_ps(vResult,D);
5182 #else // _XM_VMX128_INTRINSICS_
5183 #endif // _XM_VMX128_INTRINSICS_
5186 //------------------------------------------------------------------------------
// Estimated four-quadrant arctangent of Y/X; same special-case handling as
// XMVectorATan2 but uses the estimated reciprocal and XMVectorATanEst.
// NOTE(review): parameter list elided in this view — presumably (FXMVECTOR Y, FXMVECTOR X); confirm against header.
5188 XMFINLINE XMVECTOR XMVectorATan2Est
5194 #if defined(_XM_NO_INTRINSICS_)
5196 XMVECTOR Reciprocal;
5199 XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
5200 XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity;
5201 XMVECTOR ATanResultValid;
5202 XMVECTOR R0, R1, R2, R3, R4, R5;
5205 static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
5207 Zero = XMVectorZero();
5208 ATanResultValid = XMVectorTrueInt(); // all-ones marks lanes that still need the real atan computation
5210 Pi = XMVectorSplatX(ATan2Constants);
5211 PiOverTwo = XMVectorSplatY(ATan2Constants);
5212 PiOverFour = XMVectorSplatZ(ATan2Constants);
5213 ThreePiOverFour = XMVectorSplatW(ATan2Constants);
5215 YEqualsZero = XMVectorEqual(Y, Zero);
5216 XEqualsZero = XMVectorEqual(X, Zero);
5217 XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v); // isolate sign bit of X
5218 XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
5219 YEqualsInfinity = XMVectorIsInfinite(Y);
5220 XEqualsInfinity = XMVectorIsInfinite(X);
5222 YSign = XMVectorAndInt(Y, g_XMNegativeZero.v); // sign bit of Y, OR'd onto the special-case constants
5223 Pi = XMVectorOrInt(Pi, YSign);
5224 PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
5225 PiOverFour = XMVectorOrInt(PiOverFour, YSign);
5226 ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);
5228 R1 = XMVectorSelect(Pi, YSign, XIsPositive);
5229 R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
5230 R3 = XMVectorSelect(R2, R1, YEqualsZero);
5231 R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
5232 R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
5233 Result = XMVectorSelect(R3, R5, YEqualsInfinity);
5234 ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);
5236 Reciprocal = XMVectorReciprocalEst(X);
5237 V = XMVectorMultiply(Y, Reciprocal);
5238 R0 = XMVectorATanEst(V);
5240 R1 = XMVectorSelect( Pi, Zero, XIsPositive ); // quadrant fixup: add +/-Pi when X < 0
5241 R2 = XMVectorAdd(R0, R1);
5243 Result = XMVectorSelect(Result, R2, ATanResultValid);
5247 #elif defined(_XM_SSE_INTRINSICS_)
5248 static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
5250 // Mask of the lanes where Y is +/-infinity
5251 XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
5252 // Get the sign of (Y&0x80000000)
5253 XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
5254 // Get the sign bits of X
5255 XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
5256 // Change them to masks
5257 XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
5259 XMVECTOR Pi = _mm_load_ps1(&ATan2Constants.f[0]);
5260 // Copy the sign of Y
5261 Pi = _mm_or_ps(Pi,YSign);
5262 XMVECTOR R1 = XMVectorSelect(Pi,YSign,XIsPositive);
5264 XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
5265 // Get Pi/2 with the sign of Y
5266 XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
5267 PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
5268 XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
5270 vConstants = _mm_cmpeq_ps(Y,g_XMZero);
5271 R2 = XMVectorSelect(R2,R1,vConstants);
5272 // Get Pi/4 with sign of Y
5273 XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
5274 PiOverFour = _mm_or_ps(PiOverFour,YSign);
5275 // Get (Pi*3)/4 with sign of Y
5276 XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
5277 ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
5278 vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
5279 XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
5280 vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);
5282 XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
5283 vConstants = XMVectorSelect(R1,vResult,YEqualsInfinity);
5284 // At this point, any entry that's zero will get the result
5285 // from XMVectorATan(), otherwise, return the failsafe value
5286 vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
5287 // Any entries not 0xFFFFFFFF, are considered precalculated
5288 XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
5289 // Let's do the ATan2 function
5290 XMVECTOR Reciprocal = _mm_rcp_ps(X); // estimated reciprocal (vs. _mm_div_ps in the full-precision version)
5291 vConstants = _mm_mul_ps(Y, Reciprocal);
5292 vConstants = XMVectorATanEst(vConstants);
5293 // Discard entries that have been declared void
5295 XMVECTOR R3 = XMVectorSelect( Pi, g_XMZero, XIsPositive ); // quadrant fixup: add +/-Pi when X < 0
5296 vConstants = _mm_add_ps( vConstants, R3 );
5298 vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
5300 #else // _XM_VMX128_INTRINSICS_
5301 #endif // _XM_VMX128_INTRINSICS_
5304 //------------------------------------------------------------------------------
// Linear interpolation between V0 and V1 by the scalar factor t:
// Result = V0 + t * (V1 - V0). t is not clamped, so t outside [0,1] extrapolates.
5306 XMFINLINE XMVECTOR XMVectorLerp
5313 #if defined(_XM_NO_INTRINSICS_)
5319 // V0 + t * (V1 - V0)
5320 Scale = XMVectorReplicate(t);
5321 Length = XMVectorSubtract(V1, V0);
5322 Result = XMVectorMultiplyAdd(Length, Scale, V0);
5326 #elif defined(_XM_SSE_INTRINSICS_)
5330 L = _mm_sub_ps( V1, V0 );
5332 S = _mm_set_ps1( t ); // broadcast scalar t to all four lanes
5334 Result = _mm_mul_ps( L, S );
5336 return _mm_add_ps( Result, V0 );
5337 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
5338 #endif // _XM_VMX128_INTRINSICS_
5341 //------------------------------------------------------------------------------
// Linear interpolation with a per-component interpolation vector T:
// Result = V0 + T * (V1 - V0); each lane interpolates by its own factor.
5343 XMFINLINE XMVECTOR XMVectorLerpV
5350 #if defined(_XM_NO_INTRINSICS_)
5355 // V0 + T * (V1 - V0)
5356 Length = XMVectorSubtract(V1, V0);
5357 Result = XMVectorMultiplyAdd(Length, T, V0);
5361 #elif defined(_XM_SSE_INTRINSICS_)
5365 Length = _mm_sub_ps( V1, V0 );
5367 Result = _mm_mul_ps( Length, T );
5369 return _mm_add_ps( Result, V0 );
5370 #else // _XM_VMX128_INTRINSICS_
5371 #endif // _XM_VMX128_INTRINSICS_
5374 //------------------------------------------------------------------------------
// Hermite spline interpolation at scalar parameter t between Position0/Position1
// with tangents Tangent0/Tangent1, using the standard cubic Hermite basis.
5376 XMFINLINE XMVECTOR XMVectorHermite
5378 FXMVECTOR Position0,
5380 FXMVECTOR Position1,
5385 #if defined(_XM_NO_INTRINSICS_)
5395 // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
5396 // (t^3 - 2 * t^2 + t) * Tangent0 +
5397 // (-2 * t^3 + 3 * t^2) * Position1 +
5398 // (t^3 - t^2) * Tangent1
5402 P0 = XMVectorReplicate(2.0f * t3 - 3.0f * t2 + 1.0f);
5403 T0 = XMVectorReplicate(t3 - 2.0f * t2 + t);
5404 P1 = XMVectorReplicate(-2.0f * t3 + 3.0f * t2);
5405 T1 = XMVectorReplicate(t3 - t2);
5407 Result = XMVectorMultiply(P0, Position0);
5408 Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
5409 Result = XMVectorMultiplyAdd(P1, Position1, Result);
5410 Result = XMVectorMultiplyAdd(T1, Tangent1, Result);
5414 #elif defined(_XM_SSE_INTRINSICS_)
5418 XMVECTOR P0 = _mm_set_ps1(2.0f * t3 - 3.0f * t2 + 1.0f); // basis weights are computed in scalar, then splatted
5419 XMVECTOR T0 = _mm_set_ps1(t3 - 2.0f * t2 + t);
5420 XMVECTOR P1 = _mm_set_ps1(-2.0f * t3 + 3.0f * t2);
5421 XMVECTOR T1 = _mm_set_ps1(t3 - t2);
5423 XMVECTOR vResult = _mm_mul_ps(P0, Position0);
5424 XMVECTOR vTemp = _mm_mul_ps(T0, Tangent0);
5425 vResult = _mm_add_ps(vResult,vTemp);
5426 vTemp = _mm_mul_ps(P1, Position1);
5427 vResult = _mm_add_ps(vResult,vTemp);
5428 vTemp = _mm_mul_ps(T1, Tangent1);
5429 vResult = _mm_add_ps(vResult,vTemp);
5431 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
5432 #endif // _XM_VMX128_INTRINSICS_
5435 //------------------------------------------------------------------------------
// Hermite spline interpolation with a per-component parameter vector T.
// Each lane of T selects that lane's interpolation parameter; the SSE path
// evaluates all four basis weights at once with vectorized constants.
5437 XMFINLINE XMVECTOR XMVectorHermiteV
5439 FXMVECTOR Position0,
5441 FXMVECTOR Position1,
5446 #if defined(_XM_NO_INTRINSICS_)
5456 // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
5457 // (t^3 - 2 * t^2 + t) * Tangent0 +
5458 // (-2 * t^3 + 3 * t^2) * Position1 +
5459 // (t^3 - t^2) * Tangent1
5460 T2 = XMVectorMultiply(T, T);
5461 T3 = XMVectorMultiply(T , T2);
5463 P0 = XMVectorReplicate(2.0f * T3.vector4_f32[0] - 3.0f * T2.vector4_f32[0] + 1.0f); // weight from T.x
5464 T0 = XMVectorReplicate(T3.vector4_f32[1] - 2.0f * T2.vector4_f32[1] + T.vector4_f32[1]); // weight from T.y
5465 P1 = XMVectorReplicate(-2.0f * T3.vector4_f32[2] + 3.0f * T2.vector4_f32[2]); // weight from T.z
5466 T1 = XMVectorReplicate(T3.vector4_f32[3] - T2.vector4_f32[3]); // weight from T.w
5468 Result = XMVectorMultiply(P0, Position0);
5469 Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
5470 Result = XMVectorMultiplyAdd(P1, Position1, Result);
5471 Result = XMVectorMultiplyAdd(T1, Tangent1, Result);
5475 #elif defined(_XM_SSE_INTRINSICS_)
5476 static const XMVECTORF32 CatMulT2 = {-3.0f,-2.0f,3.0f,-1.0f}; // per-lane t^2 coefficients of the Hermite basis
5477 static const XMVECTORF32 CatMulT3 = {2.0f,1.0f,-2.0f,1.0f}; // per-lane t^3 coefficients of the Hermite basis
5479 // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
5480 // (t^3 - 2 * t^2 + t) * Tangent0 +
5481 // (-2 * t^3 + 3 * t^2) * Position1 +
5482 // (t^3 - t^2) * Tangent1
5483 XMVECTOR T2 = _mm_mul_ps(T,T);
5484 XMVECTOR T3 = _mm_mul_ps(T,T2);
5485 // Mul by the constants against t^2
5486 T2 = _mm_mul_ps(T2,CatMulT2);
5487 // Mul by the constants against t^3
5488 T3 = _mm_mul_ps(T3,CatMulT3);
5489 // T3 now has the pre-result.
5490 T3 = _mm_add_ps(T3,T2);
5491 // I need to add t.y only
5492 T2 = _mm_and_ps(T,g_XMMaskY);
5493 T3 = _mm_add_ps(T3,T2);
5495 T3 = _mm_add_ps(T3,g_XMIdentityR0); // add the +1 constant term to the x-lane weight
5496 // Now, I have the constants created
5497 // Mul the x constant to Position0
5498 XMVECTOR vResult = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(0,0,0,0));
5499 vResult = _mm_mul_ps(vResult,Position0);
5500 // Mul the y constant to Tangent0
5501 T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(1,1,1,1));
5502 T2 = _mm_mul_ps(T2,Tangent0);
5503 vResult = _mm_add_ps(vResult,T2);
5504 // Mul the z constant to Position1
5505 T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(2,2,2,2));
5506 T2 = _mm_mul_ps(T2,Position1);
5507 vResult = _mm_add_ps(vResult,T2);
5508 // Mul the w constant to Tangent1
5509 T3 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(3,3,3,3));
5510 T3 = _mm_mul_ps(T3,Tangent1);
5511 vResult = _mm_add_ps(vResult,T3);
5513 #else // _XM_VMX128_INTRINSICS_
5514 #endif // _XM_VMX128_INTRINSICS_
5517 //------------------------------------------------------------------------------
// Catmull-Rom spline interpolation of four control points; the curve
// segment interpolated lies between Position1 and Position2.
// NOTE(review): the scalar parameter 't' (and t2/t3 locals), parentheses and
// braces are not visible in this extract; confirm against the full source.
XMFINLINE XMVECTOR XMVectorCatmullRom
FXMVECTOR Position0,
FXMVECTOR Position1,
FXMVECTOR Position2,
CXMVECTOR Position3,
#if defined(_XM_NO_INTRINSICS_)
// Result = ((-t^3 + 2 * t^2 - t) * Position0 +
// (3 * t^3 - 5 * t^2 + 2) * Position1 +
// (-3 * t^3 + 4 * t^2 + t) * Position2 +
// (t^3 - t^2) * Position3) * 0.5
// Splat each scalar basis weight to a full vector, then accumulate the
// four weighted control points with multiply-adds.
P0 = XMVectorReplicate((-t3 + 2.0f * t2 - t) * 0.5f);
P1 = XMVectorReplicate((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
P2 = XMVectorReplicate((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
P3 = XMVectorReplicate((t3 - t2) * 0.5f);
Result = XMVectorMultiply(P0, Position0);
Result = XMVectorMultiplyAdd(P1, Position1, Result);
Result = XMVectorMultiplyAdd(P2, Position2, Result);
Result = XMVectorMultiplyAdd(P3, Position3, Result);
#elif defined(_XM_SSE_INTRINSICS_)
// Same basis weights, computed as scalars and broadcast with _mm_set_ps1,
// then summed pairwise.
XMVECTOR P0 = _mm_set_ps1((-t3 + 2.0f * t2 - t) * 0.5f);
XMVECTOR P1 = _mm_set_ps1((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
XMVECTOR P2 = _mm_set_ps1((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
XMVECTOR P3 = _mm_set_ps1((t3 - t2) * 0.5f);
P0 = _mm_mul_ps(P0, Position0);
P1 = _mm_mul_ps(P1, Position1);
P2 = _mm_mul_ps(P2, Position2);
P3 = _mm_mul_ps(P3, Position3);
P0 = _mm_add_ps(P0,P1);
P2 = _mm_add_ps(P2,P3);
P0 = _mm_add_ps(P0,P2);
// NOTE(review): this #elif branch has no body before #endif; sibling
// functions in this file use "#else // _XM_VMX128_INTRINSICS_" here --
// confirm against the full source.
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
5578 //------------------------------------------------------------------------------
// Per-component Catmull-Rom interpolation: T carries an independent
// interpolation parameter in each lane (x,y,z,w), unlike the scalar-t
// XMVectorCatmullRom above. Each lane evaluates
// 0.5*((-t^3+2t^2-t)*P0 + (3t^3-5t^2+2)*P1 + (-3t^3+4t^2+t)*P2 + (t^3-t^2)*P3).
XMFINLINE XMVECTOR XMVectorCatmullRomV
FXMVECTOR Position0,
FXMVECTOR Position1,
FXMVECTOR Position2,
CXMVECTOR Position3,
#if defined(_XM_NO_INTRINSICS_)
float fx = T.vector4_f32[0];
float fy = T.vector4_f32[1];
float fz = T.vector4_f32[2];
float fw = T.vector4_f32[3];
XMVECTOR vResult = {
0.5f*((-fx*fx*fx+2*fx*fx-fx)*Position0.vector4_f32[0]+
(3*fx*fx*fx-5*fx*fx+2)*Position1.vector4_f32[0]+
(-3*fx*fx*fx+4*fx*fx+fx)*Position2.vector4_f32[0]+
(fx*fx*fx-fx*fx)*Position3.vector4_f32[0]),
0.5f*((-fy*fy*fy+2*fy*fy-fy)*Position0.vector4_f32[1]+
(3*fy*fy*fy-5*fy*fy+2)*Position1.vector4_f32[1]+
(-3*fy*fy*fy+4*fy*fy+fy)*Position2.vector4_f32[1]+
(fy*fy*fy-fy*fy)*Position3.vector4_f32[1]),
0.5f*((-fz*fz*fz+2*fz*fz-fz)*Position0.vector4_f32[2]+
(3*fz*fz*fz-5*fz*fz+2)*Position1.vector4_f32[2]+
(-3*fz*fz*fz+4*fz*fz+fz)*Position2.vector4_f32[2]+
(fz*fz*fz-fz*fz)*Position3.vector4_f32[2]),
0.5f*((-fw*fw*fw+2*fw*fw-fw)*Position0.vector4_f32[3]+
(3*fw*fw*fw-5*fw*fw+2)*Position1.vector4_f32[3]+
(-3*fw*fw*fw+4*fw*fw+fw)*Position2.vector4_f32[3]+
(fw*fw*fw-fw*fw)*Position3.vector4_f32[3])
#elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORF32 Catmul2 = {2.0f,2.0f,2.0f,2.0f};
static const XMVECTORF32 Catmul3 = {3.0f,3.0f,3.0f,3.0f};
static const XMVECTORF32 Catmul4 = {4.0f,4.0f,4.0f,4.0f};
static const XMVECTORF32 Catmul5 = {5.0f,5.0f,5.0f,5.0f};
// Cache T^2 and T^3
XMVECTOR T2 = _mm_mul_ps(T,T);
XMVECTOR T3 = _mm_mul_ps(T,T2);
// Perform the Position0 term: (2t^2 - t - t^3)
XMVECTOR vResult = _mm_add_ps(T2,T2);
vResult = _mm_sub_ps(vResult,T);
vResult = _mm_sub_ps(vResult,T3);
vResult = _mm_mul_ps(vResult,Position0);
// Perform the Position1 term and add: (3t^3 - 5t^2 + 2)
XMVECTOR vTemp = _mm_mul_ps(T3,Catmul3);
XMVECTOR vTemp2 = _mm_mul_ps(T2,Catmul5);
vTemp = _mm_sub_ps(vTemp,vTemp2);
vTemp = _mm_add_ps(vTemp,Catmul2);
vTemp = _mm_mul_ps(vTemp,Position1);
vResult = _mm_add_ps(vResult,vTemp);
// Perform the Position2 term and add: (4t^2 - 3t^3 + t)
vTemp = _mm_mul_ps(T2,Catmul4);
vTemp2 = _mm_mul_ps(T3,Catmul3);
vTemp = _mm_sub_ps(vTemp,vTemp2);
vTemp = _mm_add_ps(vTemp,T);
vTemp = _mm_mul_ps(vTemp,Position2);
vResult = _mm_add_ps(vResult,vTemp);
// Position3 is the last term: (t^3 - t^2)
T3 = _mm_sub_ps(T3,T2);
T3 = _mm_mul_ps(T3,Position3);
vResult = _mm_add_ps(vResult,T3);
// Multiply by 0.5f and exit
vResult = _mm_mul_ps(vResult,g_XMOneHalf);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
5651 //------------------------------------------------------------------------------
// Barycentric interpolation over the triangle (Position0, Position1,
// Position2) with scalar weights f and g:
// Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
XMFINLINE XMVECTOR XMVectorBaryCentric
FXMVECTOR Position0,
FXMVECTOR Position1,
FXMVECTOR Position2,
#if defined(_XM_NO_INTRINSICS_)
// Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
P10 = XMVectorSubtract(Position1, Position0);
ScaleF = XMVectorReplicate(f);
P20 = XMVectorSubtract(Position2, Position0);
ScaleG = XMVectorReplicate(g);
Result = XMVectorMultiplyAdd(P10, ScaleF, Position0);
Result = XMVectorMultiplyAdd(P20, ScaleG, Result);
#elif defined(_XM_SSE_INTRINSICS_)
// Edge vectors scaled by broadcast f and g, then summed onto Position0.
XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
XMVECTOR SF = _mm_set_ps1(f);
XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
XMVECTOR SG = _mm_set_ps1(g);
R1 = _mm_mul_ps(R1,SF);
R2 = _mm_mul_ps(R2,SG);
R1 = _mm_add_ps(R1,Position0);
R1 = _mm_add_ps(R1,R2);
// NOTE(review): empty #elif before #endif; sibling functions use
// "#else // _XM_VMX128_INTRINSICS_" here -- confirm against the full source.
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
5696 //------------------------------------------------------------------------------
// Vector-weight variant of XMVectorBaryCentric: F and G supply
// per-component barycentric weights instead of replicated scalars.
XMFINLINE XMVECTOR XMVectorBaryCentricV
FXMVECTOR Position0,
FXMVECTOR Position1,
FXMVECTOR Position2,
#if defined(_XM_NO_INTRINSICS_)
// Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
P10 = XMVectorSubtract(Position1, Position0);
P20 = XMVectorSubtract(Position2, Position0);
Result = XMVectorMultiplyAdd(P10, F, Position0);
Result = XMVectorMultiplyAdd(P20, G, Result);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
R1 = _mm_mul_ps(R1,F);
R2 = _mm_mul_ps(R2,G);
R1 = _mm_add_ps(R1,Position0);
R1 = _mm_add_ps(R1,R2);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
5734 /****************************************************************************
5738 ****************************************************************************/
5740 //------------------------------------------------------------------------------
5741 // Comparison operations
5742 //------------------------------------------------------------------------------
5744 //------------------------------------------------------------------------------
// Returns TRUE when both the x and y components of V1 and V2 compare
// equal; z and w are ignored.
XMFINLINE BOOL XMVector2Equal
#if defined(_XM_NO_INTRINSICS_)
return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
// z and w are don't care
// Movemask bits 0 and 1 hold the x and y comparison results.
return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
return XMComparisonAllTrue(XMVector2EqualR(V1, V2));
5764 //------------------------------------------------------------------------------
// Comparison-record variant: yields XM_CRMASK_CR6TRUE when both x and y
// are equal, XM_CRMASK_CR6FALSE when both differ, and neither flag for
// a mixed result.
XMFINLINE UINT XMVector2EqualR
#if defined(_XM_NO_INTRINSICS_)
if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
(V1.vector4_f32[1] == V2.vector4_f32[1]))
CR = XM_CRMASK_CR6TRUE;
else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
(V1.vector4_f32[1] != V2.vector4_f32[1]))
CR = XM_CRMASK_CR6FALSE;
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
// z and w are don't care
int iTest = _mm_movemask_ps(vTemp)&3;
// NOTE(review): the if/else guards on iTest for the two assignments
// below (and the CR declaration/return) are not visible in this
// extract; confirm against the full source.
CR = XM_CRMASK_CR6TRUE;
CR = XM_CRMASK_CR6FALSE;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
5805 //------------------------------------------------------------------------------
// Bitwise (integer) equality test on the x and y components; exact bit
// compare, so it distinguishes values float == would conflate (e.g. -0.0).
XMFINLINE BOOL XMVector2EqualInt
#if defined(_XM_NO_INTRINSICS_)
return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
// Reinterpret the float vectors as integer lanes for the bit compare.
__m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
return XMComparisonAllTrue(XMVector2EqualIntR(V1, V2));
5823 //------------------------------------------------------------------------------
// Comparison-record variant of the bitwise equality test: CR6TRUE when
// both x and y match bit-for-bit, CR6FALSE when both differ.
XMFINLINE UINT XMVector2EqualIntR
#if defined(_XM_NO_INTRINSICS_)
if ((V1.vector4_u32[0] == V2.vector4_u32[0]) &&
(V1.vector4_u32[1] == V2.vector4_u32[1]))
CR = XM_CRMASK_CR6TRUE;
else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) &&
(V1.vector4_u32[1] != V2.vector4_u32[1]))
CR = XM_CRMASK_CR6FALSE;
#elif defined(_XM_SSE_INTRINSICS_)
__m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3;
// NOTE(review): the if/else guards on iTest for these assignments are
// not visible in this extract; confirm against the full source.
CR = XM_CRMASK_CR6TRUE;
CR = XM_CRMASK_CR6FALSE;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
5863 //------------------------------------------------------------------------------
// Returns TRUE when |V1 - V2| <= Epsilon on both the x and y components.
XMFINLINE BOOL XMVector2NearEqual
#if defined(_XM_NO_INTRINSICS_)
dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
return ((dx <= Epsilon.vector4_f32[0]) &&
(dy <= Epsilon.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
// Get the difference
XMVECTOR vDelta = _mm_sub_ps(V1,V2);
// Get the absolute value of the difference, branch-free: max(-d, d)
XMVECTOR vTemp = _mm_setzero_ps();
vTemp = _mm_sub_ps(vTemp,vDelta);
vTemp = _mm_max_ps(vTemp,vDelta);
vTemp = _mm_cmple_ps(vTemp,Epsilon);
// z and w are don't care
return (((_mm_movemask_ps(vTemp)&3)==0x3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
5892 //------------------------------------------------------------------------------
// Returns TRUE when the x or y component of V1 and V2 differs
// (logical negation of XMVector2Equal).
XMFINLINE BOOL XMVector2NotEqual
#if defined(_XM_NO_INTRINSICS_)
return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
// z and w are don't care
// Not-equal iff the x/y equality bits are not both set.
return (((_mm_movemask_ps(vTemp)&3)!=3) != 0);
#else // _XM_VMX128_INTRINSICS_
return XMComparisonAnyFalse(XMVector2EqualR(V1, V2));
5911 //------------------------------------------------------------------------------
// Bitwise inequality test: TRUE when x or y differs in any bit.
XMFINLINE BOOL XMVector2NotEqualInt
#if defined(_XM_NO_INTRINSICS_)
return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
__m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)!=3) != 0);
#else // _XM_VMX128_INTRINSICS_
return XMComparisonAnyFalse(XMVector2EqualIntR(V1, V2));
5929 //------------------------------------------------------------------------------
// Returns TRUE when both x and y of V1 are strictly greater than V2's.
XMFINLINE BOOL XMVector2Greater
#if defined(_XM_NO_INTRINSICS_)
return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
// z and w are don't care
return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
return XMComparisonAllTrue(XMVector2GreaterR(V1, V2));
5949 //------------------------------------------------------------------------------
// Comparison-record variant: CR6TRUE when both x and y of V1 are greater
// than V2's, CR6FALSE when both are <=, neither flag for a mixed result.
XMFINLINE UINT XMVector2GreaterR
#if defined(_XM_NO_INTRINSICS_)
if ((V1.vector4_f32[0] > V2.vector4_f32[0]) &&
(V1.vector4_f32[1] > V2.vector4_f32[1]))
CR = XM_CRMASK_CR6TRUE;
else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) &&
(V1.vector4_f32[1] <= V2.vector4_f32[1]))
CR = XM_CRMASK_CR6FALSE;
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
int iTest = _mm_movemask_ps(vTemp)&3;
// NOTE(review): the if/else guards on iTest for these assignments are
// not visible in this extract; confirm against the full source.
CR = XM_CRMASK_CR6TRUE;
CR = XM_CRMASK_CR6FALSE;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
5988 //------------------------------------------------------------------------------
// Returns TRUE when both x and y of V1 are >= the corresponding V2 values.
XMFINLINE BOOL XMVector2GreaterOrEqual
#if defined(_XM_NO_INTRINSICS_)
return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V1, V2));
6006 //------------------------------------------------------------------------------
// Comparison-record variant: CR6TRUE when both x and y of V1 are >= V2's,
// CR6FALSE when both are <, neither flag for a mixed result.
XMFINLINE UINT XMVector2GreaterOrEqualR
#if defined(_XM_NO_INTRINSICS_)
if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
(V1.vector4_f32[1] >= V2.vector4_f32[1]))
CR = XM_CRMASK_CR6TRUE;
else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
(V1.vector4_f32[1] < V2.vector4_f32[1]))
CR = XM_CRMASK_CR6FALSE;
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
int iTest = _mm_movemask_ps(vTemp)&3;
// NOTE(review): the if/else guards on iTest for these assignments are
// not visible in this extract; confirm against the full source.
CR = XM_CRMASK_CR6TRUE;
CR = XM_CRMASK_CR6FALSE;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
6045 //------------------------------------------------------------------------------
// Returns TRUE when both x and y of V1 are strictly less than V2's.
XMFINLINE BOOL XMVector2Less
#if defined(_XM_NO_INTRINSICS_)
return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
// V1 < V2 is expressed as V2 > V1 to reuse the greater-than record.
return XMComparisonAllTrue(XMVector2GreaterR(V2, V1));
6063 //------------------------------------------------------------------------------
// Returns TRUE when both x and y of V1 are <= the corresponding V2 values.
XMFINLINE BOOL XMVector2LessOrEqual
#if defined(_XM_NO_INTRINSICS_)
return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
// V1 <= V2 is expressed as V2 >= V1 to reuse the greater-or-equal record.
return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V2, V1));
6081 //------------------------------------------------------------------------------
// Returns TRUE when -Bounds <= V <= Bounds holds for both x and y.
XMFINLINE BOOL XMVector2InBounds
#if defined(_XM_NO_INTRINSICS_)
return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
(V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
// Test if less than or equal
XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
// Negate the bounds
XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
// Test if greater or equal (Reversed)
vTemp2 = _mm_cmple_ps(vTemp2,V);
vTemp1 = _mm_and_ps(vTemp1,vTemp2);
// x and y in bounds? (z and w are don't care)
return (((_mm_movemask_ps(vTemp1)&0x3)==0x3) != 0);
#else // _XM_VMX128_INTRINSICS_
return XMComparisonAllInBounds(XMVector2InBoundsR(V, Bounds));
6108 //------------------------------------------------------------------------------
// Comparison-record variant of XMVector2InBounds: returns
// XM_CRMASK_CR6BOUNDS when -Bounds <= V <= Bounds on x and y, else 0.
XMFINLINE UINT XMVector2InBoundsR
#if defined(_XM_NO_INTRINSICS_)
if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
(V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]))
CR = XM_CRMASK_CR6BOUNDS;
#elif defined(_XM_SSE_INTRINSICS_)
// Test if less than or equal
XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
// Negate the bounds
XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
// Test if greater or equal (Reversed)
vTemp2 = _mm_cmple_ps(vTemp2,V);
vTemp1 = _mm_and_ps(vTemp1,vTemp2);
// x and y in bounds? (z and w are don't care)
return ((_mm_movemask_ps(vTemp1)&0x3)==0x3) ? XM_CRMASK_CR6BOUNDS : 0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
6140 //------------------------------------------------------------------------------
// Returns TRUE when the x or y component of V is a NaN
// (exponent all ones with a non-zero mantissa).
XMFINLINE BOOL XMVector2IsNaN
#if defined(_XM_NO_INTRINSICS_)
return (XMISNAN(V.vector4_f32[0]) ||
XMISNAN(V.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
// Mask off the exponent
__m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
// Mask off the mantissa
__m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
// Are any of the exponents == 0x7F800000?
vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
// Are any of the mantissa's zero? (SSE2 doesn't have a neq test)
vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
// Perform a not on the NaN test to be true on NON-zero mantissas
vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
// If x or y are NaN, the signs are true after the merge above
return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
6167 //------------------------------------------------------------------------------
// Returns TRUE when the x or y component of V is +/- infinity.
XMFINLINE BOOL XMVector2IsInfinite
#if defined(_XM_NO_INTRINSICS_)
return (XMISINF(V.vector4_f32[0]) ||
XMISINF(V.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
// Mask off the sign bit so +inf and -inf compare the same
__m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
// Compare to infinity
vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
// If x or z are infinity, the signs are true.
return ((_mm_movemask_ps(vTemp)&3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
6189 //------------------------------------------------------------------------------
6190 // Computation operations
6191 //------------------------------------------------------------------------------
6193 //------------------------------------------------------------------------------
// 2D dot product of V1 and V2 (x and y only); the scalar result is
// replicated into all four components of the returned vector.
XMFINLINE XMVECTOR XMVector2Dot
#if defined(_XM_NO_INTRINSICS_)
Result.vector4_f32[0] =
Result.vector4_f32[1] =
Result.vector4_f32[2] =
Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1];
#elif defined(_XM_SSE_INTRINSICS_)
// Perform the dot product on x and y
XMVECTOR vLengthSq = _mm_mul_ps(V1,V2);
// vTemp has y splatted
XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
// x*x + y*y in lane 0, then splat to all lanes
vLengthSq = _mm_add_ss(vLengthSq,vTemp);
vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
6225 //------------------------------------------------------------------------------
// 2D cross product: the scalar V1.x*V2.y - V1.y*V2.x (the z component of
// the equivalent 3D cross) replicated into all four lanes.
XMFINLINE XMVECTOR XMVector2Cross
#if defined(_XM_NO_INTRINSICS_)
FLOAT fCross = (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]);
// NOTE(review): the initializer list for vResult is not visible in this
// extract; confirm against the full source.
XMVECTOR vResult = {
#elif defined(_XM_SSE_INTRINSICS_)
// Swap x and y of V2 so the multiply yields {V1.x*V2.y, V1.y*V2.x, ...}
XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(0,1,0,1));
vResult = _mm_mul_ps(vResult,V1);
// Splat y then subtract it from x in lane 0
XMVECTOR vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(1,1,1,1));
vResult = _mm_sub_ss(vResult,vTemp);
// Splat the cross product
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,0,0,0));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
6258 //------------------------------------------------------------------------------
// Squared length of the 2D vector (x*x + y*y), replicated to all lanes;
// equivalent to XMVector2Dot(V, V).
XMFINLINE XMVECTOR XMVector2LengthSq
#if defined(_XM_NO_INTRINSICS_)
return XMVector2Dot(V, V);
#elif defined(_XM_SSE_INTRINSICS_)
// Perform the dot product on x and y
XMVECTOR vLengthSq = _mm_mul_ps(V,V);
// vTemp has y splatted
XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
vLengthSq = _mm_add_ss(vLengthSq,vTemp);
vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
// Fallback path (non-SSE, non-reference builds)
return XMVector2Dot(V, V);
6281 //------------------------------------------------------------------------------
// Estimated 1/length of the 2D vector, replicated to all lanes.
// The SSE path uses the fast rsqrt approximation (reduced precision).
XMFINLINE XMVECTOR XMVector2ReciprocalLengthEst
#if defined(_XM_NO_INTRINSICS_)
Result = XMVector2LengthSq(V);
Result = XMVectorReciprocalSqrtEst(Result);
#elif defined(_XM_SSE_INTRINSICS_)
// Perform the dot product on x and y
XMVECTOR vLengthSq = _mm_mul_ps(V,V);
// vTemp has y splatted
XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
// rsqrt estimate on lane 0, then splat
vLengthSq = _mm_add_ss(vLengthSq,vTemp);
vLengthSq = _mm_rsqrt_ss(vLengthSq);
vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
6311 //------------------------------------------------------------------------------
// Full-precision 1/length of the 2D vector, replicated to all lanes.
// Uses sqrt + divide rather than the rsqrt estimate.
XMFINLINE XMVECTOR XMVector2ReciprocalLength
#if defined(_XM_NO_INTRINSICS_)
Result = XMVector2LengthSq(V);
Result = XMVectorReciprocalSqrt(Result);
#elif defined(_XM_SSE_INTRINSICS_)
// Perform the dot product on x and y
XMVECTOR vLengthSq = _mm_mul_ps(V,V);
// vTemp has y splatted
XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
vLengthSq = _mm_add_ss(vLengthSq,vTemp);
vLengthSq = _mm_sqrt_ss(vLengthSq);
vLengthSq = _mm_div_ss(g_XMOne,vLengthSq);
vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
6342 //------------------------------------------------------------------------------
// Estimated length of the 2D vector, replicated to all lanes.
// (The SSE path shown uses full sqrt; the reference path uses SqrtEst.)
XMFINLINE XMVECTOR XMVector2LengthEst
#if defined(_XM_NO_INTRINSICS_)
Result = XMVector2LengthSq(V);
Result = XMVectorSqrtEst(Result);
#elif defined(_XM_SSE_INTRINSICS_)
// Perform the dot product on x and y
XMVECTOR vLengthSq = _mm_mul_ps(V,V);
// vTemp has y splatted
XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
vLengthSq = _mm_add_ss(vLengthSq,vTemp);
vLengthSq = _mm_sqrt_ss(vLengthSq);
vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
6368 //------------------------------------------------------------------------------
// Length (magnitude) of the 2D vector, replicated to all lanes.
XMFINLINE XMVECTOR XMVector2Length
#if defined(_XM_NO_INTRINSICS_)
Result = XMVector2LengthSq(V);
Result = XMVectorSqrt(Result);
#elif defined(_XM_SSE_INTRINSICS_)
// Perform the dot product on x and y
XMVECTOR vLengthSq = _mm_mul_ps(V,V);
// vTemp has y splatted
XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
// Splat the sum before the sqrt so all four lanes carry the length
vLengthSq = _mm_add_ss(vLengthSq,vTemp);
vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
vLengthSq = _mm_sqrt_ps(vLengthSq);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
6396 //------------------------------------------------------------------------------
6397 // XMVector2NormalizeEst uses a reciprocal estimate and
6398 // returns QNaN on zero and infinite vectors.
// Estimated normalization of a 2D vector using the reciprocal-length
// estimate; per the header comment above, returns QNaN for zero and
// infinite vectors.
XMFINLINE XMVECTOR XMVector2NormalizeEst
#if defined(_XM_NO_INTRINSICS_)
Result = XMVector2ReciprocalLength(V);
Result = XMVectorMultiply(V, Result);
#elif defined(_XM_SSE_INTRINSICS_)
// Perform the dot product on x and y
XMVECTOR vLengthSq = _mm_mul_ps(V,V);
// vTemp has y splatted
XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
// rsqrt estimate of the squared length, splatted, then scale V by it
vLengthSq = _mm_add_ss(vLengthSq,vTemp);
vLengthSq = _mm_rsqrt_ss(vLengthSq);
vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
vLengthSq = _mm_mul_ps(vLengthSq,V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
6427 //------------------------------------------------------------------------------
// Full-precision normalization of a 2D vector. The SSE path maps a
// zero-length input to zero and an infinite-length input to QNaN.
XMFINLINE XMVECTOR XMVector2Normalize
#if defined(_XM_NO_INTRINSICS_)
vResult = XMVector2Length( V );
fLength = vResult.vector4_f32[0];
// Prevent divide by zero
// NOTE(review): the guarding "if (fLength > 0)" for this reciprocal is
// not visible in this extract; confirm against the full source.
fLength = 1.0f/fLength;
vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
#elif defined(_XM_SSE_INTRINSICS_)
// Perform the dot product on x and y only
XMVECTOR vLengthSq = _mm_mul_ps(V,V);
XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
vLengthSq = _mm_add_ss(vLengthSq,vTemp);
vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
// Prepare for the division
XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
// Create zero with a single instruction
XMVECTOR vZeroMask = _mm_setzero_ps();
// Test for a divide by zero (Must be FP to detect -0.0)
vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
// Failsafe on zero (Or epsilon) length planes
// If the length is infinity, set the elements to zero
vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
// Reciprocal mul to perform the normalization
vResult = _mm_div_ps(V,vResult);
// Any that are infinity, set to zero
vResult = _mm_and_ps(vResult,vZeroMask);
// Select qnan or result based on infinite length
XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
vResult = _mm_or_ps(vTemp1,vTemp2);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
6480 //------------------------------------------------------------------------------
// Clamps the length of a 2D vector to [LengthMin, LengthMax] by
// broadcasting the scalar bounds and delegating to XMVector2ClampLengthV.
XMFINLINE XMVECTOR XMVector2ClampLength
#if defined(_XM_NO_INTRINSICS_)
ClampMax = XMVectorReplicate(LengthMax);
ClampMin = XMVectorReplicate(LengthMin);
return XMVector2ClampLengthV(V, ClampMin, ClampMax);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
return XMVector2ClampLengthV(V, ClampMin, ClampMax);
// NOTE(review): empty #elif before #endif; sibling functions use
// "#else // _XM_VMX128_INTRINSICS_" here -- confirm against the full source.
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
6507 //------------------------------------------------------------------------------
// Vector-bounds variant of XMVector2ClampLength. LengthMin/LengthMax must
// hold the same non-negative value in their x and y components (asserted
// below). The vector's length is clamped to [LengthMin, LengthMax]; when
// the original length is already in range, V is returned unchanged to
// avoid any precision loss.
XMFINLINE XMVECTOR XMVector2ClampLengthV
FXMVECTOR LengthMin,
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR ClampLength;
XMVECTOR InfiniteLength;
XMVECTOR ZeroLength;
XMVECTOR ControlMax;
XMVECTOR ControlMin;
// Validate the replicated, ordered, non-negative bounds contract.
XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]));
XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]));
XMASSERT(XMVector2GreaterOrEqual(LengthMin, XMVectorZero()));
XMASSERT(XMVector2GreaterOrEqual(LengthMax, XMVectorZero()));
XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));
LengthSq = XMVector2LengthSq(V);
Zero = XMVectorZero();
RcpLength = XMVectorReciprocalSqrt(LengthSq);
// Flag degenerate inputs: infinite or zero squared length.
InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
ZeroLength = XMVectorEqual(LengthSq, Zero);
// Length = LengthSq * 1/sqrt(LengthSq); Normal = V * 1/sqrt(LengthSq)
Length = XMVectorMultiply(LengthSq, RcpLength);
Normal = XMVectorMultiply(V, RcpLength);
// For degenerate inputs fall back to LengthSq itself.
Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
Length = XMVectorSelect(LengthSq, Length, Select);
Normal = XMVectorSelect(LengthSq, Normal, Select);
// Clamp the length between the two bounds.
ControlMax = XMVectorGreater(Length, LengthMax);
ControlMin = XMVectorLess(Length, LengthMin);
ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
Result = XMVectorMultiply(Normal, ClampLength);
// Preserve the original vector (with no precision loss) if the length falls within the given range
Control = XMVectorEqualInt(ControlMax, ControlMin);
Result = XMVectorSelect(Result, V, Control);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR ClampLength;
XMVECTOR InfiniteLength;
XMVECTOR ZeroLength;
XMVECTOR ControlMax;
XMVECTOR ControlMin;
// Same algorithm as the reference path, using SSE multiplies directly.
XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)));
XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)));
XMASSERT(XMVector2GreaterOrEqual(LengthMin, g_XMZero));
XMASSERT(XMVector2GreaterOrEqual(LengthMax, g_XMZero));
XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));
LengthSq = XMVector2LengthSq(V);
RcpLength = XMVectorReciprocalSqrt(LengthSq);
InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
ZeroLength = XMVectorEqual(LengthSq, g_XMZero);
Length = _mm_mul_ps(LengthSq, RcpLength);
Normal = _mm_mul_ps(V, RcpLength);
Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
Length = XMVectorSelect(LengthSq, Length, Select);
Normal = XMVectorSelect(LengthSq, Normal, Select);
ControlMax = XMVectorGreater(Length, LengthMax);
ControlMin = XMVectorLess(Length, LengthMin);
ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
Result = _mm_mul_ps(Normal, ClampLength);
// Preserve the original vector (with no precision loss) if the length falls within the given range
Control = XMVectorEqualInt(ControlMax, ControlMin);
Result = XMVectorSelect(Result, V, Control);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
6610 //------------------------------------------------------------------------------
// Reflects an incident 2D vector across a normal:
// Result = Incident - (2 * dot(Incident, Normal)) * Normal
XMFINLINE XMVECTOR XMVector2Reflect
#if defined(_XM_NO_INTRINSICS_)
// Result = Incident - (2 * dot(Incident, Normal)) * Normal
Result = XMVector2Dot(Incident, Normal);
Result = XMVectorAdd(Result, Result);
Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);
#elif defined(_XM_SSE_INTRINSICS_)
// Result = Incident - (2 * dot(Incident, Normal)) * Normal
XMVECTOR Result = XMVector2Dot(Incident,Normal);
Result = _mm_add_ps(Result, Result);
Result = _mm_mul_ps(Result, Normal);
Result = _mm_sub_ps(Incident,Result);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
6640 //------------------------------------------------------------------------------
// Refracts an incident 2D vector through a surface with the given scalar
// refraction index; broadcasts the index and delegates to XMVector2RefractV.
XMFINLINE XMVECTOR XMVector2Refract
FLOAT RefractionIndex
#if defined(_XM_NO_INTRINSICS_)
Index = XMVectorReplicate(RefractionIndex);
return XMVector2RefractV(Incident, Normal, Index);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR Index = _mm_set_ps1(RefractionIndex);
return XMVector2RefractV(Incident,Normal,Index);
// NOTE(review): empty #elif before #endif; sibling functions use
// "#else // _XM_VMX128_INTRINSICS_" here -- confirm against the full source.
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
6661 //------------------------------------------------------------------------------
6663 // Return the refraction of a 2D vector
// Implements Snell's-law vector refraction with a per-component refraction index:
//   Result = Index * Incident
//          - Normal * (Index * dot(I,N) + sqrt(1 - Index^2 * (1 - dot(I,N)^2)))
// Total internal reflection (radicand <= 0) yields zero in the SSE path (masked out);
// the scalar path would take sqrtf of a negative value in that case -- TODO confirm
// callers guarantee a valid radicand on the no-intrinsics build.
6664 XMFINLINE XMVECTOR XMVector2RefractV
6668 FXMVECTOR RefractionIndex
6671 #if defined(_XM_NO_INTRINSICS_)
6675 // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
6676 // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
// 2D dot product computed manually from the x and y components.
6677 IDotN = (Incident.vector4_f32[0]*Normal.vector4_f32[0])+(Incident.vector4_f32[1]*Normal.vector4_f32[1]);
6678 // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
6679 RY = 1.0f-(IDotN*IDotN);
6680 RX = 1.0f-(RY*RefractionIndex.vector4_f32[0]*RefractionIndex.vector4_f32[0]);
6681 RY = 1.0f-(RY*RefractionIndex.vector4_f32[1]*RefractionIndex.vector4_f32[1]);
6683 RX = (RefractionIndex.vector4_f32[0]*Incident.vector4_f32[0])-(Normal.vector4_f32[0]*((RefractionIndex.vector4_f32[0]*IDotN)+sqrtf(RX)));
6688 RY = (RefractionIndex.vector4_f32[1]*Incident.vector4_f32[1])-(Normal.vector4_f32[1]*((RefractionIndex.vector4_f32[1]*IDotN)+sqrtf(RY)));
// z and w are explicitly zeroed in the result.
6692 vResult.vector4_f32[0] = RX;
6693 vResult.vector4_f32[1] = RY;
6694 vResult.vector4_f32[2] = 0.0f;
6695 vResult.vector4_f32[3] = 0.0f;
6697 #elif defined(_XM_SSE_INTRINSICS_)
6698 // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
6699 // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
6700 // Get the 2D Dot product of Incident-Normal
6701 XMVECTOR IDotN = _mm_mul_ps(Incident,Normal);
6702 XMVECTOR vTemp = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(1,1,1,1));
// add_ss sums x+y into lane 0, then the shuffle broadcasts it to all lanes.
6703 IDotN = _mm_add_ss(IDotN,vTemp);
6704 IDotN = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(0,0,0,0));
6705 // vTemp = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
6706 vTemp = _mm_mul_ps(IDotN,IDotN);
6707 vTemp = _mm_sub_ps(g_XMOne,vTemp);
6708 vTemp = _mm_mul_ps(vTemp,RefractionIndex);
6709 vTemp = _mm_mul_ps(vTemp,RefractionIndex);
6710 vTemp = _mm_sub_ps(g_XMOne,vTemp);
6711 // If any terms are <=0, sqrt() will fail, punt to zero
6712 XMVECTOR vMask = _mm_cmpgt_ps(vTemp,g_XMZero);
6713 // R = RefractionIndex * IDotN + sqrt(R)
6714 vTemp = _mm_sqrt_ps(vTemp);
6715 XMVECTOR vResult = _mm_mul_ps(RefractionIndex,IDotN);
6716 vTemp = _mm_add_ps(vTemp,vResult);
6717 // Result = RefractionIndex * Incident - Normal * R
6718 vResult = _mm_mul_ps(RefractionIndex,Incident);
6719 vTemp = _mm_mul_ps(vTemp,Normal);
6720 vResult = _mm_sub_ps(vResult,vTemp);
// Lanes that failed the radicand test are forced to zero (total internal reflection).
6721 vResult = _mm_and_ps(vResult,vMask);
6723 #else // _XM_VMX128_INTRINSICS_
6724 #endif // _XM_VMX128_INTRINSICS_
6727 //------------------------------------------------------------------------------
// Return a vector perpendicular to the 2D input: (x, y) -> (-y, x),
// i.e. a 90-degree counter-clockwise rotation in the xy plane.
6729 XMFINLINE XMVECTOR XMVector2Orthogonal
6734 #if defined(_XM_NO_INTRINSICS_)
6738 Result.vector4_f32[0] = -V.vector4_f32[1];
6739 Result.vector4_f32[1] = V.vector4_f32[0];
6743 #elif defined(_XM_SSE_INTRINSICS_)
// Swap x and y, then negate the new x lane via g_XMNegateX.
6744 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
6745 vResult = _mm_mul_ps(vResult,g_XMNegateX);
6747 #else // _XM_VMX128_INTRINSICS_
6748 #endif // _XM_VMX128_INTRINSICS_
6751 //------------------------------------------------------------------------------
// Estimate the radian angle between two normalized 2D vectors.
// Clamps the dot product to [-1, 1] (guards acos against rounding drift)
// and uses the faster, lower-precision XMVectorACosEst.
6753 XMFINLINE XMVECTOR XMVector2AngleBetweenNormalsEst
6759 #if defined(_XM_NO_INTRINSICS_)
6761 XMVECTOR NegativeOne;
6765 Result = XMVector2Dot(N1, N2);
6766 NegativeOne = XMVectorSplatConstant(-1, 0);
6767 One = XMVectorSplatOne();
6768 Result = XMVectorClamp(Result, NegativeOne, One);
6769 Result = XMVectorACosEst(Result);
6773 #elif defined(_XM_SSE_INTRINSICS_)
6774 XMVECTOR vResult = XMVector2Dot(N1,N2);
6775 // Clamp to -1.0f to 1.0f
6776 vResult = _mm_max_ps(vResult,g_XMNegativeOne);
6777 vResult = _mm_min_ps(vResult,g_XMOne);
6778 vResult = XMVectorACosEst(vResult);
6780 #else // _XM_VMX128_INTRINSICS_
6781 #endif // _XM_VMX128_INTRINSICS_
6784 //------------------------------------------------------------------------------
// Return the radian angle between two normalized 2D vectors.
// Clamps the dot product to [-1, 1] (guards acos against rounding drift)
// before taking the full-precision XMVectorACos.
6786 XMFINLINE XMVECTOR XMVector2AngleBetweenNormals
6792 #if defined(_XM_NO_INTRINSICS_)
6794 XMVECTOR NegativeOne;
6798 Result = XMVector2Dot(N1, N2);
6799 NegativeOne = XMVectorSplatConstant(-1, 0);
6800 One = XMVectorSplatOne();
6801 Result = XMVectorClamp(Result, NegativeOne, One);
6802 Result = XMVectorACos(Result);
6806 #elif defined(_XM_SSE_INTRINSICS_)
6807 XMVECTOR vResult = XMVector2Dot(N1,N2);
6808 // Clamp to -1.0f to 1.0f
6809 vResult = _mm_max_ps(vResult,g_XMNegativeOne);
6810 vResult = _mm_min_ps(vResult,g_XMOne);
6811 vResult = XMVectorACos(vResult);
6813 #else // _XM_VMX128_INTRINSICS_
6814 #endif // _XM_VMX128_INTRINSICS_
6817 //------------------------------------------------------------------------------
// Return the radian angle between two arbitrary (not necessarily normalized)
// 2D vectors: acos( dot(V1,V2) / (|V1| * |V2|) ), with the cosine clamped to
// [-1, 1] to keep acos well-defined under rounding.
6819 XMFINLINE XMVECTOR XMVector2AngleBetweenVectors
6825 #if defined(_XM_NO_INTRINSICS_)
6831 XMVECTOR NegativeOne;
// Reciprocal lengths avoid divisions: dot * (1/|V1|) * (1/|V2|).
6835 L1 = XMVector2ReciprocalLength(V1);
6836 L2 = XMVector2ReciprocalLength(V2);
6838 Dot = XMVector2Dot(V1, V2);
6840 L1 = XMVectorMultiply(L1, L2);
6842 CosAngle = XMVectorMultiply(Dot, L1);
6843 NegativeOne = XMVectorSplatConstant(-1, 0);
6844 One = XMVectorSplatOne();
6845 CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);
6847 Result = XMVectorACos(CosAngle);
6851 #elif defined(_XM_SSE_INTRINSICS_)
6857 L1 = XMVector2ReciprocalLength(V1);
6858 L2 = XMVector2ReciprocalLength(V2);
6859 Dot = XMVector2Dot(V1, V2);
6860 L1 = _mm_mul_ps(L1, L2);
6861 CosAngle = _mm_mul_ps(Dot, L1);
6862 CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne,g_XMOne);
6863 Result = XMVectorACos(CosAngle);
6865 #else // _XM_VMX128_INTRINSICS_
6866 #endif // _XM_VMX128_INTRINSICS_
6869 //------------------------------------------------------------------------------
// Distance from Point to the infinite 2D line through LinePoint1 and LinePoint2:
// project the point onto the line, subtract the projection, and take the length
// of the remaining perpendicular component.
6871 XMFINLINE XMVECTOR XMVector2LinePointDistance
6873 FXMVECTOR LinePoint1,
6874 FXMVECTOR LinePoint2,
6878 #if defined(_XM_NO_INTRINSICS_)
6880 XMVECTOR PointVector;
6881 XMVECTOR LineVector;
6882 XMVECTOR ReciprocalLengthSq;
6883 XMVECTOR PointProjectionScale;
6884 XMVECTOR DistanceVector;
6887 // Given a vector PointVector from LinePoint1 to Point and a vector
6888 // LineVector from LinePoint1 to LinePoint2, the scaled distance
6889 // PointProjectionScale from LinePoint1 to the perpendicular projection
6890 // of PointVector onto the line is defined as:
6892 // PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)
6894 PointVector = XMVectorSubtract(Point, LinePoint1);
6895 LineVector = XMVectorSubtract(LinePoint2, LinePoint1);
6897 ReciprocalLengthSq = XMVector2LengthSq(LineVector);
6898 ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);
6900 PointProjectionScale = XMVector2Dot(PointVector, LineVector);
6901 PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);
6903 DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
6904 DistanceVector = XMVectorSubtract(PointVector, DistanceVector);
6906 Result = XMVector2Length(DistanceVector);
6910 #elif defined(_XM_SSE_INTRINSICS_)
6911 XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
6912 XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
6913 XMVECTOR ReciprocalLengthSq = XMVector2LengthSq(LineVector);
// SSE path divides directly instead of multiplying by a reciprocal estimate.
6914 XMVECTOR vResult = XMVector2Dot(PointVector,LineVector);
6915 vResult = _mm_div_ps(vResult,ReciprocalLengthSq);
6916 vResult = _mm_mul_ps(vResult,LineVector);
6917 vResult = _mm_sub_ps(PointVector,vResult);
6918 vResult = XMVector2Length(vResult);
6920 #else // _XM_VMX128_INTRINSICS_
6921 #endif // _XM_VMX128_INTRINSICS_
6924 //------------------------------------------------------------------------------
// Intersect the two infinite 2D lines (Line1Point1,Line1Point2) and
// (Line2Point1,Line2Point2). Returns the intersection point; g_XMInfinity when
// the lines are coincident; g_XMQNaN when they are parallel but distinct.
6926 XMFINLINE XMVECTOR XMVector2IntersectLine
6928 FXMVECTOR Line1Point1,
6929 FXMVECTOR Line1Point2,
6930 FXMVECTOR Line2Point1,
6931 CXMVECTOR Line2Point2
6934 #if defined(_XM_NO_INTRINSICS_)
6942 CONST XMVECTOR Zero = XMVectorZero();
6944 V1 = XMVectorSubtract(Line1Point2, Line1Point1);
6945 V2 = XMVectorSubtract(Line2Point2, Line2Point1);
6946 V3 = XMVectorSubtract(Line1Point1, Line2Point1);
// 2D cross products: C1 ~ 0 means the direction vectors are parallel;
// C2 ~ 0 additionally means the lines share a point (coincident).
6948 C1 = XMVector2Cross(V1, V2);
6949 C2 = XMVector2Cross(V2, V3);
6951 if (XMVector2NearEqual(C1, Zero, g_XMEpsilon.v))
6953 if (XMVector2NearEqual(C2, Zero, g_XMEpsilon.v))
6956 Result = g_XMInfinity.v;
6961 Result = g_XMQNaN.v;
6966 // Intersection point = Line1Point1 + V1 * (C2 / C1)
6968 Scale = XMVectorReciprocal(C1);
6969 Scale = XMVectorMultiply(C2, Scale);
6970 Result = XMVectorMultiplyAdd(V1, Scale, Line1Point1);
6975 #elif defined(_XM_SSE_INTRINSICS_)
6976 XMVECTOR V1 = _mm_sub_ps(Line1Point2, Line1Point1);
6977 XMVECTOR V2 = _mm_sub_ps(Line2Point2, Line2Point1);
6978 XMVECTOR V3 = _mm_sub_ps(Line1Point1, Line2Point1);
6979 // Generate the cross products
6980 XMVECTOR C1 = XMVector2Cross(V1, V2);
6981 XMVECTOR C2 = XMVector2Cross(V2, V3);
6982 // If C1 is not close to epsilon, use the calculated value
// Branchless abs: max(-C1, C1), then compare against epsilon.
6983 XMVECTOR vResultMask = _mm_setzero_ps();
6984 vResultMask = _mm_sub_ps(vResultMask,C1);
6985 vResultMask = _mm_max_ps(vResultMask,C1);
6986 // 0xFFFFFFFF if the calculated value is to be used
6987 vResultMask = _mm_cmpgt_ps(vResultMask,g_XMEpsilon);
6988 // If C1 is close to epsilon, which fail type is it? INFINITY or NAN?
6989 XMVECTOR vFailMask = _mm_setzero_ps();
6990 vFailMask = _mm_sub_ps(vFailMask,C2);
6991 vFailMask = _mm_max_ps(vFailMask,C2);
6992 vFailMask = _mm_cmple_ps(vFailMask,g_XMEpsilon);
6993 XMVECTOR vFail = _mm_and_ps(vFailMask,g_XMInfinity);
6994 vFailMask = _mm_andnot_ps(vFailMask,g_XMQNaN);
6995 // vFail is NAN or INF
6996 vFail = _mm_or_ps(vFail,vFailMask);
6997 // Intersection point = Line1Point1 + V1 * (C2 / C1)
6998 XMVECTOR vResult = _mm_div_ps(C2,C1);
6999 vResult = _mm_mul_ps(vResult,V1);
7000 vResult = _mm_add_ps(vResult,Line1Point1);
7001 // Use result, or failure value
// Branchless select: (result & mask) | (fail & ~mask).
7002 vResult = _mm_and_ps(vResult,vResultMask);
7003 vResultMask = _mm_andnot_ps(vResultMask,vFail);
7004 vResult = _mm_or_ps(vResult,vResultMask);
7006 #else // _XM_VMX128_INTRINSICS_
7007 #endif // _XM_VMX128_INTRINSICS_
7010 //------------------------------------------------------------------------------
// Transform a 2D vector by matrix M as a point with implicit z=0, w=1:
//   Result = x * M.r[0] + y * M.r[1] + M.r[3]   (row 2 is skipped since z=0).
// The full 4-component result is returned (w is not divided out here).
7012 XMFINLINE XMVECTOR XMVector2Transform
7018 #if defined(_XM_NO_INTRINSICS_)
7024 Y = XMVectorSplatY(V);
7025 X = XMVectorSplatX(V);
7027 Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
7028 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
7032 #elif defined(_XM_SSE_INTRINSICS_)
7033 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
7034 vResult = _mm_mul_ps(vResult,M.r[0]);
7035 XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
7036 vTemp = _mm_mul_ps(vTemp,M.r[1]);
7037 vResult = _mm_add_ps(vResult,vTemp);
7038 vResult = _mm_add_ps(vResult,M.r[3]);
7040 #else // _XM_VMX128_INTRINSICS_
7041 #endif // _XM_VMX128_INTRINSICS_
7044 //------------------------------------------------------------------------------
// Transform a strided stream of XMFLOAT2 points by M (implicit z=0, w=1),
// writing full XMFLOAT4 results. Strides are in bytes; input and output may use
// different strides. Returns pOutputStream for call chaining.
7046 XMINLINE XMFLOAT4* XMVector2TransformStream
7048 XMFLOAT4* pOutputStream,
7049 size_t OutputStride,
7050 CONST XMFLOAT2* pInputStream,
7056 #if defined(_XM_NO_INTRINSICS_)
// BYTE pointers so the per-element stride arithmetic is byte-accurate.
7063 CONST BYTE* pInputVector = (CONST BYTE*)pInputStream;
7064 BYTE* pOutputVector = (BYTE*)pOutputStream;
7066 XMASSERT(pOutputStream);
7067 XMASSERT(pInputStream);
7069 for (i = 0; i < VectorCount; i++)
7071 V = XMLoadFloat2((const XMFLOAT2*)pInputVector);
7072 Y = XMVectorSplatY(V);
7073 X = XMVectorSplatX(V);
7074 // Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
7075 // X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);
7077 Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
7078 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
7080 XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);
7082 pInputVector += InputStride;
7083 pOutputVector += OutputStride;
7086 return pOutputStream;
7088 #elif defined(_XM_SSE_INTRINSICS_)
7089 XMASSERT(pOutputStream);
7090 XMASSERT(pInputStream);
7092 CONST BYTE* pInputVector = (CONST BYTE*)pInputStream;
7093 BYTE* pOutputVector = (BYTE*)pOutputStream;
7095 for (i = 0; i < VectorCount; i++)
// load_ps1 broadcasts each scalar component; avoids an unaligned vector load.
7097 XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
7098 XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
7099 vResult = _mm_mul_ps(vResult,M.r[1]);
7100 vResult = _mm_add_ps(vResult,M.r[3]);
7101 X = _mm_mul_ps(X,M.r[0]);
7102 vResult = _mm_add_ps(vResult,X);
// Unaligned store: output stride need not be 16-byte aligned.
7103 _mm_storeu_ps(reinterpret_cast<float*>(pOutputVector),vResult);
7104 pInputVector += InputStride;
7105 pOutputVector += OutputStride;
7107 return pOutputStream;
7108 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
7109 #endif // _XM_VMX128_INTRINSICS_
7112 //------------------------------------------------------------------------------
// Non-cached ("NC") variant of XMVector2TransformStream. On platforms without a
// special non-cached write path (no-intrinsics, SSE, or when misaligned vector
// access is disallowed) it simply forwards to the regular stream transform.
7114 XMINLINE XMFLOAT4* XMVector2TransformStreamNC
7116 XMFLOAT4* pOutputStream,
7117 size_t OutputStride,
7118 CONST XMFLOAT2* pInputStream,
7124 #if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)
7125 return XMVector2TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M );
7126 #else // _XM_VMX128_INTRINSICS_
7127 #endif // _XM_VMX128_INTRINSICS_
7130 //------------------------------------------------------------------------------
// Transform a 2D coordinate by M (implicit z=0, w=1) and divide the result by
// its w component, producing a homogeneous-normalized point.
7132 XMFINLINE XMVECTOR XMVector2TransformCoord
7138 #if defined(_XM_NO_INTRINSICS_)
7145 Y = XMVectorSplatY(V);
7146 X = XMVectorSplatX(V);
7148 Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
7149 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
// Perspective divide: multiply by 1/w broadcast to all lanes.
7151 InverseW = XMVectorSplatW(Result);
7152 InverseW = XMVectorReciprocal(InverseW);
7154 Result = XMVectorMultiply(Result, InverseW);
7158 #elif defined(_XM_SSE_INTRINSICS_)
7159 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
7160 vResult = _mm_mul_ps(vResult,M.r[0]);
7161 XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
7162 vTemp = _mm_mul_ps(vTemp,M.r[1]);
7163 vResult = _mm_add_ps(vResult,vTemp);
7164 vResult = _mm_add_ps(vResult,M.r[3]);
// SSE path uses a true divide by splatted w rather than a reciprocal estimate.
7165 vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
7166 vResult = _mm_div_ps(vResult,vTemp);
7168 #else // _XM_VMX128_INTRINSICS_
7169 #endif // _XM_VMX128_INTRINSICS_
7172 //------------------------------------------------------------------------------
// Transform a strided stream of XMFLOAT2 coordinates by M with w-divide,
// writing XMFLOAT2 results. Strides are in bytes. Returns pOutputStream.
7174 XMINLINE XMFLOAT2* XMVector2TransformCoordStream
7176 XMFLOAT2* pOutputStream,
7177 size_t OutputStride,
7178 CONST XMFLOAT2* pInputStream,
7184 #if defined(_XM_NO_INTRINSICS_)
7192 CONST BYTE* pInputVector = (CONST BYTE*)pInputStream;
7193 BYTE* pOutputVector = (BYTE*)pOutputStream;
7195 XMASSERT(pOutputStream);
7196 XMASSERT(pInputStream);
7198 for (i = 0; i < VectorCount; i++)
7200 V = XMLoadFloat2((const XMFLOAT2*)pInputVector);
7201 Y = XMVectorSplatY(V);
7202 X = XMVectorSplatX(V);
7203 // Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
7204 // X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);
7206 Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
7207 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
// Homogeneous divide before storing only x,y.
7209 InverseW = XMVectorSplatW(Result);
7210 InverseW = XMVectorReciprocal(InverseW);
7212 Result = XMVectorMultiply(Result, InverseW);
7214 XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);
7216 pInputVector += InputStride;
7217 pOutputVector += OutputStride;
7220 return pOutputStream;
7222 #elif defined(_XM_SSE_INTRINSICS_)
7223 XMASSERT(pOutputStream);
7224 XMASSERT(pInputStream);
7226 CONST BYTE *pInputVector = (CONST BYTE*)pInputStream;
7227 BYTE *pOutputVector = (BYTE*)pOutputStream;
7229 for (i = 0; i < VectorCount; i++)
7231 XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
7232 XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
7233 vResult = _mm_mul_ps(vResult,M.r[1]);
7234 vResult = _mm_add_ps(vResult,M.r[3]);
7235 X = _mm_mul_ps(X,M.r[0]);
7236 vResult = _mm_add_ps(vResult,X);
7237 X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
7238 vResult = _mm_div_ps(vResult,X);
// store_sd writes exactly 8 bytes (x,y) -- safe for a tightly packed XMFLOAT2 stream.
7239 _mm_store_sd(reinterpret_cast<double *>(pOutputVector),reinterpret_cast<__m128d *>(&vResult)[0]);
7240 pInputVector += InputStride;
7241 pOutputVector += OutputStride;
7243 return pOutputStream;
7244 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
7245 #endif // _XM_VMX128_INTRINSICS_
7248 //------------------------------------------------------------------------------
// Transform a 2D direction vector by M, ignoring translation (row 3 is not
// added): Result = x * M.r[0] + y * M.r[1].
7250 XMFINLINE XMVECTOR XMVector2TransformNormal
7256 #if defined(_XM_NO_INTRINSICS_)
7262 Y = XMVectorSplatY(V);
7263 X = XMVectorSplatX(V);
7265 Result = XMVectorMultiply(Y, M.r[1]);
7266 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
7270 #elif defined(_XM_SSE_INTRINSICS_)
7271 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
7272 vResult = _mm_mul_ps(vResult,M.r[0]);
7273 XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
7274 vTemp = _mm_mul_ps(vTemp,M.r[1]);
7275 vResult = _mm_add_ps(vResult,vTemp);
7277 #else // _XM_VMX128_INTRINSICS_
7278 #endif // _XM_VMX128_INTRINSICS_
7281 //------------------------------------------------------------------------------
// Transform a strided stream of XMFLOAT2 direction vectors by M (no translation,
// no w-divide), writing XMFLOAT2 results. Strides are in bytes. Returns
// pOutputStream for call chaining.
7283 XMINLINE XMFLOAT2* XMVector2TransformNormalStream
7285 XMFLOAT2* pOutputStream,
7286 size_t OutputStride,
7287 CONST XMFLOAT2* pInputStream,
7293 #if defined(_XM_NO_INTRINSICS_)
7300 CONST BYTE* pInputVector = (CONST BYTE*)pInputStream;
7301 BYTE* pOutputVector = (BYTE*)pOutputStream;
7303 XMASSERT(pOutputStream);
7304 XMASSERT(pInputStream);
7306 for (i = 0; i < VectorCount; i++)
7308 V = XMLoadFloat2((const XMFLOAT2*)pInputVector);
7309 Y = XMVectorSplatY(V);
7310 X = XMVectorSplatX(V);
7311 // Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
7312 // X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);
7314 Result = XMVectorMultiply(Y, M.r[1]);
7315 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
7317 XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);
7319 pInputVector += InputStride;
7320 pOutputVector += OutputStride;
7323 return pOutputStream;
7325 #elif defined(_XM_SSE_INTRINSICS_)
7326 XMASSERT(pOutputStream);
7327 XMASSERT(pInputStream);
7329 CONST BYTE*pInputVector = (CONST BYTE*)pInputStream;
7330 BYTE *pOutputVector = (BYTE*)pOutputStream;
7331 for (i = 0; i < VectorCount; i++)
7333 XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->x);
7334 XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->y);
7335 vResult = _mm_mul_ps(vResult,M.r[1]);
7336 X = _mm_mul_ps(X,M.r[0]);
7337 vResult = _mm_add_ps(vResult,X);
// 8-byte store of (x,y) only; w-divide intentionally omitted for normals.
7338 _mm_store_sd(reinterpret_cast<double*>(pOutputVector),reinterpret_cast<const __m128d *>(&vResult)[0]);
7340 pInputVector += InputStride;
7341 pOutputVector += OutputStride;
7344 return pOutputStream;
7345 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
7346 #endif // _XM_VMX128_INTRINSICS_
7349 /****************************************************************************
7353 ****************************************************************************/
7355 //------------------------------------------------------------------------------
7356 // Comparison operations
7357 //------------------------------------------------------------------------------
7359 //------------------------------------------------------------------------------
// TRUE if the x, y and z components of V1 and V2 are all exactly equal
// (w is ignored; float == semantics, so NaN never compares equal).
7361 XMFINLINE BOOL XMVector3Equal
7367 #if defined(_XM_NO_INTRINSICS_)
7368 return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2])) != 0);
7369 #elif defined(_XM_SSE_INTRINSICS_)
7370 XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
// movemask bits 0..2 are the x,y,z lane results; all three must be set.
7371 return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
7372 #else // _XM_VMX128_INTRINSICS_
7373 return XMComparisonAllTrue(XMVector3EqualR(V1, V2));
7377 //------------------------------------------------------------------------------
// Compare x,y,z of V1 and V2 and return a CR6-style comparison record:
// XM_CRMASK_CR6TRUE if all three are equal, XM_CRMASK_CR6FALSE if all three
// differ, otherwise neither flag (mixed result).
7379 XMFINLINE UINT XMVector3EqualR
7385 #if defined(_XM_NO_INTRINSICS_)
7387 if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
7388 (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
7389 (V1.vector4_f32[2] == V2.vector4_f32[2]))
7391 CR = XM_CRMASK_CR6TRUE;
7393 else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
7394 (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
7395 (V1.vector4_f32[2] != V2.vector4_f32[2]))
7397 CR = XM_CRMASK_CR6FALSE;
7400 #elif defined(_XM_SSE_INTRINSICS_)
7401 XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
// iTest holds the x,y,z lane mask: 7 = all equal, 0 = none equal.
7402 int iTest = _mm_movemask_ps(vTemp)&7;
7406 CR = XM_CRMASK_CR6TRUE;
7410 CR = XM_CRMASK_CR6FALSE;
7413 #else // _XM_VMX128_INTRINSICS_
7414 #endif // _XM_VMX128_INTRINSICS_
7417 //------------------------------------------------------------------------------
// TRUE if the x, y and z components of V1 and V2 are bitwise equal as
// 32-bit integers (w is ignored).
7419 XMFINLINE BOOL XMVector3EqualInt
7425 #if defined(_XM_NO_INTRINSICS_)
7426 return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2])) != 0);
7427 #elif defined(_XM_SSE_INTRINSICS_)
// Integer compare via reinterpretation of the float registers.
7428 __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
7429 return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)==7) != 0);
7430 #else // _XM_VMX128_INTRINSICS_
7431 return XMComparisonAllTrue(XMVector3EqualIntR(V1, V2));
7435 //------------------------------------------------------------------------------
// Bitwise x,y,z comparison returning a CR6-style record: CR6TRUE if all three
// components match, CR6FALSE if all three differ, otherwise neither flag.
7437 XMFINLINE UINT XMVector3EqualIntR
7443 #if defined(_XM_NO_INTRINSICS_)
7445 if ((V1.vector4_u32[0] == V2.vector4_u32[0]) &&
7446 (V1.vector4_u32[1] == V2.vector4_u32[1]) &&
7447 (V1.vector4_u32[2] == V2.vector4_u32[2]))
7449 CR = XM_CRMASK_CR6TRUE;
7451 else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) &&
7452 (V1.vector4_u32[1] != V2.vector4_u32[1]) &&
7453 (V1.vector4_u32[2] != V2.vector4_u32[2]))
7455 CR = XM_CRMASK_CR6FALSE;
7458 #elif defined(_XM_SSE_INTRINSICS_)
7459 __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
7460 int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7;
7464 CR = XM_CRMASK_CR6TRUE;
7468 CR = XM_CRMASK_CR6FALSE;
7471 #else // _XM_VMX128_INTRINSICS_
7472 #endif // _XM_VMX128_INTRINSICS_
7475 //------------------------------------------------------------------------------
// TRUE if |V1 - V2| <= Epsilon component-wise on x, y and z (w is ignored).
// Epsilon is a per-component tolerance vector.
7477 XMFINLINE BOOL XMVector3NearEqual
7484 #if defined(_XM_NO_INTRINSICS_)
7487 dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
7488 dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
7489 dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
7490 return (((dx <= Epsilon.vector4_f32[0]) &&
7491 (dy <= Epsilon.vector4_f32[1]) &&
7492 (dz <= Epsilon.vector4_f32[2])) != 0);
7493 #elif defined(_XM_SSE_INTRINSICS_)
7494 // Get the difference
7495 XMVECTOR vDelta = _mm_sub_ps(V1,V2);
7496 // Get the absolute value of the difference
// Branchless abs: max(-delta, delta).
7497 XMVECTOR vTemp = _mm_setzero_ps();
7498 vTemp = _mm_sub_ps(vTemp,vDelta);
7499 vTemp = _mm_max_ps(vTemp,vDelta);
7500 vTemp = _mm_cmple_ps(vTemp,Epsilon);
7502 return (((_mm_movemask_ps(vTemp)&7)==0x7) != 0);
7503 #else // _XM_VMX128_INTRINSICS_
7504 #endif // _XM_VMX128_INTRINSICS_
7507 //------------------------------------------------------------------------------
// TRUE if any of the x, y or z components of V1 and V2 differ (w is ignored).
7509 XMFINLINE BOOL XMVector3NotEqual
7515 #if defined(_XM_NO_INTRINSICS_)
7516 return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2])) != 0);
7517 #elif defined(_XM_SSE_INTRINSICS_)
7518 XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
// Not-all-equal: some of the x,y,z equality bits are clear.
7519 return (((_mm_movemask_ps(vTemp)&7)!=7) != 0);
7520 #else // _XM_VMX128_INTRINSICS_
7521 return XMComparisonAnyFalse(XMVector3EqualR(V1, V2));
7525 //------------------------------------------------------------------------------
// TRUE if any of the x, y or z components of V1 and V2 differ bitwise as
// 32-bit integers (w is ignored).
7527 XMFINLINE BOOL XMVector3NotEqualInt
7533 #if defined(_XM_NO_INTRINSICS_)
7534 return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2])) != 0);
7535 #elif defined(_XM_SSE_INTRINSICS_)
7536 __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
7537 return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)!=7) != 0);
7538 #else // _XM_VMX128_INTRINSICS_
7539 return XMComparisonAnyFalse(XMVector3EqualIntR(V1, V2));
7543 //------------------------------------------------------------------------------
// TRUE if V1.x > V2.x AND V1.y > V2.y AND V1.z > V2.z (w is ignored).
7545 XMFINLINE BOOL XMVector3Greater
7551 #if defined(_XM_NO_INTRINSICS_)
7552 return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2])) != 0);
7553 #elif defined(_XM_SSE_INTRINSICS_)
7554 XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
7555 return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
7556 #else // _XM_VMX128_INTRINSICS_
7557 return XMComparisonAllTrue(XMVector3GreaterR(V1, V2));
7561 //------------------------------------------------------------------------------
// Greater-than comparison of x,y,z returning a CR6-style record: CR6TRUE if
// all three are greater, CR6FALSE if all three are <= (none greater),
// otherwise neither flag.
7563 XMFINLINE UINT XMVector3GreaterR
7569 #if defined(_XM_NO_INTRINSICS_)
7571 if ((V1.vector4_f32[0] > V2.vector4_f32[0]) &&
7572 (V1.vector4_f32[1] > V2.vector4_f32[1]) &&
7573 (V1.vector4_f32[2] > V2.vector4_f32[2]))
7575 CR = XM_CRMASK_CR6TRUE;
7577 else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) &&
7578 (V1.vector4_f32[1] <= V2.vector4_f32[1]) &&
7579 (V1.vector4_f32[2] <= V2.vector4_f32[2]))
7581 CR = XM_CRMASK_CR6FALSE;
7585 #elif defined(_XM_SSE_INTRINSICS_)
7586 XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
7588 int iTest = _mm_movemask_ps(vTemp)&7;
7591 CR = XM_CRMASK_CR6TRUE;
7595 CR = XM_CRMASK_CR6FALSE;
7598 #else // _XM_VMX128_INTRINSICS_
7599 #endif // _XM_VMX128_INTRINSICS_
7602 //------------------------------------------------------------------------------
// TRUE if V1.x >= V2.x AND V1.y >= V2.y AND V1.z >= V2.z (w is ignored).
7604 XMFINLINE BOOL XMVector3GreaterOrEqual
7610 #if defined(_XM_NO_INTRINSICS_)
7611 return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2])) != 0);
7612 #elif defined(_XM_SSE_INTRINSICS_)
7613 XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
7614 return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
7615 #else // _XM_VMX128_INTRINSICS_
7616 return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V1, V2));
7620 //------------------------------------------------------------------------------
// Greater-or-equal comparison of x,y,z returning a CR6-style record: CR6TRUE
// if all three are >=, CR6FALSE if all three are strictly less, otherwise
// neither flag.
7622 XMFINLINE UINT XMVector3GreaterOrEqualR
7628 #if defined(_XM_NO_INTRINSICS_)
7631 if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
7632 (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
7633 (V1.vector4_f32[2] >= V2.vector4_f32[2]))
7635 CR = XM_CRMASK_CR6TRUE;
7637 else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
7638 (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
7639 (V1.vector4_f32[2] < V2.vector4_f32[2]))
7641 CR = XM_CRMASK_CR6FALSE;
7645 #elif defined(_XM_SSE_INTRINSICS_)
7646 XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
7648 int iTest = _mm_movemask_ps(vTemp)&7;
7651 CR = XM_CRMASK_CR6TRUE;
7655 CR = XM_CRMASK_CR6FALSE;
7658 #else // _XM_VMX128_INTRINSICS_
7659 #endif // _XM_VMX128_INTRINSICS_
7662 //------------------------------------------------------------------------------
// TRUE if V1.x < V2.x AND V1.y < V2.y AND V1.z < V2.z (w is ignored).
7664 XMFINLINE BOOL XMVector3Less
7670 #if defined(_XM_NO_INTRINSICS_)
7671 return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2])) != 0);
7672 #elif defined(_XM_SSE_INTRINSICS_)
7673 XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
7674 return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
7675 #else // _XM_VMX128_INTRINSICS_
// Less is implemented as Greater with the operands swapped.
7676 return XMComparisonAllTrue(XMVector3GreaterR(V2, V1));
7680 //------------------------------------------------------------------------------
// TRUE if V1.x <= V2.x AND V1.y <= V2.y AND V1.z <= V2.z (w is ignored).
7682 XMFINLINE BOOL XMVector3LessOrEqual
7688 #if defined(_XM_NO_INTRINSICS_)
7689 return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2])) != 0);
7690 #elif defined(_XM_SSE_INTRINSICS_)
7691 XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
7692 return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
7693 #else // _XM_VMX128_INTRINSICS_
// LessOrEqual is GreaterOrEqual with the operands swapped.
7694 return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V2, V1));
7698 //------------------------------------------------------------------------------
// TRUE if x, y and z of V each lie within [-Bounds, +Bounds] component-wise
// (w is ignored).
7700 XMFINLINE BOOL XMVector3InBounds
7706 #if defined(_XM_NO_INTRINSICS_)
7707 return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
7708 (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
7709 (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2])) != 0);
7710 #elif defined(_XM_SSE_INTRINSICS_)
7711 // Test if less than or equal
7712 XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
7713 // Negate the bounds
7714 XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
7715 // Test if greater or equal (Reversed)
7716 vTemp2 = _mm_cmple_ps(vTemp2,V);
7718 vTemp1 = _mm_and_ps(vTemp1,vTemp2);
7719 // x,y and z in bounds? (w is don't care)
7720 return (((_mm_movemask_ps(vTemp1)&0x7)==0x7) != 0);
7722 return XMComparisonAllInBounds(XMVector3InBoundsR(V, Bounds));
7726 //------------------------------------------------------------------------------
// In-bounds test of x,y,z against [-Bounds, +Bounds] returning a CR6-style
// record: XM_CRMASK_CR6BOUNDS when all three components are within bounds,
// 0 otherwise.
7728 XMFINLINE UINT XMVector3InBoundsR
7734 #if defined(_XM_NO_INTRINSICS_)
7736 if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
7737 (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
7738 (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]))
7740 CR = XM_CRMASK_CR6BOUNDS;
7744 #elif defined(_XM_SSE_INTRINSICS_)
7745 // Test if less than or equal
7746 XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
7747 // Negate the bounds
7748 XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
7749 // Test if greater or equal (Reversed)
7750 vTemp2 = _mm_cmple_ps(vTemp2,V);
7752 vTemp1 = _mm_and_ps(vTemp1,vTemp2);
7753 // x,y and z in bounds? (w is don't care)
7754 return ((_mm_movemask_ps(vTemp1)&0x7)==0x7) ? XM_CRMASK_CR6BOUNDS : 0;
7755 #else // _XM_VMX128_INTRINSICS_
7756 #endif // _XM_VMX128_INTRINSICS_
7759 //------------------------------------------------------------------------------
// TRUE if any of the x, y or z components of V is NaN (exponent all ones and
// non-zero mantissa); w is ignored.
7761 XMFINLINE BOOL XMVector3IsNaN
7766 #if defined(_XM_NO_INTRINSICS_)
7768 return (XMISNAN(V.vector4_f32[0]) ||
7769 XMISNAN(V.vector4_f32[1]) ||
7770 XMISNAN(V.vector4_f32[2]));
7772 #elif defined(_XM_SSE_INTRINSICS_)
7773 // Mask off the exponent
7774 __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
7775 // Mask off the mantissa
7776 __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
7777 // Are any of the exponents == 0x7F800000?
7778 vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
7779 // Are any of the mantissa's zero? (SSE2 doesn't have a neq test)
7780 vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
7781 // Perform a not on the NaN test to be true on NON-zero mantissas
7782 vTempNan = _mm_andnot_si128(vTempNan,vTempInf)
7783 // If x, y or z are NaN, the signs are true after the merge above
7784 return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&7) != 0);
7785 #else // _XM_VMX128_INTRINSICS_
7786 #endif // _XM_VMX128_INTRINSICS_
7789 //------------------------------------------------------------------------------
// TRUE if any of the x, y or z components of V is +/- infinity; w is ignored.
7791 XMFINLINE BOOL XMVector3IsInfinite
7796 #if defined(_XM_NO_INTRINSICS_)
7797 return (XMISINF(V.vector4_f32[0]) ||
7798 XMISINF(V.vector4_f32[1]) ||
7799 XMISINF(V.vector4_f32[2]));
7800 #elif defined(_XM_SSE_INTRINSICS_)
7801 // Mask off the sign bit
7802 __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
7803 // Compare to infinity
7804 vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
7805 // If x,y or z are infinity, the signs are true.
7806 return ((_mm_movemask_ps(vTemp)&7) != 0);
7807 #else // _XM_VMX128_INTRINSICS_
7808 #endif // _XM_VMX128_INTRINSICS_
7811 //------------------------------------------------------------------------------
7812 // Computation operations
7813 //------------------------------------------------------------------------------
7815 //------------------------------------------------------------------------------
// 3D dot product of V1 and V2 (x*x + y*y + z*z), with the scalar result
// replicated into all four lanes of the returned vector.
7817 XMFINLINE XMVECTOR XMVector3Dot
7823 #if defined(_XM_NO_INTRINSICS_)
7824 FLOAT fValue = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2];
7825 XMVECTOR vResult = {
7833 #elif defined(_XM_SSE_INTRINSICS_)
7834 // Perform the dot product
7835 XMVECTOR vDot = _mm_mul_ps(V1,V2);
7836 // x=Dot.vector4_f32[1], y=Dot.vector4_f32[2]
7837 XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
7838 // Result.vector4_f32[0] = x+y
7839 vDot = _mm_add_ss(vDot,vTemp);
7840 // x=Dot.vector4_f32[2]
7841 vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
7842 // Result.vector4_f32[0] = (x+y)+z
7843 vDot = _mm_add_ss(vDot,vTemp);
// Broadcast the scalar sum from lane 0 to all lanes.
7845 return _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
7846 #else // _XM_VMX128_INTRINSICS_
7847 #endif // _XM_VMX128_INTRINSICS_
7850 //------------------------------------------------------------------------------
7852 XMFINLINE XMVECTOR XMVector3Cross
// 3D cross product V1 x V2. The w component of the result is forced to zero.
7858 #if defined(_XM_NO_INTRINSICS_)
7859 XMVECTOR vResult = {
7860 (V1.vector4_f32[1] * V2.vector4_f32[2]) - (V1.vector4_f32[2] * V2.vector4_f32[1]),
7861 (V1.vector4_f32[2] * V2.vector4_f32[0]) - (V1.vector4_f32[0] * V2.vector4_f32[2]),
7862 (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]),
7867 #elif defined(_XM_SSE_INTRINSICS_)
// vTemp1 = (V1.y, V1.z, V1.x, V1.w)
7869 XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(3,0,2,1));
// vTemp2 = (V2.z, V2.x, V2.y, V2.w)
7871 XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(3,1,0,2));
7872 // Perform the left operation
7873 XMVECTOR vResult = _mm_mul_ps(vTemp1,vTemp2);
// Rotate each operand once more for the second product term.
7875 vTemp1 = _mm_shuffle_ps(vTemp1,vTemp1,_MM_SHUFFLE(3,0,2,1));
7877 vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(3,1,0,2));
7878 // Perform the right operation
7879 vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
7880 // Subtract the right from left, and return answer
7881 vResult = _mm_sub_ps(vResult,vTemp1);
// Zero the w component before returning.
7883 return _mm_and_ps(vResult,g_XMMask3);
7884 #else // _XM_VMX128_INTRINSICS_
7885 #endif // _XM_VMX128_INTRINSICS_
7888 //------------------------------------------------------------------------------
7890 XMFINLINE XMVECTOR XMVector3LengthSq
// Squared 3D length of V, replicated into all components (dot of V with itself).
7895 return XMVector3Dot(V, V);
7898 //------------------------------------------------------------------------------
7900 XMFINLINE XMVECTOR XMVector3ReciprocalLengthEst
// Estimated reciprocal of the 3D length of V, replicated into all components.
// The SSE path uses the low-precision _mm_rsqrt_ps estimate.
7905 #if defined(_XM_NO_INTRINSICS_)
7909 Result = XMVector3LengthSq(V);
7910 Result = XMVectorReciprocalSqrtEst(Result);
7914 #elif defined(_XM_SSE_INTRINSICS_)
7915 // Perform the dot product on x,y and z
7916 XMVECTOR vLengthSq = _mm_mul_ps(V,V);
7917 // vTemp has z and y
7918 XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
// Low lane accumulates x+z, then +y below; order differs from the other
// length helpers but the sum is the same.
7920 vLengthSq = _mm_add_ss(vLengthSq,vTemp);
7922 vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
7924 vLengthSq = _mm_add_ss(vLengthSq,vTemp);
7925 // Splat the length squared
7926 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
7927 // Get the reciprocal
7928 vLengthSq = _mm_rsqrt_ps(vLengthSq);
7930 #else // _XM_VMX128_INTRINSICS_
7931 #endif // _XM_VMX128_INTRINSICS_
7934 //------------------------------------------------------------------------------
7936 XMFINLINE XMVECTOR XMVector3ReciprocalLength
// Full-precision reciprocal of the 3D length of V, replicated into all
// components. The SSE path uses sqrt followed by a true divide.
7941 #if defined(_XM_NO_INTRINSICS_)
7945 Result = XMVector3LengthSq(V);
7946 Result = XMVectorReciprocalSqrt(Result);
7950 #elif defined(_XM_SSE_INTRINSICS_)
7951 // Perform the dot product
7952 XMVECTOR vDot = _mm_mul_ps(V,V);
7954 XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
7956 vDot = _mm_add_ss(vDot,vTemp);
7958 vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
7959 // Result.x = (x+y)+z
7960 vDot = _mm_add_ss(vDot,vTemp);
7962 vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
// Take the square root of the length squared...
7964 vDot = _mm_sqrt_ps(vDot);
7965 // Get the reciprocal
7966 vDot = _mm_div_ps(g_XMOne,vDot);
7968 #else // _XM_VMX128_INTRINSICS_
7969 #endif // _XM_VMX128_INTRINSICS_
7972 //------------------------------------------------------------------------------
7974 XMFINLINE XMVECTOR XMVector3LengthEst
// Estimated 3D length of V, replicated into all components.
// NOTE(review): the SSE path here uses full-precision _mm_sqrt_ps, so on SSE
// it is identical in result to XMVector3Length; only the no-intrinsics path
// differs (XMVectorSqrtEst).
7979 #if defined(_XM_NO_INTRINSICS_)
7983 Result = XMVector3LengthSq(V);
7984 Result = XMVectorSqrtEst(Result);
7988 #elif defined(_XM_SSE_INTRINSICS_)
7989 // Perform the dot product on x,y and z
7990 XMVECTOR vLengthSq = _mm_mul_ps(V,V);
7991 // vTemp has z and y
7992 XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
7994 vLengthSq = _mm_add_ss(vLengthSq,vTemp);
7996 vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
7998 vLengthSq = _mm_add_ss(vLengthSq,vTemp);
7999 // Splat the length squared
8000 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
8002 vLengthSq = _mm_sqrt_ps(vLengthSq);
8004 #else // _XM_VMX128_INTRINSICS_
8005 #endif // _XM_VMX128_INTRINSICS_
8008 //------------------------------------------------------------------------------
8010 XMFINLINE XMVECTOR XMVector3Length
// 3D length (Euclidean norm) of V, replicated into all four components.
8015 #if defined(_XM_NO_INTRINSICS_)
8019 Result = XMVector3LengthSq(V);
8020 Result = XMVectorSqrt(Result);
8024 #elif defined(_XM_SSE_INTRINSICS_)
8025 // Perform the dot product on x,y and z
8026 XMVECTOR vLengthSq = _mm_mul_ps(V,V);
8027 // vTemp has z and y
8028 XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
8030 vLengthSq = _mm_add_ss(vLengthSq,vTemp);
8032 vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
8034 vLengthSq = _mm_add_ss(vLengthSq,vTemp);
8035 // Splat the length squared
8036 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
8038 vLengthSq = _mm_sqrt_ps(vLengthSq);
8040 #else // _XM_VMX128_INTRINSICS_
8041 #endif // _XM_VMX128_INTRINSICS_
8044 //------------------------------------------------------------------------------
8045 // XMVector3NormalizeEst uses a reciprocal estimate and
8046 // returns QNaN on zero and infinite vectors.
8048 XMFINLINE XMVECTOR XMVector3NormalizeEst
// Fast approximate normalization: V * (1/||V||) using _mm_rsqrt_ps.
// All four components of V are scaled (w is not masked off).
8053 #if defined(_XM_NO_INTRINSICS_)
8056 Result = XMVector3ReciprocalLength(V);
8057 Result = XMVectorMultiply(V, Result);
8060 #elif defined(_XM_SSE_INTRINSICS_)
8061 // Perform the dot product
8062 XMVECTOR vDot = _mm_mul_ps(V,V);
8064 XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
8066 vDot = _mm_add_ss(vDot,vTemp);
8068 vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
8069 // Result.x = (x+y)+z
8070 vDot = _mm_add_ss(vDot,vTemp);
8072 vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
8073 // Get the reciprocal
8074 vDot = _mm_rsqrt_ps(vDot);
8075 // Perform the normalization
8076 vDot = _mm_mul_ps(vDot,V);
8078 #else // _XM_VMX128_INTRINSICS_
8079 #endif // _XM_VMX128_INTRINSICS_
8082 //------------------------------------------------------------------------------
8084 XMFINLINE XMVECTOR XMVector3Normalize
// Full-precision normalization of V based on its 3D length.
// SSE path: returns zero for a zero-length input and QNaN for an
// infinite-length input (see the mask/select logic below).
8089 #if defined(_XM_NO_INTRINSICS_)
8093 vResult = XMVector3Length( V );
8094 fLength = vResult.vector4_f32[0];
8096 // Prevent divide by zero
8098 fLength = 1.0f/fLength;
8101 vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
8102 vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
8103 vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
8104 vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
8107 #elif defined(_XM_SSE_INTRINSICS_)
8108 // Perform the dot product on x,y and z only
8109 XMVECTOR vLengthSq = _mm_mul_ps(V,V);
8110 XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
8111 vLengthSq = _mm_add_ss(vLengthSq,vTemp);
8112 vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
8113 vLengthSq = _mm_add_ss(vLengthSq,vTemp);
8114 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
8115 // Prepare for the division
8116 XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
8117 // Create zero with a single instruction
8118 XMVECTOR vZeroMask = _mm_setzero_ps();
8119 // Test for a divide by zero (Must be FP to detect -0.0)
8120 vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
8121 // Failsafe on zero (Or epsilon) length planes
8122 // If the length is infinity, set the elements to zero
8123 vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
8124 // Divide to perform the normalization
8125 vResult = _mm_div_ps(V,vResult);
8126 // Any that are infinity, set to zero
8127 vResult = _mm_and_ps(vResult,vZeroMask);
8128 // Select qnan or result based on infinite length
8129 XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
8130 XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
8131 vResult = _mm_or_ps(vTemp1,vTemp2);
8133 #else // _XM_VMX128_INTRINSICS_
8134 #endif // _XM_VMX128_INTRINSICS_
8137 //------------------------------------------------------------------------------
8139 XMFINLINE XMVECTOR XMVector3ClampLength
// Clamps the 3D length of V into [LengthMin, LengthMax] by replicating the
// scalar bounds and delegating to XMVector3ClampLengthV.
8146 #if defined(_XM_NO_INTRINSICS_)
8151 ClampMax = XMVectorReplicate(LengthMax);
8152 ClampMin = XMVectorReplicate(LengthMin);
8154 return XMVector3ClampLengthV(V, ClampMin, ClampMax);
8156 #elif defined(_XM_SSE_INTRINSICS_)
8157 XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
8158 XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
8159 return XMVector3ClampLengthV(V,ClampMin,ClampMax);
// NOTE(review): this #elif tests XM_NO_MISALIGNED_VECTOR_ACCESS while the
// #endif comment says _XM_VMX128_INTRINSICS_ — confirm against the full file.
8160 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
8161 #endif // _XM_VMX128_INTRINSICS_
8164 //------------------------------------------------------------------------------
8166 XMFINLINE XMVECTOR XMVector3ClampLengthV
// Clamps the 3D length of V into [LengthMin, LengthMax], where both bounds
// are vectors with identical x/y/z components (asserted below). Returns V
// unchanged (bit-exact) when its length is already inside the range.
8169 FXMVECTOR LengthMin,
8173 #if defined(_XM_NO_INTRINSICS_)
8175 XMVECTOR ClampLength;
8181 XMVECTOR InfiniteLength;
8182 XMVECTOR ZeroLength;
8184 XMVECTOR ControlMax;
8185 XMVECTOR ControlMin;
// Validate that the bounds are splatted scalars and form a sane range.
8189 XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[2] == LengthMin.vector4_f32[0]));
8190 XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[2] == LengthMax.vector4_f32[0]));
8191 XMASSERT(XMVector3GreaterOrEqual(LengthMin, XMVectorZero()));
8192 XMASSERT(XMVector3GreaterOrEqual(LengthMax, XMVectorZero()));
8193 XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));
8195 LengthSq = XMVector3LengthSq(V);
8197 Zero = XMVectorZero();
8199 RcpLength = XMVectorReciprocalSqrt(LengthSq);
// Detect the degenerate cases: infinite and zero length inputs.
8201 InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
8202 ZeroLength = XMVectorEqual(LengthSq, Zero);
8204 Normal = XMVectorMultiply(V, RcpLength);
8206 Length = XMVectorMultiply(LengthSq, RcpLength);
// If the length is neither zero nor infinite, keep the computed
// length/normal; otherwise fall back to LengthSq to propagate the special value.
8208 Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
8209 Length = XMVectorSelect(LengthSq, Length, Select);
8210 Normal = XMVectorSelect(LengthSq, Normal, Select);
8212 ControlMax = XMVectorGreater(Length, LengthMax);
8213 ControlMin = XMVectorLess(Length, LengthMin);
8215 ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
8216 ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
8218 Result = XMVectorMultiply(Normal, ClampLength);
8220 // Preserve the original vector (with no precision loss) if the length falls within the given range
8221 Control = XMVectorEqualInt(ControlMax, ControlMin);
8222 Result = XMVectorSelect(Result, V, Control);
8226 #elif defined(_XM_SSE_INTRINSICS_)
8227 XMVECTOR ClampLength;
8232 XMVECTOR InfiniteLength;
8233 XMVECTOR ZeroLength;
8235 XMVECTOR ControlMax;
8236 XMVECTOR ControlMin;
// Same preconditions as the no-intrinsics path above.
8240 XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)));
8241 XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)));
8242 XMASSERT(XMVector3GreaterOrEqual(LengthMin, g_XMZero));
8243 XMASSERT(XMVector3GreaterOrEqual(LengthMax, g_XMZero));
8244 XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));
8246 LengthSq = XMVector3LengthSq(V);
8247 RcpLength = XMVectorReciprocalSqrt(LengthSq);
8248 InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
8249 ZeroLength = XMVectorEqual(LengthSq,g_XMZero);
8250 Normal = _mm_mul_ps(V, RcpLength);
8251 Length = _mm_mul_ps(LengthSq, RcpLength);
8252 Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
8253 Length = XMVectorSelect(LengthSq, Length, Select);
8254 Normal = XMVectorSelect(LengthSq, Normal, Select);
8255 ControlMax = XMVectorGreater(Length, LengthMax);
8256 ControlMin = XMVectorLess(Length, LengthMin);
8257 ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
8258 ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
8259 Result = _mm_mul_ps(Normal, ClampLength);
8260 // Preserve the original vector (with no precision loss) if the length falls within the given range
8261 Control = XMVectorEqualInt(ControlMax, ControlMin);
8262 Result = XMVectorSelect(Result, V, Control);
8264 #else // _XM_VMX128_INTRINSICS_
8265 #endif // _XM_VMX128_INTRINSICS_
8268 //------------------------------------------------------------------------------
8270 XMFINLINE XMVECTOR XMVector3Reflect
// Reflects Incident about Normal: Incident - 2*dot3(Incident,Normal)*Normal.
// Normal is expected to be normalized for a true mirror reflection —
// the formula simply uses it as given.
8276 #if defined(_XM_NO_INTRINSICS_)
8280 // Result = Incident - (2 * dot(Incident, Normal)) * Normal
8281 Result = XMVector3Dot(Incident, Normal);
8282 Result = XMVectorAdd(Result, Result);
8283 Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);
8287 #elif defined(_XM_SSE_INTRINSICS_)
8288 // Result = Incident - (2 * dot(Incident, Normal)) * Normal
8289 XMVECTOR Result = XMVector3Dot(Incident, Normal);
8290 Result = _mm_add_ps(Result, Result);
8291 Result = _mm_mul_ps(Result, Normal);
8292 Result = _mm_sub_ps(Incident,Result);
8294 #else // _XM_VMX128_INTRINSICS_
8295 #endif // _XM_VMX128_INTRINSICS_
8298 //------------------------------------------------------------------------------
8300 XMFINLINE XMVECTOR XMVector3Refract
// Scalar-index convenience wrapper: replicates RefractionIndex across a
// vector and delegates to XMVector3RefractV.
8304 FLOAT RefractionIndex
8307 #if defined(_XM_NO_INTRINSICS_)
8310 Index = XMVectorReplicate(RefractionIndex);
8311 return XMVector3RefractV(Incident, Normal, Index);
8313 #elif defined(_XM_SSE_INTRINSICS_)
8314 XMVECTOR Index = _mm_set_ps1(RefractionIndex);
8315 return XMVector3RefractV(Incident,Normal,Index);
8316 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
8317 #endif // _XM_VMX128_INTRINSICS_
8320 //------------------------------------------------------------------------------
8322 XMFINLINE XMVECTOR XMVector3RefractV
// Refracts Incident through a surface with the given Normal and vector
// refraction index, using Snell's law. Total internal reflection
// (discriminant <= 0) yields a zero vector (return elided in this excerpt
// — confirm against the full file).
8326 FXMVECTOR RefractionIndex
8329 #if defined(_XM_NO_INTRINSICS_)
8333 CONST XMVECTOR Zero = XMVectorZero();
8335 // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
8336 // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
8338 IDotN = XMVector3Dot(Incident, Normal);
8340 // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
8341 R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
8342 R = XMVectorMultiply(R, RefractionIndex);
8343 R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);
8345 if (XMVector4LessOrEqual(R, Zero))
8347 // Total internal reflection
8354 // R = RefractionIndex * IDotN + sqrt(R)
8355 R = XMVectorSqrt(R);
8356 R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);
8358 // Result = RefractionIndex * Incident - Normal * R
8359 Result = XMVectorMultiply(RefractionIndex, Incident);
8360 Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);
8365 #elif defined(_XM_SSE_INTRINSICS_)
8366 // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
8367 // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
8368 XMVECTOR IDotN = XMVector3Dot(Incident, Normal);
8369 // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
8370 XMVECTOR R = _mm_mul_ps(IDotN, IDotN);
8371 R = _mm_sub_ps(g_XMOne,R);
8372 R = _mm_mul_ps(R, RefractionIndex);
8373 R = _mm_mul_ps(R, RefractionIndex);
8374 R = _mm_sub_ps(g_XMOne,R);
// All four lanes <= 0 means total internal reflection.
8376 XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
8377 if (_mm_movemask_ps(vResult)==0x0f)
8379 // Total internal reflection
8384 // R = RefractionIndex * IDotN + sqrt(R)
8386 vResult = _mm_mul_ps(RefractionIndex,IDotN);
8387 R = _mm_add_ps(R,vResult);
8388 // Result = RefractionIndex * Incident - Normal * R
8389 vResult = _mm_mul_ps(RefractionIndex, Incident);
8390 R = _mm_mul_ps(R,Normal);
8391 vResult = _mm_sub_ps(vResult,R);
8394 #else // _XM_VMX128_INTRINSICS_
8395 #endif // _XM_VMX128_INTRINSICS_
8398 //------------------------------------------------------------------------------
8400 XMFINLINE XMVECTOR XMVector3Orthogonal
// Produces a vector orthogonal to V, choosing the construction based on the
// signs of V's z and yzyy components to avoid degenerate results.
8405 #if defined(_XM_NO_INTRINSICS_)
8409 XMVECTOR ZIsNegative, YZYYIsNegative;
// Permute controls: first takes operand-2.x then operand-1.x three times;
// second rearranges V into (y, z, y, y).
8415 static CONST XMVECTORU32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
8416 static CONST XMVECTORU32 Permute0Y0Z0Y0Y= {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
8418 Zero = XMVectorZero();
8419 Z = XMVectorSplatZ(V);
8420 YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y.v);
8422 NegativeV = XMVectorSubtract(Zero, V);
8424 ZIsNegative = XMVectorLess(Z, Zero);
8425 YZYYIsNegative = XMVectorLess(YZYY, Zero);
8427 S = XMVectorAdd(YZYY, Z);
8428 D = XMVectorSubtract(YZYY, Z);
// Pick the sum-based candidate when z and yzyy have matching signs,
// the difference-based candidate otherwise.
8430 Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);
8432 R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X.v);
8433 R1 = XMVectorPermute(V, D, Permute1X0X0X0X.v);
8435 Result = XMVectorSelect(R1, R0, Select);
8439 #elif defined(_XM_SSE_INTRINSICS_)
8442 XMVECTOR ZIsNegative, YZYYIsNegative;
8448 static CONST XMVECTORI32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
8449 static CONST XMVECTORI32 Permute0Y0Z0Y0Y= {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
8451 Zero = XMVectorZero();
8452 Z = XMVectorSplatZ(V);
8453 YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y);
8455 NegativeV = _mm_sub_ps(Zero, V);
8457 ZIsNegative = XMVectorLess(Z, Zero);
8458 YZYYIsNegative = XMVectorLess(YZYY, Zero);
8460 S = _mm_add_ps(YZYY, Z);
8461 D = _mm_sub_ps(YZYY, Z);
8463 Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);
8465 R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X);
8466 R1 = XMVectorPermute(V, D,Permute1X0X0X0X);
8467 Result = XMVectorSelect(R1, R0, Select);
8469 #else // _XM_VMX128_INTRINSICS_
8470 #endif // _XM_VMX128_INTRINSICS_
8473 //------------------------------------------------------------------------------
8475 XMFINLINE XMVECTOR XMVector3AngleBetweenNormalsEst
// Estimated radian angle between two already-normalized 3D vectors:
// acosEst(clamp(dot3(N1,N2), -1, 1)), replicated into all components.
8481 #if defined(_XM_NO_INTRINSICS_)
8484 XMVECTOR NegativeOne;
8487 Result = XMVector3Dot(N1, N2);
8488 NegativeOne = XMVectorSplatConstant(-1, 0);
8489 One = XMVectorSplatOne();
// Clamp guards acos against dot products just outside [-1, 1] from rounding.
8490 Result = XMVectorClamp(Result, NegativeOne, One);
8491 Result = XMVectorACosEst(Result);
8495 #elif defined(_XM_SSE_INTRINSICS_)
8496 XMVECTOR vResult = XMVector3Dot(N1,N2);
8497 // Clamp to -1.0f to 1.0f
8498 vResult = _mm_max_ps(vResult,g_XMNegativeOne);
8499 vResult = _mm_min_ps(vResult,g_XMOne);
8500 vResult = XMVectorACosEst(vResult);
8502 #else // _XM_VMX128_INTRINSICS_
8503 #endif // _XM_VMX128_INTRINSICS_
8506 //------------------------------------------------------------------------------
8508 XMFINLINE XMVECTOR XMVector3AngleBetweenNormals
// Radian angle between two already-normalized 3D vectors:
// acos(clamp(dot3(N1,N2), -1, 1)), replicated into all components.
8514 #if defined(_XM_NO_INTRINSICS_)
8517 XMVECTOR NegativeOne;
8520 Result = XMVector3Dot(N1, N2);
8521 NegativeOne = XMVectorSplatConstant(-1, 0);
8522 One = XMVectorSplatOne();
// Clamp guards acos against dot products just outside [-1, 1] from rounding.
8523 Result = XMVectorClamp(Result, NegativeOne, One);
8524 Result = XMVectorACos(Result);
8528 #elif defined(_XM_SSE_INTRINSICS_)
8529 XMVECTOR vResult = XMVector3Dot(N1,N2);
8530 // Clamp to -1.0f to 1.0f
8531 vResult = _mm_max_ps(vResult,g_XMNegativeOne);
8532 vResult = _mm_min_ps(vResult,g_XMOne);
8533 vResult = XMVectorACos(vResult);
8535 #else // _XM_VMX128_INTRINSICS_
8536 #endif // _XM_VMX128_INTRINSICS_
8539 //------------------------------------------------------------------------------
8541 XMFINLINE XMVECTOR XMVector3AngleBetweenVectors
// Radian angle between two arbitrary (not necessarily normalized) 3D
// vectors: acos(clamp(dot3(V1,V2) / (||V1||*||V2||), -1, 1)).
8547 #if defined(_XM_NO_INTRINSICS_)
8553 XMVECTOR NegativeOne;
8557 L1 = XMVector3ReciprocalLength(V1);
8558 L2 = XMVector3ReciprocalLength(V2);
8560 Dot = XMVector3Dot(V1, V2);
// L1 becomes 1/(||V1||*||V2||), normalizing the dot product below.
8562 L1 = XMVectorMultiply(L1, L2);
8564 NegativeOne = XMVectorSplatConstant(-1, 0);
8565 One = XMVectorSplatOne();
8567 CosAngle = XMVectorMultiply(Dot, L1);
8569 CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);
8571 Result = XMVectorACos(CosAngle);
8575 #elif defined(_XM_SSE_INTRINSICS_)
8582 L1 = XMVector3ReciprocalLength(V1);
8583 L2 = XMVector3ReciprocalLength(V2);
8584 Dot = XMVector3Dot(V1, V2);
8585 L1 = _mm_mul_ps(L1, L2);
8586 CosAngle = _mm_mul_ps(Dot, L1);
8587 CosAngle = XMVectorClamp(CosAngle,g_XMNegativeOne,g_XMOne);
8588 Result = XMVectorACos(CosAngle);
8590 #else // _XM_VMX128_INTRINSICS_
8591 #endif // _XM_VMX128_INTRINSICS_
8594 //------------------------------------------------------------------------------
8596 XMFINLINE XMVECTOR XMVector3LinePointDistance
// Distance from Point to the infinite line through LinePoint1 and
// LinePoint2, computed by subtracting the projection of the point vector
// onto the line. Result is replicated into all components.
8598 FXMVECTOR LinePoint1,
8599 FXMVECTOR LinePoint2,
8603 #if defined(_XM_NO_INTRINSICS_)
8605 XMVECTOR PointVector;
8606 XMVECTOR LineVector;
8607 XMVECTOR ReciprocalLengthSq;
8608 XMVECTOR PointProjectionScale;
8609 XMVECTOR DistanceVector;
8612 // Given a vector PointVector from LinePoint1 to Point and a vector
8613 // LineVector from LinePoint1 to LinePoint2, the scaled distance
8614 // PointProjectionScale from LinePoint1 to the perpendicular projection
8615 // of PointVector onto the line is defined as:
8617 // PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)
8619 PointVector = XMVectorSubtract(Point, LinePoint1);
8620 LineVector = XMVectorSubtract(LinePoint2, LinePoint1);
8622 ReciprocalLengthSq = XMVector3LengthSq(LineVector);
8623 ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);
8625 PointProjectionScale = XMVector3Dot(PointVector, LineVector);
8626 PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);
8628 DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
8629 DistanceVector = XMVectorSubtract(PointVector, DistanceVector);
8631 Result = XMVector3Length(DistanceVector);
8635 #elif defined(_XM_SSE_INTRINSICS_)
8636 XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
8637 XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
// Despite the name, this SSE path divides by the squared length directly
// rather than multiplying by its reciprocal.
8638 XMVECTOR ReciprocalLengthSq = XMVector3LengthSq(LineVector);
8639 XMVECTOR vResult = XMVector3Dot(PointVector,LineVector);
8640 vResult = _mm_div_ps(vResult,ReciprocalLengthSq);
8641 vResult = _mm_mul_ps(vResult,LineVector);
8642 vResult = _mm_sub_ps(PointVector,vResult);
8643 vResult = XMVector3Length(vResult);
8645 #else // _XM_VMX128_INTRINSICS_
8646 #endif // _XM_VMX128_INTRINSICS_
8649 //------------------------------------------------------------------------------
8651 XMFINLINE VOID XMVector3ComponentsFromNormal
// Decomposes V into a component parallel to Normal (its projection onto
// Normal) and the perpendicular remainder. Both output pointers must be
// non-NULL (asserted).
8653 XMVECTOR* pParallel,
8654 XMVECTOR* pPerpendicular,
8659 #if defined(_XM_NO_INTRINSICS_)
8664 XMASSERT(pParallel);
8665 XMASSERT(pPerpendicular);
8667 Scale = XMVector3Dot(V, Normal);
8669 Parallel = XMVectorMultiply(Normal, Scale);
8671 *pParallel = Parallel;
8672 *pPerpendicular = XMVectorSubtract(V, Parallel);
8674 #elif defined(_XM_SSE_INTRINSICS_)
8675 XMASSERT(pParallel);
8676 XMASSERT(pPerpendicular);
8677 XMVECTOR Scale = XMVector3Dot(V, Normal);
8678 XMVECTOR Parallel = _mm_mul_ps(Normal,Scale);
8679 *pParallel = Parallel;
8680 *pPerpendicular = _mm_sub_ps(V,Parallel);
8681 #else // _XM_VMX128_INTRINSICS_
8682 #endif // _XM_VMX128_INTRINSICS_
8685 //------------------------------------------------------------------------------
8686 // Transform a vector using a rotation expressed as a unit quaternion
8688 XMFINLINE XMVECTOR XMVector3Rotate
// Computes q^-1 * v * q (with v promoted to a pure quaternion, w forced
// to 0) to rotate V by RotationQuaternion.
8691 FXMVECTOR RotationQuaternion
8694 #if defined(_XM_NO_INTRINSICS_)
// Select keeps V's x/y/z and zeroes w, forming a pure quaternion.
8700 A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
8701 Q = XMQuaternionConjugate(RotationQuaternion);
8702 Result = XMQuaternionMultiply(Q, A);
8703 Result = XMQuaternionMultiply(Result, RotationQuaternion);
8707 #elif defined(_XM_SSE_INTRINSICS_)
// Mask w to zero, forming a pure quaternion from V.
8712 A = _mm_and_ps(V,g_XMMask3);
8713 Q = XMQuaternionConjugate(RotationQuaternion);
8714 Result = XMQuaternionMultiply(Q, A);
8715 Result = XMQuaternionMultiply(Result, RotationQuaternion);
8717 #else // _XM_VMX128_INTRINSICS_
8718 #endif // _XM_VMX128_INTRINSICS_
8721 //------------------------------------------------------------------------------
8722 // Transform a vector using the inverse of a rotation expressed as a unit quaternion
8724 XMFINLINE XMVECTOR XMVector3InverseRotate
// Computes q * v * q^-1 — the inverse rotation of XMVector3Rotate
// (the conjugate is applied on the right instead of the left).
8727 FXMVECTOR RotationQuaternion
8730 #if defined(_XM_NO_INTRINSICS_)
// Select keeps V's x/y/z and zeroes w, forming a pure quaternion.
8736 A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
8737 Result = XMQuaternionMultiply(RotationQuaternion, A);
8738 Q = XMQuaternionConjugate(RotationQuaternion);
8739 Result = XMQuaternionMultiply(Result, Q);
8743 #elif defined(_XM_SSE_INTRINSICS_)
8747 A = _mm_and_ps(V,g_XMMask3);
8748 Result = XMQuaternionMultiply(RotationQuaternion, A);
8749 Q = XMQuaternionConjugate(RotationQuaternion);
8750 Result = XMQuaternionMultiply(Result, Q);
8752 #else // _XM_VMX128_INTRINSICS_
8753 #endif // _XM_VMX128_INTRINSICS_
8756 //------------------------------------------------------------------------------
8758 XMFINLINE XMVECTOR XMVector3Transform
// Transforms 3D vector V by matrix M, treating V's w as 1 (M.r[3], the
// translation row, is added unconditionally). Returns the full 4D result
// including the transformed w.
8764 #if defined(_XM_NO_INTRINSICS_)
8771 Z = XMVectorSplatZ(V);
8772 Y = XMVectorSplatY(V);
8773 X = XMVectorSplatX(V);
// result = x*row0 + y*row1 + z*row2 + row3
8775 Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
8776 Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
8777 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
8781 #elif defined(_XM_SSE_INTRINSICS_)
8782 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
8783 vResult = _mm_mul_ps(vResult,M.r[0]);
8784 XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
8785 vTemp = _mm_mul_ps(vTemp,M.r[1]);
8786 vResult = _mm_add_ps(vResult,vTemp);
8787 vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
8788 vTemp = _mm_mul_ps(vTemp,M.r[2]);
8789 vResult = _mm_add_ps(vResult,vTemp);
8790 vResult = _mm_add_ps(vResult,M.r[3]);
8792 #else // _XM_VMX128_INTRINSICS_
8793 #endif // _XM_VMX128_INTRINSICS_
8796 //------------------------------------------------------------------------------
8798 XMINLINE XMFLOAT4* XMVector3TransformStream
// Transforms a strided stream of XMFLOAT3 by matrix M (w treated as 1) into
// a strided stream of XMFLOAT4. Strides are in bytes; returns pOutputStream.
8800 XMFLOAT4* pOutputStream,
8801 size_t OutputStride,
8802 CONST XMFLOAT3* pInputStream,
8808 #if defined(_XM_NO_INTRINSICS_)
// Byte pointers let the per-element stride be applied directly.
8816 CONST BYTE* pInputVector = (CONST BYTE*)pInputStream;
8817 BYTE* pOutputVector = (BYTE*)pOutputStream;
8819 XMASSERT(pOutputStream);
8820 XMASSERT(pInputStream);
8822 for (i = 0; i < VectorCount; i++)
8824 V = XMLoadFloat3((const XMFLOAT3*)pInputVector);
8825 Z = XMVectorSplatZ(V);
8826 Y = XMVectorSplatY(V);
8827 X = XMVectorSplatX(V);
8829 Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
8830 Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
8831 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
8833 XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);
8835 pInputVector += InputStride;
8836 pOutputVector += OutputStride;
8839 return pOutputStream;
8841 #elif defined(_XM_SSE_INTRINSICS_)
8842 XMASSERT(pOutputStream);
8843 XMASSERT(pInputStream);
8845 CONST BYTE* pInputVector = (CONST BYTE*)pInputStream;
8846 BYTE* pOutputVector = (BYTE*)pOutputStream;
8848 for (i = 0; i < VectorCount; i++)
// Broadcast-load each scalar component; avoids a misaligned vector load.
8850 XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
8851 XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
8852 XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
8853 vResult = _mm_mul_ps(vResult,M.r[2]);
8854 vResult = _mm_add_ps(vResult,M.r[3]);
8855 Y = _mm_mul_ps(Y,M.r[1]);
8856 vResult = _mm_add_ps(vResult,Y);
8857 X = _mm_mul_ps(X,M.r[0]);
8858 vResult = _mm_add_ps(vResult,X);
// Unaligned store: the output stride need not be 16-byte aligned.
8859 _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vResult);
8860 pInputVector += InputStride;
8861 pOutputVector += OutputStride;
8864 return pOutputStream;
8865 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
8866 #endif // _XM_VMX128_INTRINSICS_
8869 //------------------------------------------------------------------------------
8871 XMINLINE XMFLOAT4* XMVector3TransformStreamNC
// Non-cached ("NC") variant of XMVector3TransformStream. On every platform
// visible here it simply forwards to XMVector3TransformStream; a distinct
// VMX128 implementation is elided from this excerpt.
8873 XMFLOAT4* pOutputStream,
8874 size_t OutputStride,
8875 CONST XMFLOAT3* pInputStream,
8881 #if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)
8882 return XMVector3TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M );
8883 #else // _XM_VMX128_INTRINSICS_
8884 #endif // _XM_VMX128_INTRINSICS_
8887 //------------------------------------------------------------------------------
8889 XMFINLINE XMVECTOR XMVector3TransformCoord
// Transforms 3D point V by matrix M (w treated as 1), then divides by the
// resulting w to project back to w == 1 homogeneous coordinates.
8895 #if defined(_XM_NO_INTRINSICS_)
8903 Z = XMVectorSplatZ(V);
8904 Y = XMVectorSplatY(V);
8905 X = XMVectorSplatX(V);
8907 Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
8908 Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
8909 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
// Perspective divide: scale every component by 1/w.
8911 InverseW = XMVectorSplatW(Result);
8912 InverseW = XMVectorReciprocal(InverseW);
8914 Result = XMVectorMultiply(Result, InverseW);
8918 #elif defined(_XM_SSE_INTRINSICS_)
8919 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
8920 vResult = _mm_mul_ps(vResult,M.r[0]);
8921 XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
8922 vTemp = _mm_mul_ps(vTemp,M.r[1]);
8923 vResult = _mm_add_ps(vResult,vTemp);
8924 vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
8925 vTemp = _mm_mul_ps(vTemp,M.r[2]);
8926 vResult = _mm_add_ps(vResult,vTemp);
8927 vResult = _mm_add_ps(vResult,M.r[3]);
// Splat w and divide: perspective divide via a true division (not rcp).
8928 vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
8929 vResult = _mm_div_ps(vResult,vTemp);
8931 #else // _XM_VMX128_INTRINSICS_
8932 #endif // _XM_VMX128_INTRINSICS_
8935 //------------------------------------------------------------------------------
8937 XMINLINE XMFLOAT3* XMVector3TransformCoordStream
// Transforms a strided stream of XMFLOAT3 points by M with a per-point
// perspective divide (w treated as 1, result divided by transformed w),
// writing XMFLOAT3 results. Strides in bytes; returns pOutputStream.
8939 XMFLOAT3* pOutputStream,
8940 size_t OutputStride,
8941 CONST XMFLOAT3* pInputStream,
8947 #if defined(_XM_NO_INTRINSICS_)
8956 CONST BYTE* pInputVector = (CONST BYTE*)pInputStream;
8957 BYTE* pOutputVector = (BYTE*)pOutputStream;
8959 XMASSERT(pOutputStream);
8960 XMASSERT(pInputStream);
8962 for (i = 0; i < VectorCount; i++)
8964 V = XMLoadFloat3((const XMFLOAT3*)pInputVector);
8965 Z = XMVectorSplatZ(V);
8966 Y = XMVectorSplatY(V);
8967 X = XMVectorSplatX(V);
8968 // Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z);
8969 // Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y);
8970 // X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x);
8972 Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
8973 Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
8974 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
8976 InverseW = XMVectorSplatW(Result);
8977 InverseW = XMVectorReciprocal(InverseW);
8979 Result = XMVectorMultiply(Result, InverseW);
8981 XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
8983 pInputVector += InputStride;
8984 pOutputVector += OutputStride;
8987 return pOutputStream;
8989 #elif defined(_XM_SSE_INTRINSICS_)
8990 XMASSERT(pOutputStream);
8991 XMASSERT(pInputStream);
8994 CONST BYTE *pInputVector = (CONST BYTE*)pInputStream;
8995 BYTE *pOutputVector = (BYTE*)pOutputStream;
8997 for (i = 0; i < VectorCount; i++)
8999 XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
9000 XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
9001 XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
9002 vResult = _mm_mul_ps(vResult,M.r[2]);
9003 vResult = _mm_add_ps(vResult,M.r[3]);
9004 Y = _mm_mul_ps(Y,M.r[1]);
9005 vResult = _mm_add_ps(vResult,Y);
9006 X = _mm_mul_ps(X,M.r[0]);
9007 vResult = _mm_add_ps(vResult,X);
// Perspective divide by the splatted w component.
9009 X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
9010 vResult = _mm_div_ps(vResult,X);
// Store x/y/z one scalar at a time, rotating the register between stores,
// so only 12 bytes are written regardless of output alignment.
9011 _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult);
9012 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
9013 _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult);
9014 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
9015 _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult);
9016 pInputVector += InputStride;
9017 pOutputVector += OutputStride;
9020 return pOutputStream;
9021 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
9022 #endif // _XM_VMX128_INTRINSICS_
9025 //------------------------------------------------------------------------------
9027 XMFINLINE XMVECTOR XMVector3TransformNormal
// Transforms 3D direction V by matrix M with w treated as 0: the
// translation row M.r[3] is NOT added, unlike XMVector3Transform.
9033 #if defined(_XM_NO_INTRINSICS_)
9040 Z = XMVectorSplatZ(V);
9041 Y = XMVectorSplatY(V);
9042 X = XMVectorSplatX(V);
// result = x*row0 + y*row1 + z*row2 (no row3 / translation term)
9044 Result = XMVectorMultiply(Z, M.r[2]);
9045 Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
9046 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
9050 #elif defined(_XM_SSE_INTRINSICS_)
9051 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
9052 vResult = _mm_mul_ps(vResult,M.r[0]);
9053 XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
9054 vTemp = _mm_mul_ps(vTemp,M.r[1]);
9055 vResult = _mm_add_ps(vResult,vTemp);
9056 vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
9057 vTemp = _mm_mul_ps(vTemp,M.r[2]);
9058 vResult = _mm_add_ps(vResult,vTemp);
9060 #else // _XM_VMX128_INTRINSICS_
9061 #endif // _XM_VMX128_INTRINSICS_
9064 //------------------------------------------------------------------------------
// Transforms a strided stream of XMFLOAT3 normals by rows 0-2 of M (no translation),
// writing each result back as an XMFLOAT3. Returns pOutputStream for chaining.
// In-place operation appears supported since each element is fully read before written;
// NOTE(review): remaining parameters (InputStride, VectorCount, M) elided in this excerpt — confirm against header.
9066 XMINLINE XMFLOAT3* XMVector3TransformNormalStream
9068 XMFLOAT3* pOutputStream,
9069 size_t OutputStride,
9070 CONST XMFLOAT3* pInputStream,
9076 #if defined(_XM_NO_INTRINSICS_)
// Byte pointers so the user-supplied strides can be applied directly.
9084 CONST BYTE* pInputVector = (CONST BYTE*)pInputStream;
9085 BYTE* pOutputVector = (BYTE*)pOutputStream;
9087 XMASSERT(pOutputStream);
9088 XMASSERT(pInputStream);
9090 for (i = 0; i < VectorCount; i++)
9092 V = XMLoadFloat3((const XMFLOAT3*)pInputVector);
9093 Z = XMVectorSplatZ(V);
9094 Y = XMVectorSplatY(V);
9095 X = XMVectorSplatX(V);
9096 // Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z);
9097 // Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y);
9098 // X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x);
// Accumulate V.x*M.r[0] + V.y*M.r[1] + V.z*M.r[2]; M.r[3] intentionally unused.
9100 Result = XMVectorMultiply(Z, M.r[2]);
9101 Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
9102 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
9104 XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
9106 pInputVector += InputStride;
9107 pOutputVector += OutputStride;
9110 return pOutputStream;
9112 #elif defined(_XM_SSE_INTRINSICS_)
9113 XMASSERT(pOutputStream);
9114 XMASSERT(pInputStream);
9117 CONST BYTE *pInputVector = (CONST BYTE*)pInputStream;
9118 BYTE *pOutputVector = (BYTE*)pOutputStream;
9120 for (i = 0; i < VectorCount; i++)
// _mm_load_ps1 broadcasts each scalar component into all four lanes.
9122 XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
9123 XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
9124 XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
9125 vResult = _mm_mul_ps(vResult,M.r[2]);
9126 Y = _mm_mul_ps(Y,M.r[1]);
9127 vResult = _mm_add_ps(vResult,Y);
9128 X = _mm_mul_ps(X,M.r[0]);
9129 vResult = _mm_add_ps(vResult,X);
// Store x, then rotate lanes right so y and z land in lane 0 for the next stores.
9130 _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult);
9131 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
9132 _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult);
9133 vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
9134 _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult);
9135 pInputVector += InputStride;
9136 pOutputVector += OutputStride;
9139 return pOutputStream;
9140 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
9141 #endif // _XM_VMX128_INTRINSICS_
9144 //------------------------------------------------------------------------------
// Projects a 3D point from object space into screen (viewport) space:
// applies World*View*Projection, performs the perspective divide
// (inside XMVector3TransformCoord), then maps the NDC result into the
// viewport rectangle and depth range via Result*Scale + Offset.
// Note the Y scale is negated (-HalfViewportHeight) because screen Y grows downward.
// NOTE(review): several parameters (V, ViewportX/Y, ViewportMinZ/MaxZ, World, View) elided in this excerpt.
9146 XMINLINE XMVECTOR XMVector3Project
9151 FLOAT ViewportWidth,
9152 FLOAT ViewportHeight,
9155 CXMMATRIX Projection,
9160 #if defined(_XM_NO_INTRINSICS_)
9166 FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
9167 FLOAT HalfViewportHeight = ViewportHeight * 0.5f;
9169 Scale = XMVectorSet(HalfViewportWidth,
9170 -HalfViewportHeight,
9171 ViewportMaxZ - ViewportMinZ,
9174 Offset = XMVectorSet(ViewportX + HalfViewportWidth,
9175 ViewportY + HalfViewportHeight,
9179 Transform = XMMatrixMultiply(World, View);
9180 Transform = XMMatrixMultiply(Transform, Projection);
9182 Result = XMVector3TransformCoord(V, Transform);
9184 Result = XMVectorMultiplyAdd(Result, Scale, Offset);
9188 #elif defined(_XM_SSE_INTRINSICS_)
// SSE path mirrors the scalar path; only the final madd is written with raw intrinsics.
9193 FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
9194 FLOAT HalfViewportHeight = ViewportHeight * 0.5f;
9196 Scale = XMVectorSet(HalfViewportWidth,
9197 -HalfViewportHeight,
9198 ViewportMaxZ - ViewportMinZ,
9201 Offset = XMVectorSet(ViewportX + HalfViewportWidth,
9202 ViewportY + HalfViewportHeight,
9205 Transform = XMMatrixMultiply(World, View);
9206 Transform = XMMatrixMultiply(Transform, Projection);
9207 Result = XMVector3TransformCoord(V, Transform);
9208 Result = _mm_mul_ps(Result,Scale);
9209 Result = _mm_add_ps(Result,Offset);
9211 #else // _XM_VMX128_INTRINSICS_
9212 #endif // _XM_VMX128_INTRINSICS_
9215 //------------------------------------------------------------------------------
// Stream form of XMVector3Project: projects VectorCount strided XMFLOAT3 points
// from object space to viewport space. The combined transform and the viewport
// Scale/Offset vectors are hoisted out of the loop. Returns pOutputStream.
// NOTE(review): parameters partially elided (InputStride, VectorCount, ViewportX/Y,
// ViewportMinZ/MaxZ, World, View) — confirm against header.
9217 XMINLINE XMFLOAT3* XMVector3ProjectStream
9219 XMFLOAT3* pOutputStream,
9220 size_t OutputStride,
9221 CONST XMFLOAT3* pInputStream,
9226 FLOAT ViewportWidth,
9227 FLOAT ViewportHeight,
9230 CXMMATRIX Projection,
9235 #if defined(_XM_NO_INTRINSICS_)
9243 FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
9244 FLOAT HalfViewportHeight = ViewportHeight * 0.5f;
9245 CONST BYTE* pInputVector = (CONST BYTE*)pInputStream;
9246 BYTE* pOutputVector = (BYTE*)pOutputStream;
9248 XMASSERT(pOutputStream);
9249 XMASSERT(pInputStream);
// Y scale negated: screen-space Y increases downward.
9251 Scale = XMVectorSet(HalfViewportWidth,
9252 -HalfViewportHeight,
9253 ViewportMaxZ - ViewportMinZ,
9256 Offset = XMVectorSet(ViewportX + HalfViewportWidth,
9257 ViewportY + HalfViewportHeight,
9261 Transform = XMMatrixMultiply(World, View);
9262 Transform = XMMatrixMultiply(Transform, Projection);
9264 for (i = 0; i < VectorCount; i++)
9266 V = XMLoadFloat3((const XMFLOAT3*)pInputVector);
// TransformCoord includes the perspective divide by w.
9268 Result = XMVector3TransformCoord(V, Transform);
9270 Result = XMVectorMultiplyAdd(Result, Scale, Offset);
9272 XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
9274 pInputVector += InputStride;
9275 pOutputVector += OutputStride;
9278 return pOutputStream;
9280 #elif defined(_XM_SSE_INTRINSICS_)
9281 XMASSERT(pOutputStream);
9282 XMASSERT(pInputStream);
9289 FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
9290 FLOAT HalfViewportHeight = ViewportHeight * 0.5f;
9291 CONST BYTE* pInputVector = (CONST BYTE*)pInputStream;
9292 BYTE* pOutputVector = (BYTE*)pOutputStream;
9294 Scale = XMVectorSet(HalfViewportWidth,
9295 -HalfViewportHeight,
9296 ViewportMaxZ - ViewportMinZ,
9299 Offset = XMVectorSet(ViewportX + HalfViewportWidth,
9300 ViewportY + HalfViewportHeight,
9304 Transform = XMMatrixMultiply(World, View);
9305 Transform = XMMatrixMultiply(Transform, Projection);
9307 for (i = 0; i < VectorCount; i++)
9309 V = XMLoadFloat3((const XMFLOAT3*)pInputVector);
9311 Result = XMVector3TransformCoord(V, Transform);
9313 Result = _mm_mul_ps(Result,Scale);
9314 Result = _mm_add_ps(Result,Offset);
9315 XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
9316 pInputVector += InputStride;
9317 pOutputVector += OutputStride;
9319 return pOutputStream;
9321 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
9322 #endif // _XM_VMX128_INTRINSICS_
9325 //------------------------------------------------------------------------------
// Inverse of XMVector3Project: maps a screen-space point back to object space.
// Builds the viewport-to-NDC affine map (reciprocal Scale, Offset folded with
// D = {-1, 1, 0, 0} to recenter x/y), then applies the inverse of
// World*View*Projection with a perspective divide via XMVector3TransformCoord.
// NOTE(review): parameters partially elided (V, ViewportX/Y, ViewportMinZ/MaxZ,
// World, View) — confirm against header.
9327 XMFINLINE XMVECTOR XMVector3Unproject
9332 FLOAT ViewportWidth,
9333 FLOAT ViewportHeight,
9336 CXMMATRIX Projection,
9341 #if defined(_XM_NO_INTRINSICS_)
9346 XMVECTOR Determinant;
9348 CONST XMVECTOR D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);
9350 Scale = XMVectorSet(ViewportWidth * 0.5f,
9351 -ViewportHeight * 0.5f,
9352 ViewportMaxZ - ViewportMinZ,
// Reciprocal turns the forward viewport scale into the inverse mapping.
9354 Scale = XMVectorReciprocal(Scale);
9356 Offset = XMVectorSet(-ViewportX,
9360 Offset = XMVectorMultiplyAdd(Scale, Offset, D);
9362 Transform = XMMatrixMultiply(World, View);
9363 Transform = XMMatrixMultiply(Transform, Projection);
// Determinant is computed but not checked here; a singular transform yields invalid output.
9364 Transform = XMMatrixInverse(&Determinant, Transform);
9366 Result = XMVectorMultiplyAdd(V, Scale, Offset);
9368 Result = XMVector3TransformCoord(Result, Transform);
9372 #elif defined(_XM_SSE_INTRINSICS_)
9376 XMVECTOR Determinant;
9378 CONST XMVECTORF32 D = {-1.0f, 1.0f, 0.0f, 0.0f};
9380 Scale = XMVectorSet(ViewportWidth * 0.5f,
9381 -ViewportHeight * 0.5f,
9382 ViewportMaxZ - ViewportMinZ,
9384 Scale = XMVectorReciprocal(Scale);
9386 Offset = XMVectorSet(-ViewportX,
9390 Offset = _mm_mul_ps(Offset,Scale);
9391 Offset = _mm_add_ps(Offset,D);
9393 Transform = XMMatrixMultiply(World, View);
9394 Transform = XMMatrixMultiply(Transform, Projection);
9395 Transform = XMMatrixInverse(&Determinant, Transform);
9397 Result = _mm_mul_ps(V,Scale);
9398 Result = _mm_add_ps(Result,Offset);
9400 Result = XMVector3TransformCoord(Result, Transform);
9403 #else // _XM_VMX128_INTRINSICS_
9404 #endif // _XM_VMX128_INTRINSICS_
9407 //------------------------------------------------------------------------------
// Stream form of XMVector3Unproject: maps VectorCount strided screen-space
// XMFLOAT3 points back to object space. The inverse viewport mapping and the
// inverted World*View*Projection matrix are computed once outside the loop.
// Returns pOutputStream for chaining.
// NOTE(review): parameters partially elided (InputStride, VectorCount,
// ViewportX/Y, ViewportMinZ/MaxZ, World, View) — confirm against header.
9409 XMINLINE XMFLOAT3* XMVector3UnprojectStream
9411 XMFLOAT3* pOutputStream,
9412 size_t OutputStride,
9413 CONST XMFLOAT3* pInputStream,
9418 FLOAT ViewportWidth,
9419 FLOAT ViewportHeight,
9422 CXMMATRIX Projection,
9426 #if defined(_XM_NO_INTRINSICS_)
9432 XMVECTOR Determinant;
9435 CONST BYTE* pInputVector = (CONST BYTE*)pInputStream;
9436 BYTE* pOutputVector = (BYTE*)pOutputStream;
// D recenters x/y from viewport coordinates into [-1,1] NDC.
9437 CONST XMVECTOR D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);
9439 XMASSERT(pOutputStream);
9440 XMASSERT(pInputStream);
9442 Scale = XMVectorSet(ViewportWidth * 0.5f,
9443 -ViewportHeight * 0.5f,
9444 ViewportMaxZ - ViewportMinZ,
9446 Scale = XMVectorReciprocal(Scale);
9448 Offset = XMVectorSet(-ViewportX,
9452 Offset = XMVectorMultiplyAdd(Scale, Offset, D);
9454 Transform = XMMatrixMultiply(World, View);
9455 Transform = XMMatrixMultiply(Transform, Projection);
// Determinant not checked; singular transform gives invalid results.
9456 Transform = XMMatrixInverse(&Determinant, Transform);
9458 for (i = 0; i < VectorCount; i++)
9460 V = XMLoadFloat3((const XMFLOAT3*)pInputVector);
9462 Result = XMVectorMultiplyAdd(V, Scale, Offset);
9464 Result = XMVector3TransformCoord(Result, Transform);
9466 XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
9468 pInputVector += InputStride;
9469 pOutputVector += OutputStride;
9472 return pOutputStream;
9474 #elif defined(_XM_SSE_INTRINSICS_)
9475 XMASSERT(pOutputStream);
9476 XMASSERT(pInputStream);
9481 XMVECTOR Determinant;
9484 CONST BYTE* pInputVector = (CONST BYTE*)pInputStream;
9485 BYTE* pOutputVector = (BYTE*)pOutputStream;
9486 CONST XMVECTORF32 D = {-1.0f, 1.0f, 0.0f, 0.0f};
9488 Scale = XMVectorSet(ViewportWidth * 0.5f,
9489 -ViewportHeight * 0.5f,
9490 ViewportMaxZ - ViewportMinZ,
9492 Scale = XMVectorReciprocal(Scale);
9494 Offset = XMVectorSet(-ViewportX,
9498 Offset = _mm_mul_ps(Offset,Scale);
9499 Offset = _mm_add_ps(Offset,D);
9501 Transform = XMMatrixMultiply(World, View);
9502 Transform = XMMatrixMultiply(Transform, Projection);
9503 Transform = XMMatrixInverse(&Determinant, Transform);
9505 for (i = 0; i < VectorCount; i++)
9507 V = XMLoadFloat3((const XMFLOAT3*)pInputVector);
9509 Result = XMVectorMultiplyAdd(V, Scale, Offset);
9511 Result = XMVector3TransformCoord(Result, Transform);
9513 XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
9515 pInputVector += InputStride;
9516 pOutputVector += OutputStride;
9519 return pOutputStream;
9520 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
9521 #endif // _XM_VMX128_INTRINSICS_
9524 /****************************************************************************
9528 ****************************************************************************/
9530 //------------------------------------------------------------------------------
9531 // Comparison operations
9532 //------------------------------------------------------------------------------
9534 //------------------------------------------------------------------------------
// Returns TRUE iff all four components of V1 and V2 compare exactly equal
// (float ==; NaN lanes compare unequal). SSE path requires all four mask
// bits from _mm_movemask_ps to be set (0x0f).
// NOTE(review): parameter list elided — presumably (FXMVECTOR V1, FXMVECTOR V2).
9536 XMFINLINE BOOL XMVector4Equal
9542 #if defined(_XM_NO_INTRINSICS_)
9543 return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2]) && (V1.vector4_f32[3] == V2.vector4_f32[3])) != 0);
9544 #elif defined(_XM_SSE_INTRINSICS_)
9545 XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
9546 return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
// Fallback path (preprocessor branch elided here): derive from the comparison-record variant.
9548 return XMComparisonAllTrue(XMVector4EqualR(V1, V2));
9552 //------------------------------------------------------------------------------
// Componentwise equality returning a CR6-style comparison record instead of BOOL:
// XM_CRMASK_CR6TRUE when all four lanes are equal, XM_CRMASK_CR6FALSE when all
// four are unequal, and 0 (neither mask) for a mixed result.
// NOTE(review): parameter list elided — presumably (FXMVECTOR V1, FXMVECTOR V2).
9554 XMFINLINE UINT XMVector4EqualR
9560 #if defined(_XM_NO_INTRINSICS_)
9564 if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
9565 (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
9566 (V1.vector4_f32[2] == V2.vector4_f32[2]) &&
9567 (V1.vector4_f32[3] == V2.vector4_f32[3]))
9569 CR = XM_CRMASK_CR6TRUE;
9571 else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
9572 (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
9573 (V1.vector4_f32[2] != V2.vector4_f32[2]) &&
9574 (V1.vector4_f32[3] != V2.vector4_f32[3]))
9576 CR = XM_CRMASK_CR6FALSE;
9580 #elif defined(_XM_SSE_INTRINSICS_)
// SSE: one compare plus movemask classifies all/none/mixed in two tests.
9581 XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
9582 int iTest = _mm_movemask_ps(vTemp);
9584 if (iTest==0xf) // All equal?
9586 CR = XM_CRMASK_CR6TRUE;
9588 else if (iTest==0) // All not equal?
9590 CR = XM_CRMASK_CR6FALSE;
9593 #else // _XM_VMX128_INTRINSICS_
9594 #endif // _XM_VMX128_INTRINSICS_
9597 //------------------------------------------------------------------------------
// Bitwise (integer) equality of all four lanes — unlike XMVector4Equal this is
// an exact 32-bit compare, so NaN bit patterns compare equal to themselves.
// SSE path reinterprets the vectors as __m128i for _mm_cmpeq_epi32.
9599 XMFINLINE BOOL XMVector4EqualInt
9605 #if defined(_XM_NO_INTRINSICS_)
9606 return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2]) && (V1.vector4_u32[3] == V2.vector4_u32[3])) != 0);
9607 #elif defined(_XM_SSE_INTRINSICS_)
9608 __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
9609 return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])==0xf) != 0);
// Fallback path (preprocessor branch elided here): derive from the comparison-record variant.
9611 return XMComparisonAllTrue(XMVector4EqualIntR(V1, V2));
9615 //------------------------------------------------------------------------------
// Bitwise equality returning a CR6-style record: CR6TRUE when all four 32-bit
// lanes match, CR6FALSE when none match, 0 for a mixed result.
9617 XMFINLINE UINT XMVector4EqualIntR
9623 #if defined(_XM_NO_INTRINSICS_)
9625 if (V1.vector4_u32[0] == V2.vector4_u32[0] &&
9626 V1.vector4_u32[1] == V2.vector4_u32[1] &&
9627 V1.vector4_u32[2] == V2.vector4_u32[2] &&
9628 V1.vector4_u32[3] == V2.vector4_u32[3])
9630 CR = XM_CRMASK_CR6TRUE;
9632 else if (V1.vector4_u32[0] != V2.vector4_u32[0] &&
9633 V1.vector4_u32[1] != V2.vector4_u32[1] &&
9634 V1.vector4_u32[2] != V2.vector4_u32[2] &&
9635 V1.vector4_u32[3] != V2.vector4_u32[3])
9637 CR = XM_CRMASK_CR6FALSE;
9641 #elif defined(_XM_SSE_INTRINSICS_)
// Integer compare, then movemask on the float reinterpretation of the mask.
9642 __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
9643 int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0]);
9645 if (iTest==0xf) // All equal?
9647 CR = XM_CRMASK_CR6TRUE;
9649 else if (iTest==0) // All not equal?
9651 CR = XM_CRMASK_CR6FALSE;
9654 #else // _XM_VMX128_INTRINSICS_
9655 #endif // _XM_VMX128_INTRINSICS_
// Returns TRUE iff each component of V1 is within the per-component Epsilon of V2
// (|V1 - V2| <= Epsilon, lanewise).
// NOTE(review): parameter list elided — presumably (FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR Epsilon).
9658 XMFINLINE BOOL XMVector4NearEqual
9665 #if defined(_XM_NO_INTRINSICS_)
9666 FLOAT dx, dy, dz, dw;
9668 dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
9669 dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
9670 dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
9671 dw = fabsf(V1.vector4_f32[3]-V2.vector4_f32[3]);
9672 return (((dx <= Epsilon.vector4_f32[0]) &&
9673 (dy <= Epsilon.vector4_f32[1]) &&
9674 (dz <= Epsilon.vector4_f32[2]) &&
9675 (dw <= Epsilon.vector4_f32[3])) != 0);
9676 #elif defined(_XM_SSE_INTRINSICS_)
9677 // Get the difference
9678 XMVECTOR vDelta = _mm_sub_ps(V1,V2);
9679 // Get the absolute value of the difference
// abs via max(delta, -delta) — avoids needing a sign-mask constant.
9680 XMVECTOR vTemp = _mm_setzero_ps();
9681 vTemp = _mm_sub_ps(vTemp,vDelta);
9682 vTemp = _mm_max_ps(vTemp,vDelta);
9683 vTemp = _mm_cmple_ps(vTemp,Epsilon);
9684 return ((_mm_movemask_ps(vTemp)==0xf) != 0);
9685 #else // _XM_VMX128_INTRINSICS_
9686 #endif // _XM_VMX128_INTRINSICS_
9689 //------------------------------------------------------------------------------
// Returns TRUE iff ANY component differs (logical negation of XMVector4Equal).
// SSE path: any set bit from the cmpneq mask means at least one lane differs.
9691 XMFINLINE BOOL XMVector4NotEqual
9697 #if defined(_XM_NO_INTRINSICS_)
9698 return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2]) || (V1.vector4_f32[3] != V2.vector4_f32[3])) != 0);
9699 #elif defined(_XM_SSE_INTRINSICS_)
9700 XMVECTOR vTemp = _mm_cmpneq_ps(V1,V2);
9701 return ((_mm_movemask_ps(vTemp)) != 0);
// Fallback path (preprocessor branch elided here): derive from the comparison-record variant.
9703 return XMComparisonAnyFalse(XMVector4EqualR(V1, V2));
9707 //------------------------------------------------------------------------------
// Returns TRUE iff ANY 32-bit lane differs bitwise (negation of XMVector4EqualInt).
// SSE path: equality mask != 0xF means at least one lane was not equal.
9709 XMFINLINE BOOL XMVector4NotEqualInt
9715 #if defined(_XM_NO_INTRINSICS_)
9716 return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2]) || (V1.vector4_u32[3] != V2.vector4_u32[3])) != 0);
9717 #elif defined(_XM_SSE_INTRINSICS_)
9718 __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
9719 return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])!=0xF) != 0);
// Fallback path (preprocessor branch elided here): derive from the comparison-record variant.
9721 return XMComparisonAnyFalse(XMVector4EqualIntR(V1, V2));
9725 //------------------------------------------------------------------------------
// Returns TRUE iff every component of V1 is strictly greater than V2's.
9727 XMFINLINE BOOL XMVector4Greater
9733 #if defined(_XM_NO_INTRINSICS_)
9734 return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2]) && (V1.vector4_f32[3] > V2.vector4_f32[3])) != 0);
9735 #elif defined(_XM_SSE_INTRINSICS_)
9736 XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
9737 return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
// Fallback path (preprocessor branch elided here): derive from the comparison-record variant.
9739 return XMComparisonAllTrue(XMVector4GreaterR(V1, V2));
9743 //------------------------------------------------------------------------------
// Greater-than comparison returning a CR6-style record: CR6TRUE when every lane
// of V1 > V2, CR6FALSE when every lane of V1 <= V2, 0 for a mixed result.
9745 XMFINLINE UINT XMVector4GreaterR
9751 #if defined(_XM_NO_INTRINSICS_)
9753 if (V1.vector4_f32[0] > V2.vector4_f32[0] &&
9754 V1.vector4_f32[1] > V2.vector4_f32[1] &&
9755 V1.vector4_f32[2] > V2.vector4_f32[2] &&
9756 V1.vector4_f32[3] > V2.vector4_f32[3])
9758 CR = XM_CRMASK_CR6TRUE;
9760 else if (V1.vector4_f32[0] <= V2.vector4_f32[0] &&
9761 V1.vector4_f32[1] <= V2.vector4_f32[1] &&
9762 V1.vector4_f32[2] <= V2.vector4_f32[2] &&
9763 V1.vector4_f32[3] <= V2.vector4_f32[3])
9765 CR = XM_CRMASK_CR6FALSE;
9769 #elif defined(_XM_SSE_INTRINSICS_)
// Conditions on iTest (==0xf / ==0) are elided in this excerpt.
9771 XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
9772 int iTest = _mm_movemask_ps(vTemp);
9774 CR = XM_CRMASK_CR6TRUE;
9778 CR = XM_CRMASK_CR6FALSE;
9781 #else // _XM_VMX128_INTRINSICS_
9782 #endif // _XM_VMX128_INTRINSICS_
9785 //------------------------------------------------------------------------------
// Returns TRUE iff every component of V1 is >= the corresponding component of V2.
9787 XMFINLINE BOOL XMVector4GreaterOrEqual
9793 #if defined(_XM_NO_INTRINSICS_)
9794 return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2]) && (V1.vector4_f32[3] >= V2.vector4_f32[3])) != 0);
9795 #elif defined(_XM_SSE_INTRINSICS_)
9796 XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
9797 return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
// Fallback path (preprocessor branch elided here): derive from the comparison-record variant.
9799 return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V1, V2));
9803 //------------------------------------------------------------------------------
// Greater-or-equal comparison returning a CR6-style record: CR6TRUE when every
// lane of V1 >= V2, CR6FALSE when every lane of V1 < V2, 0 for a mixed result.
9805 XMFINLINE UINT XMVector4GreaterOrEqualR
9811 #if defined(_XM_NO_INTRINSICS_)
9813 if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
9814 (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
9815 (V1.vector4_f32[2] >= V2.vector4_f32[2]) &&
9816 (V1.vector4_f32[3] >= V2.vector4_f32[3]))
9818 CR = XM_CRMASK_CR6TRUE;
9820 else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
9821 (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
9822 (V1.vector4_f32[2] < V2.vector4_f32[2]) &&
9823 (V1.vector4_f32[3] < V2.vector4_f32[3]))
9825 CR = XM_CRMASK_CR6FALSE;
9829 #elif defined(_XM_SSE_INTRINSICS_)
// Conditions on iTest (==0xf / ==0) are elided in this excerpt.
9831 XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
9832 int iTest = _mm_movemask_ps(vTemp);
9835 CR = XM_CRMASK_CR6TRUE;
9839 CR = XM_CRMASK_CR6FALSE;
9842 #else // _XM_VMX128_INTRINSICS_
9843 #endif // _XM_VMX128_INTRINSICS_
9846 //------------------------------------------------------------------------------
// Returns TRUE iff every component of V1 is strictly less than V2's.
9848 XMFINLINE BOOL XMVector4Less
9854 #if defined(_XM_NO_INTRINSICS_)
9855 return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2]) && (V1.vector4_f32[3] < V2.vector4_f32[3])) != 0);
9856 #elif defined(_XM_SSE_INTRINSICS_)
9857 XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
9858 return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
// Fallback (branch elided): less-than expressed as greater-than with swapped operands.
9860 return XMComparisonAllTrue(XMVector4GreaterR(V2, V1));
9864 //------------------------------------------------------------------------------
// Returns TRUE iff every component of V1 is <= the corresponding component of V2.
9866 XMFINLINE BOOL XMVector4LessOrEqual
9872 #if defined(_XM_NO_INTRINSICS_)
9873 return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2]) && (V1.vector4_f32[3] <= V2.vector4_f32[3])) != 0);
9874 #elif defined(_XM_SSE_INTRINSICS_)
9875 XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
9876 return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
// Fallback (branch elided): <= expressed as >= with swapped operands.
9878 return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V2, V1));
9882 //------------------------------------------------------------------------------
// Returns TRUE iff every component of V lies in [-Bounds, +Bounds] lanewise.
9884 XMFINLINE BOOL XMVector4InBounds
9890 #if defined(_XM_NO_INTRINSICS_)
9891 return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
9892 (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
9893 (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) &&
9894 (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3])) != 0);
9895 #elif defined(_XM_SSE_INTRINSICS_)
9896 // Test if less than or equal
9897 XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
9898 // Negate the bounds
9899 XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
9900 // Test if greater or equal (Reversed)
9901 vTemp2 = _mm_cmple_ps(vTemp2,V);
// AND the two half-tests; all four lanes must pass both.
9903 vTemp1 = _mm_and_ps(vTemp1,vTemp2);
9905 return ((_mm_movemask_ps(vTemp1)==0x0f) != 0);
// Fallback path (preprocessor branch elided here): derive from the comparison-record variant.
9907 return XMComparisonAllInBounds(XMVector4InBoundsR(V, Bounds));
9911 //------------------------------------------------------------------------------
// Bounds test returning a comparison record: XM_CRMASK_CR6BOUNDS when every
// component of V is within [-Bounds, +Bounds], otherwise 0.
9913 XMFINLINE UINT XMVector4InBoundsR
9919 #if defined(_XM_NO_INTRINSICS_)
9922 if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
9923 (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
9924 (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) &&
9925 (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]))
9927 CR = XM_CRMASK_CR6BOUNDS;
9931 #elif defined(_XM_SSE_INTRINSICS_)
9932 // Test if less than or equal
9933 XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
9934 // Negate the bounds
9935 XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
9936 // Test if greater or equal (Reversed)
9937 vTemp2 = _mm_cmple_ps(vTemp2,V);
// Both half-tests must hold in all four lanes.
9939 vTemp1 = _mm_and_ps(vTemp1,vTemp2);
9941 return (_mm_movemask_ps(vTemp1)==0x0f) ? XM_CRMASK_CR6BOUNDS : 0;
9942 #else // _XM_VMX128_INTRINSICS_
9943 #endif // _XM_VMX128_INTRINSICS_
9946 //------------------------------------------------------------------------------
// Returns TRUE if ANY component of V is NaN.
9948 XMFINLINE BOOL XMVector4IsNaN
9953 #if defined(_XM_NO_INTRINSICS_)
// XMISNAN checks exponent all-ones with nonzero mantissa (see macro at top of file).
9954 return (XMISNAN(V.vector4_f32[0]) ||
9955 XMISNAN(V.vector4_f32[1]) ||
9956 XMISNAN(V.vector4_f32[2]) ||
9957 XMISNAN(V.vector4_f32[3]));
9958 #elif defined(_XM_SSE_INTRINSICS_)
9959 // Test against itself. NaN is always not equal
9960 XMVECTOR vTempNan = _mm_cmpneq_ps(V,V);
9961 // If any are NaN, the mask is non-zero
9962 return (_mm_movemask_ps(vTempNan)!=0);
9963 #else // _XM_VMX128_INTRINSICS_
9964 #endif // _XM_VMX128_INTRINSICS_
9967 //------------------------------------------------------------------------------
// Returns TRUE if ANY component of V is +infinity or -infinity.
9969 XMFINLINE BOOL XMVector4IsInfinite
9974 #if defined(_XM_NO_INTRINSICS_)
// XMISINF masks the sign bit and compares against the infinity bit pattern (macro at top of file).
9976 return (XMISINF(V.vector4_f32[0]) ||
9977 XMISINF(V.vector4_f32[1]) ||
9978 XMISINF(V.vector4_f32[2]) ||
9979 XMISINF(V.vector4_f32[3]));
9981 #elif defined(_XM_SSE_INTRINSICS_)
9982 // Mask off the sign bit
9983 XMVECTOR vTemp = _mm_and_ps(V,g_XMAbsMask);
9984 // Compare to infinity
9985 vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
9986 // If any are infinity, the signs are true.
9987 return (_mm_movemask_ps(vTemp) != 0);
9988 #else // _XM_VMX128_INTRINSICS_
9989 #endif // _XM_VMX128_INTRINSICS_
9992 //------------------------------------------------------------------------------
9993 // Computation operations
9994 //------------------------------------------------------------------------------
9996 //------------------------------------------------------------------------------
// 4D dot product of V1 and V2; the scalar result is replicated into all four
// lanes of the returned vector.
9998 XMFINLINE XMVECTOR XMVector4Dot
10004 #if defined(_XM_NO_INTRINSICS_)
10008 Result.vector4_f32[0] =
10009 Result.vector4_f32[1] =
10010 Result.vector4_f32[2] =
10011 Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2] + V1.vector4_f32[3] * V2.vector4_f32[3];
10015 #elif defined(_XM_SSE_INTRINSICS_)
// SSE horizontal add via shuffles (pre-SSE3, so no haddps): pairwise sums
// are folded until lane Z holds x+y+z+w, which is then splatted.
10016 XMVECTOR vTemp2 = V2;
10017 XMVECTOR vTemp = _mm_mul_ps(V1,vTemp2);
10018 vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
10019 vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W;
10020 vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
10021 vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together
10022 return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
10023 #else // _XM_VMX128_INTRINSICS_
10024 #endif // _XM_VMX128_INTRINSICS_
10027 //------------------------------------------------------------------------------
// 4D cross product of three vectors (the 4D analogue of the 3D cross product):
// each output component is a 3x3 determinant of the other three components of
// V1, V2, V3. The scalar path spells out the cofactor expansion directly.
10029 XMFINLINE XMVECTOR XMVector4Cross
10036 #if defined(_XM_NO_INTRINSICS_)
10039 Result.vector4_f32[0] = (((V2.vector4_f32[2]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[2]))*V1.vector4_f32[1])-(((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[2])+(((V2.vector4_f32[1]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[1]))*V1.vector4_f32[3]);
10040 Result.vector4_f32[1] = (((V2.vector4_f32[3]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[3]))*V1.vector4_f32[0])-(((V2.vector4_f32[3]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[3]))*V1.vector4_f32[2])+(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[3]);
10041 Result.vector4_f32[2] = (((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[0])-(((V2.vector4_f32[0]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[0]))*V1.vector4_f32[1])+(((V2.vector4_f32[0]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[0]))*V1.vector4_f32[3]);
10042 Result.vector4_f32[3] = (((V2.vector4_f32[2]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[2]))*V1.vector4_f32[0])-(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[1])+(((V2.vector4_f32[1]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[1]))*V1.vector4_f32[2]);
10045 #elif defined(_XM_SSE_INTRINSICS_)
// SSE path: the same cofactor expansion computed four lanes at a time, with
// shuffles arranging the needed component permutations. The ordering of
// mul/sub/shuffle here is delicate — do not reorder without re-deriving.
// First determinant term, scaled by a permutation of V1.
10047 XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,1,3,2));
10048 XMVECTOR vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,3,2,3));
10049 vResult = _mm_mul_ps(vResult,vTemp3);
10050 // - V2wzwy * V3zwyz
10051 XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,3,2,3));
10052 vTemp3 = _mm_shuffle_ps(vTemp3,vTemp3,_MM_SHUFFLE(1,3,0,1));
10053 vTemp2 = _mm_mul_ps(vTemp2,vTemp3);
10054 vResult = _mm_sub_ps(vResult,vTemp2);
10056 XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(0,0,0,1));
10057 vResult = _mm_mul_ps(vResult,vTemp1);
// Second determinant term.
10060 vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,0,3,1));
10061 vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,3,0,3));
10062 vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
10063 // - V2wxwx * V3ywxz
10064 vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,1,2,1));
10065 vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(2,0,3,1));
10066 vTemp2 = _mm_mul_ps(vTemp2,vTemp1);
10067 vTemp3 = _mm_sub_ps(vTemp3,vTemp2);
10068 // vResult - temp * V1zzyy
10069 vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(1,1,2,2));
10070 vTemp1 = _mm_mul_ps(vTemp1,vTemp3);
10071 vResult = _mm_sub_ps(vResult,vTemp1);
// Third determinant term.
10074 vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,0,2,1));
10075 vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,1,0,2));
10076 vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
10077 // - V2zxyx * V3yzxy
10078 vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,0,2,1));
10079 vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,0,2,1));
10080 vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
10081 vTemp3 = _mm_sub_ps(vTemp3,vTemp1);
10082 // vResult + term * V1wwwz
10083 vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(2,3,3,3));
10084 vTemp3 = _mm_mul_ps(vTemp3,vTemp1);
10085 vResult = _mm_add_ps(vResult,vTemp3);
10087 #else // _XM_VMX128_INTRINSICS_
10088 #endif // _XM_VMX128_INTRINSICS_
10091 //------------------------------------------------------------------------------
// Squared 4D length of V, replicated to all lanes: simply dot(V, V).
// Shared by all code paths (no per-intrinsic variants needed).
10093 XMFINLINE XMVECTOR XMVector4LengthSq
10098 return XMVector4Dot(V, V);
10101 //------------------------------------------------------------------------------
// Estimated 1/length of the 4D vector V, replicated to all lanes. The SSE path
// uses _mm_rsqrt_ps (~12-bit precision); use XMVector4ReciprocalLength for
// full precision.
10103 XMFINLINE XMVECTOR XMVector4ReciprocalLengthEst
10108 #if defined(_XM_NO_INTRINSICS_)
10112 Result = XMVector4LengthSq(V);
10113 Result = XMVectorReciprocalSqrtEst(Result);
10117 #elif defined(_XM_SSE_INTRINSICS_)
10118 // Perform the dot product on x,y,z and w
10119 XMVECTOR vLengthSq = _mm_mul_ps(V,V);
10120 // vTemp has z and w
10121 XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
// Fold the four squared components into one lane via shuffles (pre-SSE3 horizontal add).
10123 vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10125 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
10127 vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
10128 // ??,??,x+z+y+w,??
10129 vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10130 // Splat the length
10131 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
10132 // Get the reciprocal
10133 vLengthSq = _mm_rsqrt_ps(vLengthSq);
10135 #else // _XM_VMX128_INTRINSICS_
10136 #endif // _XM_VMX128_INTRINSICS_
10139 //------------------------------------------------------------------------------
// Full-precision 1/length of the 4D vector V, replicated to all lanes.
// SSE path uses sqrt followed by a true divide (1.0/x) rather than rsqrt.
10141 XMFINLINE XMVECTOR XMVector4ReciprocalLength
10146 #if defined(_XM_NO_INTRINSICS_)
10150 Result = XMVector4LengthSq(V);
10151 Result = XMVectorReciprocalSqrt(Result);
10155 #elif defined(_XM_SSE_INTRINSICS_)
10156 // Perform the dot product on x,y,z and w
10157 XMVECTOR vLengthSq = _mm_mul_ps(V,V);
10158 // vTemp has z and w
10159 XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
// Horizontal-add of the four squares via shuffles; result lands in lane Z.
10161 vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10163 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
10165 vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
10166 // ??,??,x+z+y+w,??
10167 vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10168 // Splat the length
10169 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
10170 // Get the reciprocal
10171 vLengthSq = _mm_sqrt_ps(vLengthSq);
// Exact reciprocal; division by a zero-length vector yields infinity.
10173 vLengthSq = _mm_div_ps(g_XMOne,vLengthSq);
10175 #else // _XM_VMX128_INTRINSICS_
10176 #endif // _XM_VMX128_INTRINSICS_
10179 //------------------------------------------------------------------------------
// Estimated 4D length of V, replicated to all lanes. The SSE path actually
// uses full-precision _mm_sqrt_ps after the horizontal add, matching the
// scalar path's sqrt of the squared length.
10181 XMFINLINE XMVECTOR XMVector4LengthEst
10186 #if defined(_XM_NO_INTRINSICS_)
10190 Result = XMVector4LengthSq(V);
10191 Result = XMVectorSqrtEst(Result);
10195 #elif defined(_XM_SSE_INTRINSICS_)
10196 // Perform the dot product on x,y,z and w
10197 XMVECTOR vLengthSq = _mm_mul_ps(V,V);
10198 // vTemp has z and w
10199 XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
// Horizontal-add of the four squares via shuffles; result lands in lane Z.
10201 vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10203 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
10205 vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
10206 // ??,??,x+z+y+w,??
10207 vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10208 // Splat the length
10209 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
10210 // Prepare for the division
10211 vLengthSq = _mm_sqrt_ps(vLengthSq);
10213 #else // _XM_VMX128_INTRINSICS_
10214 #endif // _XM_VMX128_INTRINSICS_
10217 //------------------------------------------------------------------------------
// Full-precision 4D length of V (sqrt of dot(V,V)), replicated to all lanes.
10219 XMFINLINE XMVECTOR XMVector4Length
10224 #if defined(_XM_NO_INTRINSICS_)
10228 Result = XMVector4LengthSq(V);
10229 Result = XMVectorSqrt(Result);
10233 #elif defined(_XM_SSE_INTRINSICS_)
10234 // Perform the dot product on x,y,z and w
10235 XMVECTOR vLengthSq = _mm_mul_ps(V,V);
10236 // vTemp has z and w
10237 XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
// Horizontal-add of the four squares via shuffles; result lands in lane Z.
10239 vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10241 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
10243 vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
10244 // ??,??,x+z+y+w,??
10245 vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10246 // Splat the length
10247 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
10248 // Prepare for the division
10249 vLengthSq = _mm_sqrt_ps(vLengthSq);
10251 #else // _XM_VMX128_INTRINSICS_
10252 #endif // _XM_VMX128_INTRINSICS_
10255 //------------------------------------------------------------------------------
10256 // XMVector4NormalizeEst uses a reciprocal estimate and
10257 // returns QNaN on zero and infinite vectors.
// Fast approximate normalization: multiplies V by rsqrt(dot(V,V)).
// The SSE path uses _mm_rsqrt_ps (~12-bit precision estimate) with no
// zero/infinity guarding -- see XMVector4Normalize for the guarded version.
// NOTE(review): listing has gaps (signature/braces/return missing).
10259 XMFINLINE XMVECTOR XMVector4NormalizeEst
10264 #if defined(_XM_NO_INTRINSICS_)
10267 Result = XMVector4ReciprocalLength(V);
10268 Result = XMVectorMultiply(V, Result);
10271 #elif defined(_XM_SSE_INTRINSICS_)
10272 // Perform the dot product on x,y,z and w
10273 XMVECTOR vLengthSq = _mm_mul_ps(V,V);
10274 // vTemp has z and w
10275 XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
// Partial sums: x+z, y+w
10277 vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10279 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
10281 vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
10282 // ??,??,x+z+y+w,??
10283 vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10284 // Splat the length
10285 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
10286 // Get the reciprocal
10287 XMVECTOR vResult = _mm_rsqrt_ps(vLengthSq);
10288 // Reciprocal mul to perform the normalization
10289 vResult = _mm_mul_ps(vResult,V);
10291 #else // _XM_VMX128_INTRINSICS_
10292 #endif // _XM_VMX128_INTRINSICS_
10295 //------------------------------------------------------------------------------
10297 XMFINLINE XMVECTOR XMVector4Normalize
// Full-precision normalization of a 4D vector.  The SSE path divides V by
// the splatted length and then patches up the two degenerate cases:
//   * zero-length input  -> result forced to zero (via vZeroMask AND), and
//   * infinite length    -> result forced to QNaN (via the select at the end).
// NOTE(review): listing has gaps (signature/braces/return missing); the
// scalar path's divide-by-zero guard branch is among the missing lines.
10302 #if defined(_XM_NO_INTRINSICS_)
10306 vResult = XMVector4Length( V );
10307 fLength = vResult.vector4_f32[0];
10309 // Prevent divide by zero
10311 fLength = 1.0f/fLength;
10314 vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
10315 vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
10316 vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
10317 vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
10320 #elif defined(_XM_SSE_INTRINSICS_)
10321 // Perform the dot product on x,y,z and w
10322 XMVECTOR vLengthSq = _mm_mul_ps(V,V);
10323 // vTemp has z and w
10324 XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
// Partial sums: x+z, y+w
10326 vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10328 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
10330 vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
10331 // ??,??,x+z+y+w,??
10332 vLengthSq = _mm_add_ps(vLengthSq,vTemp);
10333 // Splat the length
10334 vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
10335 // Prepare for the division
10336 XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
10337 // Create zero with a single instruction
10338 XMVECTOR vZeroMask = _mm_setzero_ps();
10339 // Test for a divide by zero (Must be FP to detect -0.0)
10340 vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
10341 // Failsafe on zero (Or epsilon) length planes
10342 // If the length is infinity, set the elements to zero
10343 vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
10344 // Divide to perform the normalization
10345 vResult = _mm_div_ps(V,vResult);
10346 // Any that are infinity, set to zero
10347 vResult = _mm_and_ps(vResult,vZeroMask);
10348 // Select qnan or result based on infinite length
10349 XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
10350 XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
10351 vResult = _mm_or_ps(vTemp1,vTemp2);
10353 #else // _XM_VMX128_INTRINSICS_
10354 #endif // _XM_VMX128_INTRINSICS_
10357 //------------------------------------------------------------------------------
10359 XMFINLINE XMVECTOR XMVector4ClampLength
// Scalar-parameter convenience wrapper: replicates LengthMin/LengthMax into
// vectors and forwards to XMVector4ClampLengthV, which does the real work.
// NOTE(review): listing has gaps (signature/braces missing).
10366 #if defined(_XM_NO_INTRINSICS_)
10371 ClampMax = XMVectorReplicate(LengthMax);
10372 ClampMin = XMVectorReplicate(LengthMin);
10374 return XMVector4ClampLengthV(V, ClampMin, ClampMax);
10376 #elif defined(_XM_SSE_INTRINSICS_)
10377 XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
10378 XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
10379 return XMVector4ClampLengthV(V, ClampMin, ClampMax);
// NOTE(review): sibling functions end with "#else // _XM_VMX128_INTRINSICS_";
// this #elif on XM_NO_MISALIGNED_VECTOR_ACCESS looks inconsistent -- confirm
// against the original source before changing.
10380 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
10381 #endif // _XM_VMX128_INTRINSICS_
10384 //------------------------------------------------------------------------------
10386 XMFINLINE XMVECTOR XMVector4ClampLengthV
// Clamps the length of V into [LengthMin, LengthMax].  Both bounds must be
// splatted vectors (all four lanes equal) and satisfy 0 <= Min <= Max --
// enforced by the XMASSERTs below.  Algorithm: compute length and normal via
// reciprocal sqrt, patch the zero/infinite-length cases with selects, clamp
// the length, rescale the normal, and finally return V itself unchanged when
// the length was already inside the range (no precision loss).
// NOTE(review): listing has gaps (signature pieces/braces/returns missing).
10389 FXMVECTOR LengthMin,
10390 FXMVECTOR LengthMax
10393 #if defined(_XM_NO_INTRINSICS_)
10395 XMVECTOR ClampLength;
10397 XMVECTOR RcpLength;
10401 XMVECTOR InfiniteLength;
10402 XMVECTOR ZeroLength;
10404 XMVECTOR ControlMax;
10405 XMVECTOR ControlMin;
// Validate that the bounds are splatted and ordered
10409 XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[2] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[3] == LengthMin.vector4_f32[0]));
10410 XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[2] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[3] == LengthMax.vector4_f32[0]));
10411 XMASSERT(XMVector4GreaterOrEqual(LengthMin, XMVectorZero()));
10412 XMASSERT(XMVector4GreaterOrEqual(LengthMax, XMVectorZero()));
10413 XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));
10415 LengthSq = XMVector4LengthSq(V);
10417 Zero = XMVectorZero();
10419 RcpLength = XMVectorReciprocalSqrt(LengthSq);
10421 InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
10422 ZeroLength = XMVectorEqual(LengthSq, Zero);
10424 Normal = XMVectorMultiply(V, RcpLength);
// Length = LengthSq * rsqrt(LengthSq) == sqrt(LengthSq)
10426 Length = XMVectorMultiply(LengthSq, RcpLength);
// Select is true only when length is neither zero nor infinite
10428 Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
10429 Length = XMVectorSelect(LengthSq, Length, Select);
10430 Normal = XMVectorSelect(LengthSq, Normal, Select);
10432 ControlMax = XMVectorGreater(Length, LengthMax);
10433 ControlMin = XMVectorLess(Length, LengthMin);
10435 ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
10436 ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
10438 Result = XMVectorMultiply(Normal, ClampLength);
10440 // Preserve the original vector (with no precision loss) if the length falls within the given range
10441 Control = XMVectorEqualInt(ControlMax, ControlMin);
10442 Result = XMVectorSelect(Result, V, Control);
10446 #elif defined(_XM_SSE_INTRINSICS_)
10447 XMVECTOR ClampLength;
10449 XMVECTOR RcpLength;
10453 XMVECTOR InfiniteLength;
10454 XMVECTOR ZeroLength;
10456 XMVECTOR ControlMax;
10457 XMVECTOR ControlMin;
// Same preconditions as the scalar path, expressed via accessors
10461 XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetW(LengthMin) == XMVectorGetX(LengthMin)));
10462 XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetW(LengthMax) == XMVectorGetX(LengthMax)));
10463 XMASSERT(XMVector4GreaterOrEqual(LengthMin, g_XMZero));
10464 XMASSERT(XMVector4GreaterOrEqual(LengthMax, g_XMZero));
10465 XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));
10467 LengthSq = XMVector4LengthSq(V);
10468 Zero = XMVectorZero();
10469 RcpLength = XMVectorReciprocalSqrt(LengthSq);
10470 InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
10471 ZeroLength = XMVectorEqual(LengthSq, Zero);
10472 Normal = _mm_mul_ps(V, RcpLength);
10473 Length = _mm_mul_ps(LengthSq, RcpLength);
10474 Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
10475 Length = XMVectorSelect(LengthSq, Length, Select);
10476 Normal = XMVectorSelect(LengthSq, Normal, Select);
10477 ControlMax = XMVectorGreater(Length, LengthMax);
10478 ControlMin = XMVectorLess(Length, LengthMin);
10479 ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
10480 ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
10481 Result = _mm_mul_ps(Normal, ClampLength);
10482 // Preserve the original vector (with no precision loss) if the length falls within the given range
10483 Control = XMVectorEqualInt(ControlMax,ControlMin);
10484 Result = XMVectorSelect(Result,V,Control);
10487 #else // _XM_VMX128_INTRINSICS_
10488 #endif // _XM_VMX128_INTRINSICS_
10491 //------------------------------------------------------------------------------
10493 XMFINLINE XMVECTOR XMVector4Reflect
// Reflects a 4D incident vector across a normal:
//   Result = Incident - 2 * dot(Incident, Normal) * Normal
// Normal is presumably expected to be unit length (standard reflection
// formula) -- not asserted here; confirm against the library docs.
// NOTE(review): listing has gaps (Normal parameter line/braces/return missing).
10495 FXMVECTOR Incident,
10499 #if defined(_XM_NO_INTRINSICS_)
10503 // Result = Incident - (2 * dot(Incident, Normal)) * Normal
10504 Result = XMVector4Dot(Incident, Normal);
10505 Result = XMVectorAdd(Result, Result);
10506 Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);
10510 #elif defined(_XM_SSE_INTRINSICS_)
10511 // Result = Incident - (2 * dot(Incident, Normal)) * Normal
10512 XMVECTOR Result = XMVector4Dot(Incident,Normal);
10513 Result = _mm_add_ps(Result,Result);
10514 Result = _mm_mul_ps(Result,Normal);
10515 Result = _mm_sub_ps(Incident,Result);
10517 #else // _XM_VMX128_INTRINSICS_
10518 #endif // _XM_VMX128_INTRINSICS_
10521 //------------------------------------------------------------------------------
10523 XMFINLINE XMVECTOR XMVector4Refract
// Scalar-parameter convenience wrapper: splats RefractionIndex and forwards
// to XMVector4RefractV, which implements the refraction math.
// NOTE(review): listing has gaps (Normal parameter line/braces missing).
10525 FXMVECTOR Incident,
10527 FLOAT RefractionIndex
10530 #if defined(_XM_NO_INTRINSICS_)
10533 Index = XMVectorReplicate(RefractionIndex);
10534 return XMVector4RefractV(Incident, Normal, Index);
10536 #elif defined(_XM_SSE_INTRINSICS_)
10537 XMVECTOR Index = _mm_set_ps1(RefractionIndex);
10538 return XMVector4RefractV(Incident,Normal,Index);
// NOTE(review): inconsistent with siblings that use "#else // _XM_VMX128_..."
// -- confirm against the original source.
10539 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
10540 #endif // _XM_VMX128_INTRINSICS_
10543 //------------------------------------------------------------------------------
10545 XMFINLINE XMVECTOR XMVector4RefractV
// Refracts Incident through the surface with the given (splatted, per-lane)
// RefractionIndex using Snell's law:
//   Result = RefractionIndex * Incident
//          - Normal * (RefractionIndex * dot(I,N) + sqrt(1 - RI^2 * (1 - dot(I,N)^2)))
// When the discriminant R <= 0 total internal reflection occurs and the
// result is the zero vector.
// NOTE(review): listing has gaps (Normal parameter line, braces, the scalar
// path's early-return, and final returns are among the missing lines).
10547 FXMVECTOR Incident,
10549 FXMVECTOR RefractionIndex
10552 #if defined(_XM_NO_INTRINSICS_)
10556 CONST XMVECTOR Zero = XMVectorZero();
10558 // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
10559 // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
10561 IDotN = XMVector4Dot(Incident, Normal);
10563 // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
10564 R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
10565 R = XMVectorMultiply(R, RefractionIndex);
10566 R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);
10568 if (XMVector4LessOrEqual(R, Zero))
10570 // Total internal reflection
10577 // R = RefractionIndex * IDotN + sqrt(R)
10578 R = XMVectorSqrt(R);
10579 R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);
10581 // Result = RefractionIndex * Incident - Normal * R
10582 Result = XMVectorMultiply(RefractionIndex, Incident);
10583 Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);
10588 #elif defined(_XM_SSE_INTRINSICS_)
10589 // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
10590 // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
10592 XMVECTOR IDotN = XMVector4Dot(Incident,Normal);
10594 // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
10595 XMVECTOR R = _mm_mul_ps(IDotN,IDotN);
10596 R = _mm_sub_ps(g_XMOne,R);
10597 R = _mm_mul_ps(R, RefractionIndex);
10598 R = _mm_mul_ps(R, RefractionIndex);
10599 R = _mm_sub_ps(g_XMOne,R);
// All four lanes <= 0 (movemask == 0x0f) => total internal reflection
10601 XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
10602 if (_mm_movemask_ps(vResult)==0x0f)
10604 // Total internal reflection
10605 vResult = g_XMZero;
10609 // R = RefractionIndex * IDotN + sqrt(R)
10610 R = _mm_sqrt_ps(R);
10611 vResult = _mm_mul_ps(RefractionIndex, IDotN);
10612 R = _mm_add_ps(R,vResult);
10613 // Result = RefractionIndex * Incident - Normal * R
10614 vResult = _mm_mul_ps(RefractionIndex, Incident);
10615 R = _mm_mul_ps(R,Normal);
10616 vResult = _mm_sub_ps(vResult,R);
10619 #else // _XM_VMX128_INTRINSICS_
10620 #endif // _XM_VMX128_INTRINSICS_
10623 //------------------------------------------------------------------------------
10625 XMFINLINE XMVECTOR XMVector4Orthogonal
// Returns a vector orthogonal to V: (z, w, -x, -y).  dot(V, Result) is
// x*z + y*w - z*x - w*y == 0 by construction.
// NOTE(review): listing has gaps (signature/braces/returns missing).
10630 #if defined(_XM_NO_INTRINSICS_)
10633 Result.vector4_f32[0] = V.vector4_f32[2];
10634 Result.vector4_f32[1] = V.vector4_f32[3];
10635 Result.vector4_f32[2] = -V.vector4_f32[0];
10636 Result.vector4_f32[3] = -V.vector4_f32[1];
10639 #elif defined(_XM_SSE_INTRINSICS_)
// Swap xy<->zw with a shuffle, then negate the new z/w lanes by multiply
10640 static const XMVECTORF32 FlipZW = {1.0f,1.0f,-1.0f,-1.0f};
10641 XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,0,3,2));
10642 vResult = _mm_mul_ps(vResult,FlipZW);
10644 #else // _XM_VMX128_INTRINSICS_
10645 #endif // _XM_VMX128_INTRINSICS_
10648 //------------------------------------------------------------------------------
10650 XMFINLINE XMVECTOR XMVector4AngleBetweenNormalsEst
// Estimated angle (radians, replicated to all lanes) between two vectors
// that are presumably already normalized: acos(clamp(dot(N1,N2), -1, 1))
// using the fast XMVectorACosEst approximation.
// NOTE(review): listing has gaps (signature/braces/returns missing).
10656 #if defined(_XM_NO_INTRINSICS_)
10658 XMVECTOR NegativeOne;
10662 Result = XMVector4Dot(N1, N2);
10663 NegativeOne = XMVectorSplatConstant(-1, 0);
10664 One = XMVectorSplatOne();
// Clamp guards acos against dot products just outside [-1,1] due to rounding
10665 Result = XMVectorClamp(Result, NegativeOne, One);
10666 Result = XMVectorACosEst(Result);
10670 #elif defined(_XM_SSE_INTRINSICS_)
10671 XMVECTOR vResult = XMVector4Dot(N1,N2);
10672 // Clamp to -1.0f to 1.0f
10673 vResult = _mm_max_ps(vResult,g_XMNegativeOne);
// NOTE(review): stray double semicolon on the next line (harmless)
10674 vResult = _mm_min_ps(vResult,g_XMOne);;
10675 vResult = XMVectorACosEst(vResult);
10677 #else // _XM_VMX128_INTRINSICS_
10678 #endif // _XM_VMX128_INTRINSICS_
10681 //------------------------------------------------------------------------------
10683 XMFINLINE XMVECTOR XMVector4AngleBetweenNormals
// Angle (radians, replicated to all lanes) between two vectors that are
// presumably already normalized: acos(clamp(dot(N1,N2), -1, 1)), using the
// full-precision XMVectorACos.
// NOTE(review): listing has gaps (signature/braces/returns missing).
10689 #if defined(_XM_NO_INTRINSICS_)
10691 XMVECTOR NegativeOne;
10695 Result = XMVector4Dot(N1, N2);
10696 NegativeOne = XMVectorSplatConstant(-1, 0);
10697 One = XMVectorSplatOne();
// Clamp guards acos against dot products just outside [-1,1] due to rounding
10698 Result = XMVectorClamp(Result, NegativeOne, One);
10699 Result = XMVectorACos(Result);
10703 #elif defined(_XM_SSE_INTRINSICS_)
10704 XMVECTOR vResult = XMVector4Dot(N1,N2);
10705 // Clamp to -1.0f to 1.0f
10706 vResult = _mm_max_ps(vResult,g_XMNegativeOne);
// NOTE(review): stray double semicolon on the next line (harmless)
10707 vResult = _mm_min_ps(vResult,g_XMOne);;
10708 vResult = XMVectorACos(vResult);
10710 #else // _XM_VMX128_INTRINSICS_
10711 #endif // _XM_VMX128_INTRINSICS_
10714 //------------------------------------------------------------------------------
10716 XMFINLINE XMVECTOR XMVector4AngleBetweenVectors
// Angle (radians, replicated to all lanes) between two arbitrary (not
// necessarily normalized) vectors: acos(clamp(dot(V1,V2)/(|V1||V2|), -1, 1)).
// The division is done via the product of the two reciprocal lengths.
// NOTE(review): listing has gaps (signature/braces/returns missing).
10722 #if defined(_XM_NO_INTRINSICS_)
10728 XMVECTOR NegativeOne;
10732 L1 = XMVector4ReciprocalLength(V1);
10733 L2 = XMVector4ReciprocalLength(V2);
10735 Dot = XMVector4Dot(V1, V2);
// L1 = 1/(|V1|*|V2|)
10737 L1 = XMVectorMultiply(L1, L2);
10739 CosAngle = XMVectorMultiply(Dot, L1);
10740 NegativeOne = XMVectorSplatConstant(-1, 0);
10741 One = XMVectorSplatOne();
10742 CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);
10744 Result = XMVectorACos(CosAngle);
10748 #elif defined(_XM_SSE_INTRINSICS_)
10755 L1 = XMVector4ReciprocalLength(V1);
10756 L2 = XMVector4ReciprocalLength(V2);
10757 Dot = XMVector4Dot(V1, V2);
10758 L1 = _mm_mul_ps(L1,L2);
10759 CosAngle = _mm_mul_ps(Dot,L1);
10760 CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne, g_XMOne);
10761 Result = XMVectorACos(CosAngle);
10764 #else // _XM_VMX128_INTRINSICS_
10765 #endif // _XM_VMX128_INTRINSICS_
10768 //------------------------------------------------------------------------------
10770 XMFINLINE XMVECTOR XMVector4Transform
// Transforms the 4D vector V by matrix M (row-vector convention: the scalar
// path computes V * M, with M.m[row][col]).  The SSE path is the classic
// splat-and-madd form: splat each component of V, multiply by the matching
// matrix row, and sum.
// NOTE(review): listing has gaps (signature/braces, the scalar result's
// initializer tail, and the SSE return are among the missing lines).
10776 #if defined(_XM_NO_INTRINSICS_)
10777 FLOAT fX = (M.m[0][0]*V.vector4_f32[0])+(M.m[1][0]*V.vector4_f32[1])+(M.m[2][0]*V.vector4_f32[2])+(M.m[3][0]*V.vector4_f32[3]);
10778 FLOAT fY = (M.m[0][1]*V.vector4_f32[0])+(M.m[1][1]*V.vector4_f32[1])+(M.m[2][1]*V.vector4_f32[2])+(M.m[3][1]*V.vector4_f32[3]);
10779 FLOAT fZ = (M.m[0][2]*V.vector4_f32[0])+(M.m[1][2]*V.vector4_f32[1])+(M.m[2][2]*V.vector4_f32[2])+(M.m[3][2]*V.vector4_f32[3]);
10780 FLOAT fW = (M.m[0][3]*V.vector4_f32[0])+(M.m[1][3]*V.vector4_f32[1])+(M.m[2][3]*V.vector4_f32[2])+(M.m[3][3]*V.vector4_f32[3]);
10781 XMVECTOR vResult = {
10789 #elif defined(_XM_SSE_INTRINSICS_)
10790 // Splat x,y,z and w
10791 XMVECTOR vTempX = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
10792 XMVECTOR vTempY = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
10793 XMVECTOR vTempZ = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
10794 XMVECTOR vTempW = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
10795 // Mul by the matrix
10796 vTempX = _mm_mul_ps(vTempX,M.r[0]);
10797 vTempY = _mm_mul_ps(vTempY,M.r[1]);
10798 vTempZ = _mm_mul_ps(vTempZ,M.r[2]);
10799 vTempW = _mm_mul_ps(vTempW,M.r[3]);
10800 // Add them all together
10801 vTempX = _mm_add_ps(vTempX,vTempY);
10802 vTempZ = _mm_add_ps(vTempZ,vTempW);
10803 vTempX = _mm_add_ps(vTempX,vTempZ);
10805 #else // _XM_VMX128_INTRINSICS_
10806 #endif // _XM_VMX128_INTRINSICS_
10809 //------------------------------------------------------------------------------
10811 XMINLINE XMFLOAT4* XMVector4TransformStream
// Transforms VectorCount 4D vectors from pInputStream to pOutputStream by
// matrix M, honoring arbitrary byte strides for both streams (streams are
// walked via BYTE pointers).  Returns pOutputStream.  The SSE path uses
// unaligned loads/stores so the strides need not be 16-byte aligned.
// NOTE(review): listing has gaps (the matrix parameter line, braces, and
// several local declarations are among the missing lines).
10813 XMFLOAT4* pOutputStream,
10814 size_t OutputStride,
10815 CONST XMFLOAT4* pInputStream,
10816 size_t InputStride,
10817 size_t VectorCount,
10821 #if defined(_XM_NO_INTRINSICS_)
10830 CONST BYTE* pInputVector = (CONST BYTE*)pInputStream;
10831 BYTE* pOutputVector = (BYTE*)pOutputStream;
10833 XMASSERT(pOutputStream);
10834 XMASSERT(pInputStream);
10836 for (i = 0; i < VectorCount; i++)
10838 V = XMLoadFloat4((const XMFLOAT4*)pInputVector);
10839 W = XMVectorSplatW(V);
10840 Z = XMVectorSplatZ(V);
10841 Y = XMVectorSplatY(V);
10842 X = XMVectorSplatX(V);
10843 // W = XMVectorReplicate(((XMFLOAT4*)pInputVector)->w);
10844 // Z = XMVectorReplicate(((XMFLOAT4*)pInputVector)->z);
10845 // Y = XMVectorReplicate(((XMFLOAT4*)pInputVector)->y);
10846 // X = XMVectorReplicate(((XMFLOAT4*)pInputVector)->x);
// Result = X*r0 + Y*r1 + Z*r2 + W*r3 (accumulated back-to-front)
10848 Result = XMVectorMultiply(W, M.r[3]);
10849 Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
10850 Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
10851 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
10853 XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);
// Advance both stream cursors by their byte strides
10855 pInputVector += InputStride;
10856 pOutputVector += OutputStride;
10859 return pOutputStream;
10861 #elif defined(_XM_SSE_INTRINSICS_)
10864 XMASSERT(pOutputStream);
10865 XMASSERT(pInputStream);
10867 const BYTE*pInputVector = reinterpret_cast<const BYTE *>(pInputStream);
10868 BYTE* pOutputVector = reinterpret_cast<BYTE *>(pOutputStream);
10869 for (i = 0; i < VectorCount; i++)
10871 // Fetch the row and splat it
10872 XMVECTOR vTempx = _mm_loadu_ps(reinterpret_cast<const float *>(pInputVector));
10873 XMVECTOR vTempy = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(1,1,1,1));
10874 XMVECTOR vTempz = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(2,2,2,2));
10875 XMVECTOR vTempw = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(3,3,3,3));
10876 vTempx = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(0,0,0,0));
10877 vTempx = _mm_mul_ps(vTempx,M.r[0]);
10878 vTempy = _mm_mul_ps(vTempy,M.r[1]);
10879 vTempz = _mm_mul_ps(vTempz,M.r[2]);
10880 vTempw = _mm_mul_ps(vTempw,M.r[3]);
10881 vTempx = _mm_add_ps(vTempx,vTempy);
10882 vTempw = _mm_add_ps(vTempw,vTempz);
10883 vTempw = _mm_add_ps(vTempw,vTempx);
10884 // Store the transformed vector
10885 _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vTempw);
10887 pInputVector += InputStride;
10888 pOutputVector += OutputStride;
10890 return pOutputStream;
// NOTE(review): inconsistent with siblings that use "#else // _XM_VMX128_..."
// -- confirm against the original source.
10891 #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
10892 #endif // _XM_VMX128_INTRINSICS_
10897 /****************************************************************************
10899 * XMVECTOR operators
10901 ****************************************************************************/
10903 #ifndef XM_NO_OPERATOR_OVERLOADS
// Arithmetic operator overloads for XMVECTOR, all thin wrappers over the
// corresponding XMVector* functions (add/subtract/multiply/divide/scale).
// Compound operators mutate and return their left operand; binary operators
// return a new vector.  Can be disabled wholesale via XM_NO_OPERATOR_OVERLOADS.
// NOTE(review): listing has gaps -- parameter lists, braces and most returns
// are missing from every operator below; comments only, code untouched.
10905 //------------------------------------------------------------------------------
// Unary plus: identity (body elided in this listing)
10907 XMFINLINE XMVECTOR operator+ (FXMVECTOR V)
10912 //------------------------------------------------------------------------------
10914 XMFINLINE XMVECTOR operator- (FXMVECTOR V)
10916 return XMVectorNegate(V);
10919 //------------------------------------------------------------------------------
10921 XMFINLINE XMVECTOR& operator+=
10927 V1 = XMVectorAdd(V1, V2);
10931 //------------------------------------------------------------------------------
10933 XMFINLINE XMVECTOR& operator-=
10939 V1 = XMVectorSubtract(V1, V2);
10943 //------------------------------------------------------------------------------
10945 XMFINLINE XMVECTOR& operator*=
10951 V1 = XMVectorMultiply(V1, V2);
10955 //------------------------------------------------------------------------------
10957 XMFINLINE XMVECTOR& operator/=
10963 V1 = XMVectorDivide(V1,V2);
10967 //------------------------------------------------------------------------------
// Scalar compound scale: V *= S
10969 XMFINLINE XMVECTOR& operator*=
10975 V = XMVectorScale(V, S);
10979 //------------------------------------------------------------------------------
// Scalar compound divide implemented as scale by reciprocal
10981 XMFINLINE XMVECTOR& operator/=
10987 V = XMVectorScale(V, 1.0f / S);
10991 //------------------------------------------------------------------------------
10993 XMFINLINE XMVECTOR operator+
10999 return XMVectorAdd(V1, V2);
11002 //------------------------------------------------------------------------------
11004 XMFINLINE XMVECTOR operator-
11010 return XMVectorSubtract(V1, V2);
11013 //------------------------------------------------------------------------------
11015 XMFINLINE XMVECTOR operator*
11021 return XMVectorMultiply(V1, V2);
11024 //------------------------------------------------------------------------------
11026 XMFINLINE XMVECTOR operator/
11032 return XMVectorDivide(V1,V2);
11035 //------------------------------------------------------------------------------
// vector * scalar
11037 XMFINLINE XMVECTOR operator*
11043 return XMVectorScale(V, S);
11046 //------------------------------------------------------------------------------
// vector / scalar, as scale by reciprocal
11048 XMFINLINE XMVECTOR operator/
11054 return XMVectorScale(V, 1.0f / S);
11057 //------------------------------------------------------------------------------
// scalar * vector (commuted overload)
11059 XMFINLINE XMVECTOR operator*
11065 return XMVectorScale(V, S);
11068 #endif // !XM_NO_OPERATOR_OVERLOADS
11070 /****************************************************************************
11072 * XMFLOAT2 operators
11074 ****************************************************************************/
11076 //------------------------------------------------------------------------------
// Constructors and assignment operators for the 2-component storage types
// (XMFLOAT2, XMINT2, XMUINT2, XMHALF2, and the packed/normalized 2-component
// integer formats).  The packed types convert through XMVectorSet/XMLoadFloat2
// and the matching XMStore* routine, which performs the float->packed
// conversion.  NOTE(review): this listing is missing most bodies, parameter
// lists and returns for these members; comments only, code bytes untouched.
11078 XMFINLINE _XMFLOAT2::_XMFLOAT2
11080 CONST FLOAT* pArray
11087 //------------------------------------------------------------------------------
11089 XMFINLINE _XMFLOAT2& _XMFLOAT2::operator=
11091 CONST _XMFLOAT2& Float2
11099 //------------------------------------------------------------------------------
11101 XMFINLINE XMFLOAT2A& XMFLOAT2A::operator=
11103 CONST XMFLOAT2A& Float2
11111 /****************************************************************************
11115 ****************************************************************************/
11117 XMFINLINE _XMINT2::_XMINT2
11126 //------------------------------------------------------------------------------
11128 XMFINLINE XMINT2& _XMINT2::operator=
11130 CONST _XMINT2& Int2
11138 /****************************************************************************
11140 * XMUINT2 operators
11142 ****************************************************************************/
11144 XMFINLINE _XMUINT2::_XMUINT2
11153 //------------------------------------------------------------------------------
11155 XMFINLINE XMUINT2& _XMUINT2::operator=
11157 CONST _XMUINT2& UInt2
11165 /****************************************************************************
11167 * XMHALF2 operators
11169 ****************************************************************************/
11171 //------------------------------------------------------------------------------
11173 XMFINLINE _XMHALF2::_XMHALF2
11182 //------------------------------------------------------------------------------
// Construct from two floats: each component converted to half precision
11184 XMFINLINE _XMHALF2::_XMHALF2
11190 x = XMConvertFloatToHalf(_x);
11191 y = XMConvertFloatToHalf(_y);
11194 //------------------------------------------------------------------------------
// Construct from a float array: pArray must hold at least 2 floats
11196 XMFINLINE _XMHALF2::_XMHALF2
11198 CONST FLOAT* pArray
11201 x = XMConvertFloatToHalf(pArray[0]);
11202 y = XMConvertFloatToHalf(pArray[1]);
11205 //------------------------------------------------------------------------------
11207 XMFINLINE _XMHALF2& _XMHALF2::operator=
11209 CONST _XMHALF2& Half2
11217 /****************************************************************************
11219 * XMSHORTN2 operators
11221 ****************************************************************************/
11223 //------------------------------------------------------------------------------
11225 XMFINLINE _XMSHORTN2::_XMSHORTN2
11227 CONST SHORT* pArray
11234 //------------------------------------------------------------------------------
// Construct from two floats via XMStoreShortN2 (signed-normalized packing)
11236 XMFINLINE _XMSHORTN2::_XMSHORTN2
11242 XMStoreShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
11245 //------------------------------------------------------------------------------
11247 XMFINLINE _XMSHORTN2::_XMSHORTN2
11249 CONST FLOAT* pArray
11252 XMStoreShortN2(this, XMLoadFloat2((const XMFLOAT2*)pArray));
11255 //------------------------------------------------------------------------------
11257 XMFINLINE _XMSHORTN2& _XMSHORTN2::operator=
11259 CONST _XMSHORTN2& ShortN2
11267 /****************************************************************************
11269 * XMSHORT2 operators
11271 ****************************************************************************/
11273 //------------------------------------------------------------------------------
11275 XMFINLINE _XMSHORT2::_XMSHORT2
11277 CONST SHORT* pArray
11284 //------------------------------------------------------------------------------
11286 XMFINLINE _XMSHORT2::_XMSHORT2
11292 XMStoreShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
11295 //------------------------------------------------------------------------------
11297 XMFINLINE _XMSHORT2::_XMSHORT2
11299 CONST FLOAT* pArray
11302 XMStoreShort2(this, XMLoadFloat2((const XMFLOAT2*)pArray));
11305 //------------------------------------------------------------------------------
11307 XMFINLINE _XMSHORT2& _XMSHORT2::operator=
11309 CONST _XMSHORT2& Short2
11317 /****************************************************************************
11319 * XMUSHORTN2 operators
11321 ****************************************************************************/
11323 //------------------------------------------------------------------------------
11325 XMFINLINE _XMUSHORTN2::_XMUSHORTN2
11327 CONST USHORT* pArray
11334 //------------------------------------------------------------------------------
11336 XMFINLINE _XMUSHORTN2::_XMUSHORTN2
11342 XMStoreUShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
11345 //------------------------------------------------------------------------------
11347 XMFINLINE _XMUSHORTN2::_XMUSHORTN2
11349 CONST FLOAT* pArray
11352 XMStoreUShortN2(this, XMLoadFloat2((const XMFLOAT2*)pArray));
11355 //------------------------------------------------------------------------------
11357 XMFINLINE _XMUSHORTN2& _XMUSHORTN2::operator=
11359 CONST _XMUSHORTN2& UShortN2
11367 /****************************************************************************
11369 * XMUSHORT2 operators
11371 ****************************************************************************/
11373 //------------------------------------------------------------------------------
11375 XMFINLINE _XMUSHORT2::_XMUSHORT2
11377 CONST USHORT* pArray
11384 //------------------------------------------------------------------------------
11386 XMFINLINE _XMUSHORT2::_XMUSHORT2
11392 XMStoreUShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
11395 //------------------------------------------------------------------------------
11397 XMFINLINE _XMUSHORT2::_XMUSHORT2
11399 CONST FLOAT* pArray
11402 XMStoreUShort2(this, XMLoadFloat2((const XMFLOAT2*)pArray));
11405 //------------------------------------------------------------------------------
11407 XMFINLINE _XMUSHORT2& _XMUSHORT2::operator=
11409 CONST _XMUSHORT2& UShort2
11417 /****************************************************************************
11419 * XMBYTEN2 operators
11421 ****************************************************************************/
11423 //------------------------------------------------------------------------------
11425 XMFINLINE _XMBYTEN2::_XMBYTEN2
11434 //------------------------------------------------------------------------------
11436 XMFINLINE _XMBYTEN2::_XMBYTEN2
11442 XMStoreByteN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
11445 //------------------------------------------------------------------------------
11447 XMFINLINE _XMBYTEN2::_XMBYTEN2
11449 CONST FLOAT* pArray
11452 XMStoreByteN2(this, XMLoadFloat2((const XMFLOAT2*)pArray));
11455 //------------------------------------------------------------------------------
11457 XMFINLINE _XMBYTEN2& _XMBYTEN2::operator=
11459 CONST _XMBYTEN2& ByteN2
11467 /****************************************************************************
11469 * XMBYTE2 operators
11471 ****************************************************************************/
11473 //------------------------------------------------------------------------------
11475 XMFINLINE _XMBYTE2::_XMBYTE2
11484 //------------------------------------------------------------------------------
11486 XMFINLINE _XMBYTE2::_XMBYTE2
11492 XMStoreByte2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
11495 //------------------------------------------------------------------------------
11497 XMFINLINE _XMBYTE2::_XMBYTE2
11499 CONST FLOAT* pArray
11502 XMStoreByte2(this, XMLoadFloat2((const XMFLOAT2*)pArray));
11505 //------------------------------------------------------------------------------
11507 XMFINLINE _XMBYTE2& _XMBYTE2::operator=
11509 CONST _XMBYTE2& Byte2
11517 /****************************************************************************
11519 * XMUBYTEN2 operators
11521 ****************************************************************************/
11523 //------------------------------------------------------------------------------
11525 XMFINLINE _XMUBYTEN2::_XMUBYTEN2
11534 //------------------------------------------------------------------------------
11536 XMFINLINE _XMUBYTEN2::_XMUBYTEN2
11542 XMStoreUByteN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
11545 //------------------------------------------------------------------------------
11547 XMFINLINE _XMUBYTEN2::_XMUBYTEN2
11549 CONST FLOAT* pArray
11552 XMStoreUByteN2(this, XMLoadFloat2((const XMFLOAT2*)pArray));
11555 //------------------------------------------------------------------------------
11557 XMFINLINE _XMUBYTEN2& _XMUBYTEN2::operator=
11559 CONST _XMUBYTEN2& UByteN2
11567 /****************************************************************************
11569 * XMUBYTE2 operators
11571 ****************************************************************************/
11573 //------------------------------------------------------------------------------
11575 XMFINLINE _XMUBYTE2::_XMUBYTE2
11584 //------------------------------------------------------------------------------
11586 XMFINLINE _XMUBYTE2::_XMUBYTE2
11592 XMStoreUByte2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
11595 //------------------------------------------------------------------------------
11597 XMFINLINE _XMUBYTE2::_XMUBYTE2
11599 CONST FLOAT* pArray
11602 XMStoreUByte2(this, XMLoadFloat2((const XMFLOAT2*)pArray));
11605 //------------------------------------------------------------------------------
11607 XMFINLINE _XMUBYTE2& _XMUBYTE2::operator=
11609 CONST _XMUBYTE2& UByte2
11617 /****************************************************************************
11619 * XMFLOAT3 operators
11621 ****************************************************************************/
11623 //------------------------------------------------------------------------------
// Constructors and assignment operators for the 3-component storage types
// (XMFLOAT3, XMINT3, XMUINT3) and the packed HenDN3/HenD3 formats, which
// convert through XMVectorSet/XMLoadFloat3 plus the matching XMStore* routine.
// NOTE(review): this listing is missing most bodies, parameter lists and
// returns for these members; comments only, code bytes untouched.
11625 XMFINLINE _XMFLOAT3::_XMFLOAT3
11627 CONST FLOAT* pArray
11635 //------------------------------------------------------------------------------
11637 XMFINLINE _XMFLOAT3& _XMFLOAT3::operator=
11639 CONST _XMFLOAT3& Float3
11648 //------------------------------------------------------------------------------
11650 XMFINLINE XMFLOAT3A& XMFLOAT3A::operator=
11652 CONST XMFLOAT3A& Float3
11661 /****************************************************************************
11665 ****************************************************************************/
11667 XMFINLINE _XMINT3::_XMINT3
11677 //------------------------------------------------------------------------------
11679 XMFINLINE XMINT3& _XMINT3::operator=
11681 CONST _XMINT3& Int3
11690 /****************************************************************************
11692 * XMUINT3 operators
11694 ****************************************************************************/
11696 XMFINLINE _XMUINT3::_XMUINT3
11706 //------------------------------------------------------------------------------
11708 XMFINLINE XMUINT3& _XMUINT3::operator=
11710 CONST _XMUINT3& UInt3
11719 /****************************************************************************
11721 * XMHENDN3 operators
11723 ****************************************************************************/
11725 //------------------------------------------------------------------------------
// Construct from three floats, packed via XMStoreHenDN3
11727 XMFINLINE _XMHENDN3::_XMHENDN3
11734 XMStoreHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f));
11737 //------------------------------------------------------------------------------
11739 XMFINLINE _XMHENDN3::_XMHENDN3
11741 CONST FLOAT* pArray
11744 XMStoreHenDN3(this, XMLoadFloat3((const XMFLOAT3*)pArray));
11747 //------------------------------------------------------------------------------
11749 XMFINLINE _XMHENDN3& _XMHENDN3::operator=
11751 CONST _XMHENDN3& HenDN3
11758 //------------------------------------------------------------------------------
// Second operator= overload; its parameter line is missing from this listing
11760 XMFINLINE _XMHENDN3& _XMHENDN3::operator=
11769 /****************************************************************************
11771 * XMHEND3 operators
11773 ****************************************************************************/
11775 //------------------------------------------------------------------------------
11777 XMFINLINE _XMHEND3::_XMHEND3
11784 XMStoreHenD3(this, XMVectorSet(_x, _y, _z, 0.0f));
11787 //------------------------------------------------------------------------------
11789 XMFINLINE _XMHEND3::_XMHEND3
11791 CONST FLOAT* pArray
11794 XMStoreHenD3(this, XMLoadFloat3((const XMFLOAT3*)pArray));
11797 //------------------------------------------------------------------------------
11799 XMFINLINE _XMHEND3& _XMHEND3::operator=
11801 CONST _XMHEND3& HenD3
11808 //------------------------------------------------------------------------------
11810 XMFINLINE _XMHEND3& _XMHEND3::operator=
11819 /****************************************************************************
11821 * XMUHENDN3 operators
11823 ****************************************************************************/
11825 //------------------------------------------------------------------------------
11827 XMFINLINE _XMUHENDN3::_XMUHENDN3
11834 XMStoreUHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f));
11837 //------------------------------------------------------------------------------
11839 XMFINLINE _XMUHENDN3::_XMUHENDN3
11841 CONST FLOAT* pArray
11844 XMStoreUHenDN3(this, XMLoadFloat3((const XMFLOAT3*)pArray));
11847 //------------------------------------------------------------------------------
11849 XMFINLINE _XMUHENDN3& _XMUHENDN3::operator=
11851 CONST _XMUHENDN3& UHenDN3
11858 //------------------------------------------------------------------------------
11860 XMFINLINE _XMUHENDN3& _XMUHENDN3::operator=
11869 /****************************************************************************
11871 * XMUHEND3 operators
11873 ****************************************************************************/
11875 //------------------------------------------------------------------------------
11877 XMFINLINE _XMUHEND3::_XMUHEND3
11884 XMStoreUHenD3(this, XMVectorSet(_x, _y, _z, 0.0f));
11887 //------------------------------------------------------------------------------
11889 XMFINLINE _XMUHEND3::_XMUHEND3
11891 CONST FLOAT* pArray
11894 XMStoreUHenD3(this, XMLoadFloat3((const XMFLOAT3*)pArray));
11897 //------------------------------------------------------------------------------
11899 XMFINLINE _XMUHEND3& _XMUHEND3::operator=
11901 CONST _XMUHEND3& UHenD3
11908 //------------------------------------------------------------------------------
11910 XMFINLINE _XMUHEND3& _XMUHEND3::operator=
11919 /****************************************************************************
11921 * XMDHENN3 operators
11923 ****************************************************************************/
11925 //------------------------------------------------------------------------------
11927 XMFINLINE _XMDHENN3::_XMDHENN3
11934 XMStoreDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f));
11937 //------------------------------------------------------------------------------
11939 XMFINLINE _XMDHENN3::_XMDHENN3
11941 CONST FLOAT* pArray
11944 XMStoreDHenN3(this, XMLoadFloat3((const XMFLOAT3*)pArray));
11947 //------------------------------------------------------------------------------
11949 XMFINLINE _XMDHENN3& _XMDHENN3::operator=
11951 CONST _XMDHENN3& DHenN3
11958 //------------------------------------------------------------------------------
11960 XMFINLINE _XMDHENN3& _XMDHENN3::operator=
11969 /****************************************************************************
11971 * XMDHEN3 operators
11973 ****************************************************************************/
11975 //------------------------------------------------------------------------------
11977 XMFINLINE _XMDHEN3::_XMDHEN3
11984 XMStoreDHen3(this, XMVectorSet(_x, _y, _z, 0.0f));
11987 //------------------------------------------------------------------------------
11989 XMFINLINE _XMDHEN3::_XMDHEN3
11991 CONST FLOAT* pArray
11994 XMStoreDHen3(this, XMLoadFloat3((const XMFLOAT3*)pArray));
11997 //------------------------------------------------------------------------------
11999 XMFINLINE _XMDHEN3& _XMDHEN3::operator=
12001 CONST _XMDHEN3& DHen3
12008 //------------------------------------------------------------------------------
12010 XMFINLINE _XMDHEN3& _XMDHEN3::operator=
12019 /****************************************************************************
12021 * XMUDHENN3 operators
12023 ****************************************************************************/
12025 //------------------------------------------------------------------------------
12027 XMFINLINE _XMUDHENN3::_XMUDHENN3
12034 XMStoreUDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f));
12037 //------------------------------------------------------------------------------
12039 XMFINLINE _XMUDHENN3::_XMUDHENN3
12041 CONST FLOAT* pArray
12044 XMStoreUDHenN3(this, XMLoadFloat3((const XMFLOAT3*)pArray));
12047 //------------------------------------------------------------------------------
12049 XMFINLINE _XMUDHENN3& _XMUDHENN3::operator=
12051 CONST _XMUDHENN3& UDHenN3
12058 //------------------------------------------------------------------------------
12060 XMFINLINE _XMUDHENN3& _XMUDHENN3::operator=
12069 /****************************************************************************
12071 * XMUDHEN3 operators
12073 ****************************************************************************/
12075 //------------------------------------------------------------------------------
12077 XMFINLINE _XMUDHEN3::_XMUDHEN3
12084 XMStoreUDHen3(this, XMVectorSet(_x, _y, _z, 0.0f));
12087 //------------------------------------------------------------------------------
12089 XMFINLINE _XMUDHEN3::_XMUDHEN3
12091 CONST FLOAT* pArray
12094 XMStoreUDHen3(this, XMLoadFloat3((const XMFLOAT3*)pArray));
12097 //------------------------------------------------------------------------------
12099 XMFINLINE _XMUDHEN3& _XMUDHEN3::operator=
12101 CONST _XMUDHEN3& UDHen3
12108 //------------------------------------------------------------------------------
12110 XMFINLINE _XMUDHEN3& _XMUDHEN3::operator=
12119 /****************************************************************************
12123 ****************************************************************************/
12125 XMFINLINE _XMU565::_XMU565
12135 XMFINLINE _XMU565::_XMU565
12142 XMStoreU565(this, XMVectorSet( _x, _y, _z, 0.0f ));
12145 XMFINLINE _XMU565::_XMU565
12147 CONST FLOAT *pArray
12150 XMStoreU565(this, XMLoadFloat3((const XMFLOAT3*)pArray ));
12153 XMFINLINE _XMU565& _XMU565::operator=
12155 CONST _XMU565& U565
12162 XMFINLINE _XMU565& _XMU565::operator=
12164 CONST USHORT Packed
12171 /****************************************************************************
12173 * XMFLOAT3PK operators
12175 ****************************************************************************/
12177 XMFINLINE _XMFLOAT3PK::_XMFLOAT3PK
12184 XMStoreFloat3PK(this, XMVectorSet( _x, _y, _z, 0.0f ));
12187 XMFINLINE _XMFLOAT3PK::_XMFLOAT3PK
12189 CONST FLOAT *pArray
12192 XMStoreFloat3PK(this, XMLoadFloat3((const XMFLOAT3*)pArray ));
12195 XMFINLINE _XMFLOAT3PK& _XMFLOAT3PK::operator=
12197 CONST _XMFLOAT3PK& float3pk
12204 XMFINLINE _XMFLOAT3PK& _XMFLOAT3PK::operator=
12213 /****************************************************************************
12215 * XMFLOAT3SE operators
12217 ****************************************************************************/
12219 XMFINLINE _XMFLOAT3SE::_XMFLOAT3SE
12226 XMStoreFloat3SE(this, XMVectorSet( _x, _y, _z, 0.0f ));
12229 XMFINLINE _XMFLOAT3SE::_XMFLOAT3SE
12231 CONST FLOAT *pArray
12234 XMStoreFloat3SE(this, XMLoadFloat3((const XMFLOAT3*)pArray ));
12237 XMFINLINE _XMFLOAT3SE& _XMFLOAT3SE::operator=
12239 CONST _XMFLOAT3SE& float3se
12246 XMFINLINE _XMFLOAT3SE& _XMFLOAT3SE::operator=
12255 /****************************************************************************
12257 * XMFLOAT4 operators
12259 ****************************************************************************/
12261 //------------------------------------------------------------------------------
12263 XMFINLINE _XMFLOAT4::_XMFLOAT4
12265 CONST FLOAT* pArray
12274 //------------------------------------------------------------------------------
12276 XMFINLINE _XMFLOAT4& _XMFLOAT4::operator=
12278 CONST _XMFLOAT4& Float4
12288 //------------------------------------------------------------------------------
12290 XMFINLINE XMFLOAT4A& XMFLOAT4A::operator=
12292 CONST XMFLOAT4A& Float4
12302 /****************************************************************************
12306 ****************************************************************************/
12308 XMFINLINE _XMINT4::_XMINT4
12319 //------------------------------------------------------------------------------
12321 XMFINLINE XMINT4& _XMINT4::operator=
12323 CONST _XMINT4& Int4
12333 /****************************************************************************
12335 * XMUINT4 operators
12337 ****************************************************************************/
12339 XMFINLINE _XMUINT4::_XMUINT4
12350 //------------------------------------------------------------------------------
12352 XMFINLINE XMUINT4& _XMUINT4::operator=
12354 CONST _XMUINT4& UInt4
12364 /****************************************************************************
12366 * XMHALF4 operators
12368 ****************************************************************************/
12370 //------------------------------------------------------------------------------
12372 XMFINLINE _XMHALF4::_XMHALF4
12383 //------------------------------------------------------------------------------
12385 XMFINLINE _XMHALF4::_XMHALF4
12393 x = XMConvertFloatToHalf(_x);
12394 y = XMConvertFloatToHalf(_y);
12395 z = XMConvertFloatToHalf(_z);
12396 w = XMConvertFloatToHalf(_w);
12399 //------------------------------------------------------------------------------
12401 XMFINLINE _XMHALF4::_XMHALF4
12403 CONST FLOAT* pArray
12406 XMConvertFloatToHalfStream(&x, sizeof(HALF), pArray, sizeof(FLOAT), 4);
12409 //------------------------------------------------------------------------------
12411 XMFINLINE _XMHALF4& _XMHALF4::operator=
12413 CONST _XMHALF4& Half4
12423 /****************************************************************************
12425 * XMSHORTN4 operators
12427 ****************************************************************************/
12429 //------------------------------------------------------------------------------
12431 XMFINLINE _XMSHORTN4::_XMSHORTN4
12433 CONST SHORT* pArray
12442 //------------------------------------------------------------------------------
12444 XMFINLINE _XMSHORTN4::_XMSHORTN4
12452 XMStoreShortN4(this, XMVectorSet(_x, _y, _z, _w));
12455 //------------------------------------------------------------------------------
12457 XMFINLINE _XMSHORTN4::_XMSHORTN4
12459 CONST FLOAT* pArray
12462 XMStoreShortN4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
12465 //------------------------------------------------------------------------------
12467 XMFINLINE _XMSHORTN4& _XMSHORTN4::operator=
12469 CONST _XMSHORTN4& ShortN4
12479 /****************************************************************************
12481 * XMSHORT4 operators
12483 ****************************************************************************/
12485 //------------------------------------------------------------------------------
12487 XMFINLINE _XMSHORT4::_XMSHORT4
12489 CONST SHORT* pArray
12498 //------------------------------------------------------------------------------
12500 XMFINLINE _XMSHORT4::_XMSHORT4
12508 XMStoreShort4(this, XMVectorSet(_x, _y, _z, _w));
12511 //------------------------------------------------------------------------------
12513 XMFINLINE _XMSHORT4::_XMSHORT4
12515 CONST FLOAT* pArray
12518 XMStoreShort4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
12521 //------------------------------------------------------------------------------
12523 XMFINLINE _XMSHORT4& _XMSHORT4::operator=
12525 CONST _XMSHORT4& Short4
12535 /****************************************************************************
12537 * XMUSHORTN4 operators
12539 ****************************************************************************/
12541 //------------------------------------------------------------------------------
12543 XMFINLINE _XMUSHORTN4::_XMUSHORTN4
12545 CONST USHORT* pArray
12554 //------------------------------------------------------------------------------
12556 XMFINLINE _XMUSHORTN4::_XMUSHORTN4
12564 XMStoreUShortN4(this, XMVectorSet(_x, _y, _z, _w));
12567 //------------------------------------------------------------------------------
12569 XMFINLINE _XMUSHORTN4::_XMUSHORTN4
12571 CONST FLOAT* pArray
12574 XMStoreUShortN4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
12577 //------------------------------------------------------------------------------
12579 XMFINLINE _XMUSHORTN4& _XMUSHORTN4::operator=
12581 CONST _XMUSHORTN4& UShortN4
12591 /****************************************************************************
12593 * XMUSHORT4 operators
12595 ****************************************************************************/
12597 //------------------------------------------------------------------------------
12599 XMFINLINE _XMUSHORT4::_XMUSHORT4
12601 CONST USHORT* pArray
12610 //------------------------------------------------------------------------------
12612 XMFINLINE _XMUSHORT4::_XMUSHORT4
12620 XMStoreUShort4(this, XMVectorSet(_x, _y, _z, _w));
12623 //------------------------------------------------------------------------------
12625 XMFINLINE _XMUSHORT4::_XMUSHORT4
12627 CONST FLOAT* pArray
12630 XMStoreUShort4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
12633 //------------------------------------------------------------------------------
12635 XMFINLINE _XMUSHORT4& _XMUSHORT4::operator=
12637 CONST _XMUSHORT4& UShort4
12647 /****************************************************************************
12649 * XMXDECN4 operators
12651 ****************************************************************************/
12653 //------------------------------------------------------------------------------
12655 XMFINLINE _XMXDECN4::_XMXDECN4
12663 XMStoreXDecN4(this, XMVectorSet(_x, _y, _z, _w));
12666 //------------------------------------------------------------------------------
12668 XMFINLINE _XMXDECN4::_XMXDECN4
12670 CONST FLOAT* pArray
12673 XMStoreXDecN4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
12676 //------------------------------------------------------------------------------
12678 XMFINLINE _XMXDECN4& _XMXDECN4::operator=
12680 CONST _XMXDECN4& XDecN4
12687 //------------------------------------------------------------------------------
12689 XMFINLINE _XMXDECN4& _XMXDECN4::operator=
12698 /****************************************************************************
12700 * XMXDEC4 operators
12702 ****************************************************************************/
12704 //------------------------------------------------------------------------------
12706 XMFINLINE _XMXDEC4::_XMXDEC4
12714 XMStoreXDec4(this, XMVectorSet(_x, _y, _z, _w));
12717 //------------------------------------------------------------------------------
12719 XMFINLINE _XMXDEC4::_XMXDEC4
12721 CONST FLOAT* pArray
12724 XMStoreXDec4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
12727 //------------------------------------------------------------------------------
12729 XMFINLINE _XMXDEC4& _XMXDEC4::operator=
12731 CONST _XMXDEC4& XDec4
12738 //------------------------------------------------------------------------------
12740 XMFINLINE _XMXDEC4& _XMXDEC4::operator=
12749 /****************************************************************************
12751 * XMDECN4 operators
12753 ****************************************************************************/
12755 //------------------------------------------------------------------------------
12757 XMFINLINE _XMDECN4::_XMDECN4
12765 XMStoreDecN4(this, XMVectorSet(_x, _y, _z, _w));
12768 //------------------------------------------------------------------------------
12770 XMFINLINE _XMDECN4::_XMDECN4
12772 CONST FLOAT* pArray
12775 XMStoreDecN4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
12778 //------------------------------------------------------------------------------
12780 XMFINLINE _XMDECN4& _XMDECN4::operator=
12782 CONST _XMDECN4& DecN4
12789 //------------------------------------------------------------------------------
12791 XMFINLINE _XMDECN4& _XMDECN4::operator=
12800 /****************************************************************************
12804 ****************************************************************************/
12806 //------------------------------------------------------------------------------
12808 XMFINLINE _XMDEC4::_XMDEC4
12816 XMStoreDec4(this, XMVectorSet(_x, _y, _z, _w));
12819 //------------------------------------------------------------------------------
12821 XMFINLINE _XMDEC4::_XMDEC4
12823 CONST FLOAT* pArray
12826 XMStoreDec4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
12829 //------------------------------------------------------------------------------
12831 XMFINLINE _XMDEC4& _XMDEC4::operator=
12833 CONST _XMDEC4& Dec4
12840 //------------------------------------------------------------------------------
12842 XMFINLINE _XMDEC4& _XMDEC4::operator=
12851 /****************************************************************************
12853 * XMUDECN4 operators
12855 ****************************************************************************/
12857 //------------------------------------------------------------------------------
12859 XMFINLINE _XMUDECN4::_XMUDECN4
12867 XMStoreUDecN4(this, XMVectorSet(_x, _y, _z, _w));
12870 //------------------------------------------------------------------------------
12872 XMFINLINE _XMUDECN4::_XMUDECN4
12874 CONST FLOAT* pArray
12877 XMStoreUDecN4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
12880 //------------------------------------------------------------------------------
12882 XMFINLINE _XMUDECN4& _XMUDECN4::operator=
12884 CONST _XMUDECN4& UDecN4
12891 //------------------------------------------------------------------------------
12893 XMFINLINE _XMUDECN4& _XMUDECN4::operator=
12902 /****************************************************************************
12904 * XMUDEC4 operators
12906 ****************************************************************************/
12908 //------------------------------------------------------------------------------
12910 XMFINLINE _XMUDEC4::_XMUDEC4
12918 XMStoreUDec4(this, XMVectorSet(_x, _y, _z, _w));
12921 //------------------------------------------------------------------------------
12923 XMFINLINE _XMUDEC4::_XMUDEC4
12925 CONST FLOAT* pArray
12928 XMStoreUDec4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
12931 //------------------------------------------------------------------------------
12933 XMFINLINE _XMUDEC4& _XMUDEC4::operator=
12935 CONST _XMUDEC4& UDec4
12942 //------------------------------------------------------------------------------
12944 XMFINLINE _XMUDEC4& _XMUDEC4::operator=
12953 /****************************************************************************
12955 * XMXICON4 operators
12957 ****************************************************************************/
12959 //------------------------------------------------------------------------------
12961 XMFINLINE _XMXICON4::_XMXICON4
12969 XMStoreXIcoN4(this, XMVectorSet(_x, _y, _z, _w));
12972 //------------------------------------------------------------------------------
12974 XMFINLINE _XMXICON4::_XMXICON4
12976 CONST FLOAT* pArray
12979 XMStoreXIcoN4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
12982 //------------------------------------------------------------------------------
12984 XMFINLINE _XMXICON4& _XMXICON4::operator=
12986 CONST _XMXICON4& XIcoN4
12993 //------------------------------------------------------------------------------
12995 XMFINLINE _XMXICON4& _XMXICON4::operator=
12997 CONST UINT64 Packed
13004 /****************************************************************************
13006 * XMXICO4 operators
13008 ****************************************************************************/
13010 //------------------------------------------------------------------------------
13012 XMFINLINE _XMXICO4::_XMXICO4
13020 XMStoreXIco4(this, XMVectorSet(_x, _y, _z, _w));
13023 //------------------------------------------------------------------------------
13025 XMFINLINE _XMXICO4::_XMXICO4
13027 CONST FLOAT* pArray
13030 XMStoreXIco4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
13033 //------------------------------------------------------------------------------
13035 XMFINLINE _XMXICO4& _XMXICO4::operator=
13037 CONST _XMXICO4& XIco4
13044 //------------------------------------------------------------------------------
13046 XMFINLINE _XMXICO4& _XMXICO4::operator=
13048 CONST UINT64 Packed
13055 /****************************************************************************
13057 * XMICON4 operators
13059 ****************************************************************************/
13061 //------------------------------------------------------------------------------
13063 XMFINLINE _XMICON4::_XMICON4
13071 XMStoreIcoN4(this, XMVectorSet(_x, _y, _z, _w));
13074 //------------------------------------------------------------------------------
13076 XMFINLINE _XMICON4::_XMICON4
13078 CONST FLOAT* pArray
13081 XMStoreIcoN4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
13084 //------------------------------------------------------------------------------
13086 XMFINLINE _XMICON4& _XMICON4::operator=
13088 CONST _XMICON4& IcoN4
13095 //------------------------------------------------------------------------------
13097 XMFINLINE _XMICON4& _XMICON4::operator=
13099 CONST UINT64 Packed
13106 /****************************************************************************
13110 ****************************************************************************/
13112 //------------------------------------------------------------------------------
13114 XMFINLINE _XMICO4::_XMICO4
13122 XMStoreIco4(this, XMVectorSet(_x, _y, _z, _w));
13125 //------------------------------------------------------------------------------
13127 XMFINLINE _XMICO4::_XMICO4
13129 CONST FLOAT* pArray
13132 XMStoreIco4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
13135 //------------------------------------------------------------------------------
13137 XMFINLINE _XMICO4& _XMICO4::operator=
13139 CONST _XMICO4& Ico4
13146 //------------------------------------------------------------------------------
13148 XMFINLINE _XMICO4& _XMICO4::operator=
13150 CONST UINT64 Packed
13157 /****************************************************************************
13159 * XMUICON4 operators
13161 ****************************************************************************/
13163 //------------------------------------------------------------------------------
13165 XMFINLINE _XMUICON4::_XMUICON4
13173 XMStoreUIcoN4(this, XMVectorSet(_x, _y, _z, _w));
13176 //------------------------------------------------------------------------------
13178 XMFINLINE _XMUICON4::_XMUICON4
13180 CONST FLOAT* pArray
13183 XMStoreUIcoN4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
13186 //------------------------------------------------------------------------------
13188 XMFINLINE _XMUICON4& _XMUICON4::operator=
13190 CONST _XMUICON4& UIcoN4
13197 //------------------------------------------------------------------------------
13199 XMFINLINE _XMUICON4& _XMUICON4::operator=
13201 CONST UINT64 Packed
13208 /****************************************************************************
13210 * XMUICO4 operators
13212 ****************************************************************************/
13214 //------------------------------------------------------------------------------
13216 XMFINLINE _XMUICO4::_XMUICO4
13224 XMStoreUIco4(this, XMVectorSet(_x, _y, _z, _w));
13227 //------------------------------------------------------------------------------
13229 XMFINLINE _XMUICO4::_XMUICO4
13231 CONST FLOAT* pArray
13234 XMStoreUIco4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
13237 //------------------------------------------------------------------------------
13239 XMFINLINE _XMUICO4& _XMUICO4::operator=
13241 CONST _XMUICO4& UIco4
13248 //------------------------------------------------------------------------------
13250 XMFINLINE _XMUICO4& _XMUICO4::operator=
13252 CONST UINT64 Packed
13259 /****************************************************************************
13261 * XMCOLOR4 operators
13263 ****************************************************************************/
13265 //------------------------------------------------------------------------------
13267 XMFINLINE _XMCOLOR::_XMCOLOR
13275 XMStoreColor(this, XMVectorSet(_r, _g, _b, _a));
13278 //------------------------------------------------------------------------------
13280 XMFINLINE _XMCOLOR::_XMCOLOR
13282 CONST FLOAT* pArray
13285 XMStoreColor(this, XMLoadFloat4((const XMFLOAT4*)pArray));
13288 //------------------------------------------------------------------------------
13290 XMFINLINE _XMCOLOR& _XMCOLOR::operator=
13292 CONST _XMCOLOR& Color
13299 //------------------------------------------------------------------------------
13301 XMFINLINE _XMCOLOR& _XMCOLOR::operator=
13310 /****************************************************************************
13312 * XMBYTEN4 operators
13314 ****************************************************************************/
13316 //------------------------------------------------------------------------------
13318 XMFINLINE _XMBYTEN4::_XMBYTEN4
13329 //------------------------------------------------------------------------------
13331 XMFINLINE _XMBYTEN4::_XMBYTEN4
13339 XMStoreByteN4(this, XMVectorSet(_x, _y, _z, _w));
13342 //------------------------------------------------------------------------------
13344 XMFINLINE _XMBYTEN4::_XMBYTEN4
13346 CONST FLOAT* pArray
13349 XMStoreByteN4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
13352 //------------------------------------------------------------------------------
13354 XMFINLINE _XMBYTEN4& _XMBYTEN4::operator=
13356 CONST _XMBYTEN4& ByteN4
13366 /****************************************************************************
13368 * XMBYTE4 operators
13370 ****************************************************************************/
13372 //------------------------------------------------------------------------------
13374 XMFINLINE _XMBYTE4::_XMBYTE4
13385 //------------------------------------------------------------------------------
13387 XMFINLINE _XMBYTE4::_XMBYTE4
13395 XMStoreByte4(this, XMVectorSet(_x, _y, _z, _w));
13398 //------------------------------------------------------------------------------
13400 XMFINLINE _XMBYTE4::_XMBYTE4
13402 CONST FLOAT* pArray
13405 XMStoreByte4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
13408 //------------------------------------------------------------------------------
13410 XMFINLINE _XMBYTE4& _XMBYTE4::operator=
13412 CONST _XMBYTE4& Byte4
13422 /****************************************************************************
13424 * XMUBYTEN4 operators
13426 ****************************************************************************/
13428 //------------------------------------------------------------------------------
13430 XMFINLINE _XMUBYTEN4::_XMUBYTEN4
13441 //------------------------------------------------------------------------------
13443 XMFINLINE _XMUBYTEN4::_XMUBYTEN4
13451 XMStoreUByteN4(this, XMVectorSet(_x, _y, _z, _w));
13454 //------------------------------------------------------------------------------
13456 XMFINLINE _XMUBYTEN4::_XMUBYTEN4
13458 CONST FLOAT* pArray
13461 XMStoreUByteN4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
13464 //------------------------------------------------------------------------------
13466 XMFINLINE _XMUBYTEN4& _XMUBYTEN4::operator=
13468 CONST _XMUBYTEN4& UByteN4
13478 /****************************************************************************
13480 * XMUBYTE4 operators
13482 ****************************************************************************/
13484 //------------------------------------------------------------------------------
13486 XMFINLINE _XMUBYTE4::_XMUBYTE4
13497 //------------------------------------------------------------------------------
13499 XMFINLINE _XMUBYTE4::_XMUBYTE4
13507 XMStoreUByte4(this, XMVectorSet(_x, _y, _z, _w));
13510 //------------------------------------------------------------------------------
13512 XMFINLINE _XMUBYTE4::_XMUBYTE4
13514 CONST FLOAT* pArray
13517 XMStoreUByte4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
13520 //------------------------------------------------------------------------------
13522 XMFINLINE _XMUBYTE4& _XMUBYTE4::operator=
13524 CONST _XMUBYTE4& UByte4
13534 /****************************************************************************
13536 * XMUNIBBLE4 operators
13538 ****************************************************************************/
13540 //------------------------------------------------------------------------------
13542 XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
13553 //------------------------------------------------------------------------------
13555 XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
13563 XMStoreUNibble4(this, XMVectorSet( _x, _y, _z, _w ));
13566 //------------------------------------------------------------------------------
13568 XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
13570 CONST FLOAT *pArray
13573 XMStoreUNibble4(this, XMLoadFloat4((const XMFLOAT4*)pArray));
13576 //------------------------------------------------------------------------------
13578 XMFINLINE _XMUNIBBLE4& _XMUNIBBLE4::operator=
13580 CONST _XMUNIBBLE4& UNibble4
13587 //------------------------------------------------------------------------------
13589 XMFINLINE _XMUNIBBLE4& _XMUNIBBLE4::operator=
13591 CONST USHORT Packed
13598 /****************************************************************************
13602 ****************************************************************************/
13604 //------------------------------------------------------------------------------
13606 XMFINLINE _XMU555::_XMU555
13608 CONST CHAR *pArray,
13618 //------------------------------------------------------------------------------
13620 XMFINLINE _XMU555::_XMU555
13628 XMStoreU555(this, XMVectorSet(_x, _y, _z, ((_w) ? 1.0f : 0.0f) ));
13631 //------------------------------------------------------------------------------
13633 XMFINLINE _XMU555::_XMU555
13635 CONST FLOAT *pArray,
13639 XMVECTOR V = XMLoadFloat3((const XMFLOAT3*)pArray);
13640 XMStoreU555(this, XMVectorSetW(V, ((_w) ? 1.0f : 0.0f) ));
13643 //------------------------------------------------------------------------------
13645 XMFINLINE _XMU555& _XMU555::operator=
13647 CONST _XMU555& U555
13654 //------------------------------------------------------------------------------
13656 XMFINLINE _XMU555& _XMU555::operator=
13658 CONST USHORT Packed
13665 #endif // __cplusplus
13667 #if defined(_XM_NO_INTRINSICS_)
13672 #endif // __XNAMATHVECTOR_INL__