Bullet Collision Detection & Physics Library
btQuaternion.h
Go to the documentation of this file.
1/*
2Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans https://bulletphysics.org
3
4This software is provided 'as-is', without any express or implied warranty.
5In no event will the authors be held liable for any damages arising from the use of this software.
6Permission is granted to anyone to use this software for any purpose,
7including commercial applications, and to alter it and redistribute it freely,
8subject to the following restrictions:
9
101. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
112. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
123. This notice may not be removed or altered from any source distribution.
13*/
14
15#ifndef BT_SIMD__QUATERNION_H_
16#define BT_SIMD__QUATERNION_H_
17
18#include "btVector3.h"
19#include "btQuadWord.h"
20
21#ifdef BT_USE_DOUBLE_PRECISION
22#define btQuaternionData btQuaternionDoubleData
23#define btQuaternionDataName "btQuaternionDoubleData"
24#else
25#define btQuaternionData btQuaternionFloatData
26#define btQuaternionDataName "btQuaternionFloatData"
27#endif //BT_USE_DOUBLE_PRECISION
28
29#ifdef BT_USE_SSE
30
31//const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
32#define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f))
33
34#endif
35
36#if defined(BT_USE_SSE)
37
38#define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f))
39#define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f))
40
41#elif defined(BT_USE_NEON)
42
43const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
44const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
45
46#endif
47
50{
51public:
54
55#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
56 // Set Vector
57 SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
58 {
59 mVec128 = vec;
60 }
61
62 // Copy constructor
64 {
65 mVec128 = rhs.mVec128;
66 }
67
68 // Assignment Operator
70 operator=(const btQuaternion& v)
71 {
72 mVec128 = v.mVec128;
73
74 return *this;
75 }
76
77#endif
78
79 // template <typename btScalar>
80 // explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
82 btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
83 : btQuadWord(_x, _y, _z, _w)
84 {
85 }
89 btQuaternion(const btVector3& _axis, const btScalar& _angle)
90 {
91 setRotation(_axis, _angle);
92 }
97 btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
98 {
99#ifndef BT_EULER_DEFAULT_ZYX
100 setEuler(yaw, pitch, roll);
101#else
102 setEulerZYX(yaw, pitch, roll);
103#endif
104 }
108 void setRotation(const btVector3& axis, const btScalar& _angle)
109 {
110 btScalar d = axis.length();
111 btAssert(d != btScalar(0.0));
112 btScalar s = btSin(_angle * btScalar(0.5)) / d;
113 setValue(axis.x() * s, axis.y() * s, axis.z() * s,
114 btCos(_angle * btScalar(0.5)));
115 }
120 void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
121 {
122 btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
123 btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
124 btScalar halfRoll = btScalar(roll) * btScalar(0.5);
125 btScalar cosYaw = btCos(halfYaw);
126 btScalar sinYaw = btSin(halfYaw);
127 btScalar cosPitch = btCos(halfPitch);
128 btScalar sinPitch = btSin(halfPitch);
129 btScalar cosRoll = btCos(halfRoll);
130 btScalar sinRoll = btSin(halfRoll);
131 setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
132 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
133 sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
134 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
135 }
140 void setEulerZYX(const btScalar& yawZ, const btScalar& pitchY, const btScalar& rollX)
141 {
142 btScalar halfYaw = btScalar(yawZ) * btScalar(0.5);
143 btScalar halfPitch = btScalar(pitchY) * btScalar(0.5);
144 btScalar halfRoll = btScalar(rollX) * btScalar(0.5);
145 btScalar cosYaw = btCos(halfYaw);
146 btScalar sinYaw = btSin(halfYaw);
147 btScalar cosPitch = btCos(halfPitch);
148 btScalar sinPitch = btSin(halfPitch);
149 btScalar cosRoll = btCos(halfRoll);
150 btScalar sinRoll = btSin(halfRoll);
151 setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
152 cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
153 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
154 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
155 }
156
161 void getEulerZYX(btScalar& yawZ, btScalar& pitchY, btScalar& rollX) const
162 {
163 btScalar squ;
164 btScalar sqx;
165 btScalar sqy;
166 btScalar sqz;
167 btScalar sarg;
168 sqx = m_floats[0] * m_floats[0];
169 sqy = m_floats[1] * m_floats[1];
170 sqz = m_floats[2] * m_floats[2];
171 squ = m_floats[3] * m_floats[3];
172 sarg = btScalar(-2.) * (m_floats[0] * m_floats[2] - m_floats[3] * m_floats[1]);
173
174 // If the pitch angle is PI/2 or -PI/2, we can only compute
175 // the sum roll + yaw. However, any combination that gives
176 // the right sum will produce the correct orientation, so we
177 // set rollX = 0 and compute yawZ.
178 if (sarg <= -btScalar(0.99999))
179 {
180 pitchY = btScalar(-0.5) * SIMD_PI;
181 rollX = 0;
182 yawZ = btScalar(2) * btAtan2(m_floats[0], -m_floats[1]);
183 }
184 else if (sarg >= btScalar(0.99999))
185 {
186 pitchY = btScalar(0.5) * SIMD_PI;
187 rollX = 0;
188 yawZ = btScalar(2) * btAtan2(-m_floats[0], m_floats[1]);
189 }
190 else
191 {
192 pitchY = btAsin(sarg);
193 rollX = btAtan2(2 * (m_floats[1] * m_floats[2] + m_floats[3] * m_floats[0]), squ - sqx - sqy + sqz);
194 yawZ = btAtan2(2 * (m_floats[0] * m_floats[1] + m_floats[3] * m_floats[2]), squ + sqx - sqy - sqz);
195 }
196 }
197
201 {
202#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
203 mVec128 = _mm_add_ps(mVec128, q.mVec128);
204#elif defined(BT_USE_NEON)
205 mVec128 = vaddq_f32(mVec128, q.mVec128);
206#else
207 m_floats[0] += q.x();
208 m_floats[1] += q.y();
209 m_floats[2] += q.z();
210 m_floats[3] += q.m_floats[3];
211#endif
212 return *this;
213 }
214
218 {
219#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
220 mVec128 = _mm_sub_ps(mVec128, q.mVec128);
221#elif defined(BT_USE_NEON)
222 mVec128 = vsubq_f32(mVec128, q.mVec128);
223#else
224 m_floats[0] -= q.x();
225 m_floats[1] -= q.y();
226 m_floats[2] -= q.z();
227 m_floats[3] -= q.m_floats[3];
228#endif
229 return *this;
230 }
231
235 {
236#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
237 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
238 vs = bt_pshufd_ps(vs, 0); // (S S S S)
239 mVec128 = _mm_mul_ps(mVec128, vs);
240#elif defined(BT_USE_NEON)
241 mVec128 = vmulq_n_f32(mVec128, s);
242#else
243 m_floats[0] *= s;
244 m_floats[1] *= s;
245 m_floats[2] *= s;
246 m_floats[3] *= s;
247#endif
248 return *this;
249 }
250
255 {
256#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
257 __m128 vQ2 = q.get128();
258
259 __m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0, 1, 2, 0));
260 __m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0));
261
262 A1 = A1 * B1;
263
264 __m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 1));
265 __m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));
266
267 A2 = A2 * B2;
268
269 B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2, 0, 1, 2));
270 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));
271
272 B1 = B1 * B2; // A3 *= B3
273
274 mVec128 = bt_splat_ps(mVec128, 3); // A0
275 mVec128 = mVec128 * vQ2; // A0 * B0
276
277 A1 = A1 + A2; // AB12
278 mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
279 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
280 mVec128 = mVec128 + A1; // AB03 + AB12
281
282#elif defined(BT_USE_NEON)
283
284 float32x4_t vQ1 = mVec128;
285 float32x4_t vQ2 = q.get128();
286 float32x4_t A0, A1, B1, A2, B2, A3, B3;
287 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
288
289 {
290 float32x2x2_t tmp;
291 tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
292 vQ1zx = tmp.val[0];
293
294 tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
295 vQ2zx = tmp.val[0];
296 }
297 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
298
299 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
300
301 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
302 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
303
304 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
305 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
306
307 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
308 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
309
310 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
311 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
312
313 A1 = vmulq_f32(A1, B1);
314 A2 = vmulq_f32(A2, B2);
315 A3 = vmulq_f32(A3, B3); // A3 *= B3
316 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
317
318 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
319 A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
320
321 // change the sign of the last element
322 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
323 A0 = vaddq_f32(A0, A1); // AB03 + AB12
324
325 mVec128 = A0;
326#else
327 setValue(
328 m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
329 m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
330 m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
331 m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
332#endif
333 return *this;
334 }
337 btScalar dot(const btQuaternion& q) const
338 {
339#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
340 __m128 vd;
341
342 vd = _mm_mul_ps(mVec128, q.mVec128);
343
344 __m128 t = _mm_movehl_ps(vd, vd);
345 vd = _mm_add_ps(vd, t);
346 t = _mm_shuffle_ps(vd, vd, 0x55);
347 vd = _mm_add_ss(vd, t);
348
349 return _mm_cvtss_f32(vd);
350#elif defined(BT_USE_NEON)
351 float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
352 float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
353 x = vpadd_f32(x, x);
354 return vget_lane_f32(x, 0);
355#else
356 return m_floats[0] * q.x() +
357 m_floats[1] * q.y() +
358 m_floats[2] * q.z() +
359 m_floats[3] * q.m_floats[3];
360#endif
361 }
362
365 {
366 return dot(*this);
367 }
368
371 {
372 return btSqrt(length2());
373 }
375 {
376 btScalar l2 = length2();
377 if (l2 > SIMD_EPSILON)
378 {
379 normalize();
380 }
381 return *this;
382 }
386 {
387#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
388 __m128 vd;
389
390 vd = _mm_mul_ps(mVec128, mVec128);
391
392 __m128 t = _mm_movehl_ps(vd, vd);
393 vd = _mm_add_ps(vd, t);
394 t = _mm_shuffle_ps(vd, vd, 0x55);
395 vd = _mm_add_ss(vd, t);
396
397 vd = _mm_sqrt_ss(vd);
398 vd = _mm_div_ss(vOnes, vd);
399 vd = bt_pshufd_ps(vd, 0); // splat
400 mVec128 = _mm_mul_ps(mVec128, vd);
401
402 return *this;
403#else
404 return *this /= length();
405#endif
406 }
407
411 operator*(const btScalar& s) const
412 {
413#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
414 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
415 vs = bt_pshufd_ps(vs, 0x00); // (S S S S)
416
417 return btQuaternion(_mm_mul_ps(mVec128, vs));
418#elif defined(BT_USE_NEON)
419 return btQuaternion(vmulq_n_f32(mVec128, s));
420#else
421 return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
422#endif
423 }
424
428 {
429 btAssert(s != btScalar(0.0));
430 return *this * (btScalar(1.0) / s);
431 }
432
436 {
437 btAssert(s != btScalar(0.0));
438 return *this *= btScalar(1.0) / s;
439 }
440
443 {
444 return *this / length();
445 }
449 {
450 btScalar s = btSqrt(length2() * q.length2());
451 btAssert(s != btScalar(0.0));
452 return btAcos(dot(q) / s);
453 }
454
458 {
459 btScalar s = btSqrt(length2() * q.length2());
460 btAssert(s != btScalar(0.0));
461 if (dot(q) < 0) // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
462 return btAcos(dot(-q) / s) * btScalar(2.0);
463 else
464 return btAcos(dot(q) / s) * btScalar(2.0);
465 }
466
469 {
470 btScalar s = btScalar(2.) * btAcos(m_floats[3]);
471 return s;
472 }
473
476 {
477 btScalar s;
478 if (m_floats[3] >= 0)
479 s = btScalar(2.) * btAcos(m_floats[3]);
480 else
481 s = btScalar(2.) * btAcos(-m_floats[3]);
482 return s;
483 }
484
487 {
488 btScalar s_squared = 1.f - m_floats[3] * m_floats[3];
489
490 if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero
491 return btVector3(1.0, 0.0, 0.0); // Arbitrary
492 btScalar s = 1.f / btSqrt(s_squared);
493 return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
494 }
495
498 {
499#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
500 return btQuaternion(_mm_xor_ps(mVec128, vQInv));
501#elif defined(BT_USE_NEON)
502 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
503#else
504 return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
505#endif
506 }
507
511 operator+(const btQuaternion& q2) const
512 {
513#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
514 return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
515#elif defined(BT_USE_NEON)
516 return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
517#else
518 const btQuaternion& q1 = *this;
519 return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
520#endif
521 }
522
526 operator-(const btQuaternion& q2) const
527 {
528#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
529 return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
530#elif defined(BT_USE_NEON)
531 return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
532#else
533 const btQuaternion& q1 = *this;
534 return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
535#endif
536 }
537
541 {
542#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
543 return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
544#elif defined(BT_USE_NEON)
545 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask));
546#else
547 const btQuaternion& q2 = *this;
548 return btQuaternion(-q2.x(), -q2.y(), -q2.z(), -q2.m_floats[3]);
549#endif
550 }
553 {
554 btQuaternion diff, sum;
555 diff = *this - qd;
556 sum = *this + qd;
557 if (diff.dot(diff) > sum.dot(sum))
558 return qd;
559 return (-qd);
560 }
561
564 {
565 btQuaternion diff, sum;
566 diff = *this - qd;
567 sum = *this + qd;
568 if (diff.dot(diff) < sum.dot(sum))
569 return qd;
570 return (-qd);
571 }
572
577 btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
578 {
579 const btScalar magnitude = btSqrt(length2() * q.length2());
580 btAssert(magnitude > btScalar(0));
581
582 const btScalar product = dot(q) / magnitude;
583 const btScalar absproduct = btFabs(product);
584
585 if (absproduct < btScalar(1.0 - SIMD_EPSILON))
586 {
587 // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
588 const btScalar theta = btAcos(absproduct);
589 const btScalar d = btSin(theta);
590 btAssert(d > btScalar(0));
591
592 const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
593 const btScalar s0 = btSin((btScalar(1.0) - t) * theta) / d;
594 const btScalar s1 = btSin(sign * t * theta) / d;
595
596 return btQuaternion(
597 (m_floats[0] * s0 + q.x() * s1),
598 (m_floats[1] * s0 + q.y() * s1),
599 (m_floats[2] * s0 + q.z() * s1),
600 (m_floats[3] * s0 + q.w() * s1));
601 }
602 else
603 {
604 return *this;
605 }
606 }
607
608 static const btQuaternion& getIdentity()
609 {
610 static const btQuaternion identityQuat(btScalar(0.), btScalar(0.), btScalar(0.), btScalar(1.));
611 return identityQuat;
612 }
613
614 SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }
615
616 SIMD_FORCE_INLINE void serialize(struct btQuaternionData& dataOut) const;
617
618 SIMD_FORCE_INLINE void deSerialize(const struct btQuaternionFloatData& dataIn);
619
620 SIMD_FORCE_INLINE void deSerialize(const struct btQuaternionDoubleData& dataIn);
621
622 SIMD_FORCE_INLINE void serializeFloat(struct btQuaternionFloatData& dataOut) const;
623
625
626 SIMD_FORCE_INLINE void serializeDouble(struct btQuaternionDoubleData& dataOut) const;
627
629};
630
634{
635#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
636 __m128 vQ1 = q1.get128();
637 __m128 vQ2 = q2.get128();
638 __m128 A0, A1, B1, A2, B2;
639
640 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0, 1, 2, 0)); // X Y z x // vtrn
641 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0)); // W W W X // vdup vext
642
643 A1 = A1 * B1;
644
645 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1)); // Y Z X Y // vext
646 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1)); // z x Y Y // vtrn vdup
647
648 A2 = A2 * B2;
649
650 B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2)); // z x Y Z // vtrn vext
651 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2)); // Y Z x z // vext vtrn
652
653 B1 = B1 * B2; // A3 *= B3
654
655 A0 = bt_splat_ps(vQ1, 3); // A0
656 A0 = A0 * vQ2; // A0 * B0
657
658 A1 = A1 + A2; // AB12
659 A0 = A0 - B1; // AB03 = AB0 - AB3
660
661 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
662 A0 = A0 + A1; // AB03 + AB12
663
664 return btQuaternion(A0);
665
666#elif defined(BT_USE_NEON)
667
668 float32x4_t vQ1 = q1.get128();
669 float32x4_t vQ2 = q2.get128();
670 float32x4_t A0, A1, B1, A2, B2, A3, B3;
671 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
672
673 {
674 float32x2x2_t tmp;
675 tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
676 vQ1zx = tmp.val[0];
677
678 tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
679 vQ2zx = tmp.val[0];
680 }
681 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
682
683 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
684
685 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
686 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
687
688 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
689 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
690
691 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
692 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
693
694 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
695 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
696
697 A1 = vmulq_f32(A1, B1);
698 A2 = vmulq_f32(A2, B2);
699 A3 = vmulq_f32(A3, B3); // A3 *= B3
700 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
701
702 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
703 A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
704
705 // change the sign of the last element
706 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
707 A0 = vaddq_f32(A0, A1); // AB03 + AB12
708
709 return btQuaternion(A0);
710
711#else
712 return btQuaternion(
713 q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
714 q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
715 q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
716 q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z());
717#endif
718}
719
722{
723#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
724 __m128 vQ1 = q.get128();
725 __m128 vQ2 = w.get128();
726 __m128 A1, B1, A2, B2, A3, B3;
727
728 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3, 3, 3, 0));
729 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0, 1, 2, 0));
730
731 A1 = A1 * B1;
732
733 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1));
734 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));
735
736 A2 = A2 * B2;
737
738 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2));
739 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));
740
741 A3 = A3 * B3; // A3 *= B3
742
743 A1 = A1 + A2; // AB12
744 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
745 A1 = A1 - A3; // AB123 = AB12 - AB3
746
747 return btQuaternion(A1);
748
749#elif defined(BT_USE_NEON)
750
751 float32x4_t vQ1 = q.get128();
752 float32x4_t vQ2 = w.get128();
753 float32x4_t A1, B1, A2, B2, A3, B3;
754 float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
755
756 vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
757 {
758 float32x2x2_t tmp;
759
760 tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
761 vQ2zx = tmp.val[0];
762
763 tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
764 vQ1zx = tmp.val[0];
765 }
766
767 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
768
769 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
770 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
771
772 A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X
773 B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x
774
775 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
776 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
777
778 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
779 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
780
781 A1 = vmulq_f32(A1, B1);
782 A2 = vmulq_f32(A2, B2);
783 A3 = vmulq_f32(A3, B3); // A3 *= B3
784
785 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
786
787 // change the sign of the last element
788 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
789
790 A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
791
792 return btQuaternion(A1);
793
794#else
795 return btQuaternion(
796 q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
797 q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
798 q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
799 -q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
800#endif
801}
802
805{
806#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
807 __m128 vQ1 = w.get128();
808 __m128 vQ2 = q.get128();
809 __m128 A1, B1, A2, B2, A3, B3;
810
811 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0, 1, 2, 0)); // X Y z x
812 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0)); // W W W X
813
814 A1 = A1 * B1;
815
816 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1));
817 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));
818
819 A2 = A2 * B2;
820
821 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2));
822 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));
823
824 A3 = A3 * B3; // A3 *= B3
825
826 A1 = A1 + A2; // AB12
827 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
828 A1 = A1 - A3; // AB123 = AB12 - AB3
829
830 return btQuaternion(A1);
831
832#elif defined(BT_USE_NEON)
833
834 float32x4_t vQ1 = w.get128();
835 float32x4_t vQ2 = q.get128();
836 float32x4_t A1, B1, A2, B2, A3, B3;
837 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
838
839 {
840 float32x2x2_t tmp;
841
842 tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
843 vQ1zx = tmp.val[0];
844
845 tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
846 vQ2zx = tmp.val[0];
847 }
848 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
849
850 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
851
852 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
853 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
854
855 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
856 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
857
858 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
859 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
860
861 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
862 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
863
864 A1 = vmulq_f32(A1, B1);
865 A2 = vmulq_f32(A2, B2);
866 A3 = vmulq_f32(A3, B3); // A3 *= B3
867
868 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
869
870 // change the sign of the last element
871 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
872
873 A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
874
875 return btQuaternion(A1);
876
877#else
878 return btQuaternion(
879 +w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
880 +w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
881 +w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
882 -w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
883#endif
884}
885
888dot(const btQuaternion& q1, const btQuaternion& q2)
889{
890 return q1.dot(q2);
891}
892
896{
897 return q.length();
898}
899
902btAngle(const btQuaternion& q1, const btQuaternion& q2)
903{
904 return q1.angle(q2);
905}
906
910{
911 return q.inverse();
912}
913
920slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t)
921{
922 return q1.slerp(q2, t);
923}
924
926quatRotate(const btQuaternion& rotation, const btVector3& v)
927{
928 btQuaternion q = rotation * v;
929 q *= rotation.inverse();
930#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
931 return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
932#elif defined(BT_USE_NEON)
933 return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
934#else
935 return btVector3(q.getX(), q.getY(), q.getZ());
936#endif
937}
938
940shortestArcQuat(const btVector3& v0, const btVector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
941{
942 btVector3 c = v0.cross(v1);
943 btScalar d = v0.dot(v1);
944
945 if (d < -1.0 + SIMD_EPSILON)
946 {
947 btVector3 n, unused;
948 btPlaneSpace1(v0, n, unused);
949 return btQuaternion(n.x(), n.y(), n.z(), 0.0f); // just pick any vector that is orthogonal to v0
950 }
951
952 btScalar s = btSqrt((1.0f + d) * 2.0f);
953 btScalar rs = 1.0f / s;
954
955 return btQuaternion(c.getX() * rs, c.getY() * rs, c.getZ() * rs, s * 0.5f);
956}
957
960{
961 v0.normalize();
962 v1.normalize();
963 return shortestArcQuat(v0, v1);
964}
965
967{
968 float m_floats[4];
969};
970
972{
973 double m_floats[4];
974};
975
977{
979 for (int i = 0; i < 4; i++)
980 dataOut.m_floats[i] = float(m_floats[i]);
981}
982
984{
985 for (int i = 0; i < 4; i++)
986 m_floats[i] = btScalar(dataIn.m_floats[i]);
987}
988
990{
992 for (int i = 0; i < 4; i++)
993 dataOut.m_floats[i] = double(m_floats[i]);
994}
995
997{
998 for (int i = 0; i < 4; i++)
999 m_floats[i] = btScalar(dataIn.m_floats[i]);
1000}
1001
1003{
1005 for (int i = 0; i < 4; i++)
1006 dataOut.m_floats[i] = m_floats[i];
1007}
1008
1010{
1011 for (int i = 0; i < 4; i++)
1012 m_floats[i] = (btScalar)dataIn.m_floats[i];
1013}
1014
1016{
1017 for (int i = 0; i < 4; i++)
1018 m_floats[i] = (btScalar)dataIn.m_floats[i];
1019}
1020
1021#endif //BT_SIMD__QUATERNION_H_
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions.
Definition: btQuaternion.h:888
btQuaternion slerp(const btQuaternion &q1, const btQuaternion &q2, const btScalar &t)
Return the result of spherical linear interpolation betwen two quaternions.
Definition: btQuaternion.h:920
btQuaternion shortestArcQuat(const btVector3 &v0, const btVector3 &v1)
Definition: btQuaternion.h:940
btQuaternion shortestArcQuatNormalize2(btVector3 &v0, btVector3 &v1)
Definition: btQuaternion.h:959
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion.
Definition: btQuaternion.h:909
btVector3 quatRotate(const btQuaternion &rotation, const btVector3 &v)
Definition: btQuaternion.h:926
btQuaternion operator*(const btQuaternion &q1, const btQuaternion &q2)
Return the product of two quaternions.
Definition: btQuaternion.h:633
btScalar length(const btQuaternion &q)
Return the length of a quaternion.
Definition: btQuaternion.h:895
btScalar btAngle(const btQuaternion &q1, const btQuaternion &q2)
Return the angle between two quaternions.
Definition: btQuaternion.h:902
#define btQuaternionData
Definition: btQuaternion.h:25
#define SIMD_PI
Definition: btScalar.h:526
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floati...
Definition: btScalar.h:314
#define ATTRIBUTE_ALIGNED16(a)
Definition: btScalar.h:99
btScalar btSqrt(btScalar y)
Definition: btScalar.h:466
btScalar btAtan2(btScalar x, btScalar y)
Definition: btScalar.h:518
btScalar btSin(btScalar x)
Definition: btScalar.h:499
btScalar btFabs(btScalar x)
Definition: btScalar.h:497
#define SIMD_FORCE_INLINE
Definition: btScalar.h:98
btScalar btCos(btScalar x)
Definition: btScalar.h:498
#define SIMD_EPSILON
Definition: btScalar.h:543
btScalar btAcos(btScalar x)
Definition: btScalar.h:501
btScalar btAsin(btScalar x)
Definition: btScalar.h:509
#define btAssert(x)
Definition: btScalar.h:153
static T sum(const btAlignedObjectArray< T > &items)
void btPlaneSpace1(const T &n, T &p, T &q)
Definition: btVector3.h:1251
The btQuadWord class is base class for btVector3 and btQuaternion.
Definition: btQuadWord.h:34
const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:119
const btScalar & getY() const
Return the y value.
Definition: btQuadWord.h:101
const btScalar & getX() const
Return the x value.
Definition: btQuadWord.h:99
btScalar m_floats[4]
Definition: btQuadWord.h:67
const btScalar & getZ() const
Return the z value.
Definition: btQuadWord.h:103
const btScalar & z() const
Return the z value.
Definition: btQuadWord.h:117
const btScalar & y() const
Return the y value.
Definition: btQuadWord.h:115
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
Definition: btQuadWord.h:149
const btScalar & x() const
Return the x value.
Definition: btQuadWord.h:113
The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatr...
Definition: btQuaternion.h:50
btQuaternion operator-(const btQuaternion &q2) const
Return the difference between this quaternion and the other.
Definition: btQuaternion.h:526
btQuaternion operator/(const btScalar &s) const
Return an inversely scaled versionof this quaternion.
Definition: btQuaternion.h:427
btQuaternion slerp(const btQuaternion &q, const btScalar &t) const
Return the quaternion which is the result of Spherical Linear Interpolation between this and the othe...
Definition: btQuaternion.h:577
btQuaternion operator*(const btScalar &s) const
Return a scaled version of this quaternion.
Definition: btQuaternion.h:411
void getEulerZYX(btScalar &yawZ, btScalar &pitchY, btScalar &rollX) const
Get the euler angles from this quaternion.
Definition: btQuaternion.h:161
btScalar angleShortestPath(const btQuaternion &q) const
Return the angle between this quaternion and the other along the shortest path.
Definition: btQuaternion.h:457
void deSerializeFloat(const struct btQuaternionFloatData &dataIn)
Definition: btQuaternion.h:983
static const btQuaternion & getIdentity()
Definition: btQuaternion.h:608
btQuaternion & operator*=(const btQuaternion &q)
Multiply this quaternion by q on the right.
Definition: btQuaternion.h:254
btScalar length() const
Return the length of the quaternion.
Definition: btQuaternion.h:370
btQuaternion(const btVector3 &_axis, const btScalar &_angle)
Axis angle Constructor.
Definition: btQuaternion.h:89
btQuaternion farthest(const btQuaternion &qd) const
Definition: btQuaternion.h:552
btQuaternion()
No initialization constructor.
Definition: btQuaternion.h:53
const btScalar & getW() const
Definition: btQuaternion.h:614
btScalar dot(const btQuaternion &q) const
Return the dot product between this quaternion and another.
Definition: btQuaternion.h:337
void serialize(struct btQuaternionData &dataOut) const
void deSerializeDouble(const struct btQuaternionDoubleData &dataIn)
Definition: btQuaternion.h:996
btScalar getAngle() const
Return the angle [0, 2Pi] of rotation represented by this quaternion.
Definition: btQuaternion.h:468
void serializeDouble(struct btQuaternionDoubleData &dataOut) const
Definition: btQuaternion.h:989
btQuaternion operator+(const btQuaternion &q2) const
Return the sum of this quaternion and the other.
Definition: btQuaternion.h:511
btQuaternion(const btScalar &_x, const btScalar &_y, const btScalar &_z, const btScalar &_w)
Constructor from scalars.
Definition: btQuaternion.h:82
btScalar angle(const btQuaternion &q) const
Return the half angle between this quaternion and the other.
Definition: btQuaternion.h:448
btQuaternion nearest(const btQuaternion &qd) const
Definition: btQuaternion.h:563
btQuaternion & operator*=(const btScalar &s)
Scale this quaternion.
Definition: btQuaternion.h:234
btQuaternion(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Constructor from Euler angles.
Definition: btQuaternion.h:97
btScalar length2() const
Return the length squared of the quaternion.
Definition: btQuaternion.h:364
btQuaternion normalized() const
Return a normalized version of this quaternion.
Definition: btQuaternion.h:442
btQuaternion operator-() const
Return the negative of this quaternion This simply negates each element.
Definition: btQuaternion.h:540
btQuaternion & safeNormalize()
Definition: btQuaternion.h:374
btQuaternion inverse() const
Return the inverse of this quaternion.
Definition: btQuaternion.h:497
btQuaternion & operator/=(const btScalar &s)
Inversely scale this quaternion.
Definition: btQuaternion.h:435
btScalar getAngleShortestPath() const
Return the angle [0, Pi] of rotation represented by this quaternion along the shortest path.
Definition: btQuaternion.h:475
btQuaternion & operator-=(const btQuaternion &q)
Subtract out a quaternion.
Definition: btQuaternion.h:217
btVector3 getAxis() const
Return the axis of the rotation represented by this quaternion.
Definition: btQuaternion.h:486
void setRotation(const btVector3 &axis, const btScalar &_angle)
Set the rotation using axis angle notation.
Definition: btQuaternion.h:108
void setEulerZYX(const btScalar &yawZ, const btScalar &pitchY, const btScalar &rollX)
Set the quaternion using euler angles.
Definition: btQuaternion.h:140
btQuaternion & normalize()
Normalize the quaternion Such that x^2 + y^2 + z^2 +w^2 = 1.
Definition: btQuaternion.h:385
void setEuler(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the quaternion using Euler angles.
Definition: btQuaternion.h:120
btQuaternion & operator+=(const btQuaternion &q)
Add two quaternions.
Definition: btQuaternion.h:200
void deSerialize(const struct btQuaternionFloatData &dataIn)
void serializeFloat(struct btQuaternionFloatData &dataOut) const
Definition: btQuaternion.h:976
btVector3 can be used to represent 3D points and vectors.
Definition: btVector3.h:82
const btScalar & getZ() const
Return the z value.
Definition: btVector3.h:565
const btScalar & z() const
Return the z value.
Definition: btVector3.h:579
btScalar length() const
Return the length of the vector.
Definition: btVector3.h:257
btVector3 cross(const btVector3 &v) const
Return the cross product between this and another vector.
Definition: btVector3.h:380
btScalar dot(const btVector3 &v) const
Return the dot product.
Definition: btVector3.h:229
const btScalar & getY() const
Return the y value.
Definition: btVector3.h:563
const btScalar & x() const
Return the x value.
Definition: btVector3.h:575
btVector3 & normalize()
Normalize this vector x^2 + y^2 + z^2 = 1.
Definition: btVector3.h:303
const btScalar & getX() const
Return the x value.
Definition: btVector3.h:561
const btScalar & y() const
Return the y value.
Definition: btVector3.h:577