Bullet Collision Detection & Physics Library
btMatrix3x3.h
Go to the documentation of this file.
1/*
2Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans https://bulletphysics.org
3
4This software is provided 'as-is', without any express or implied warranty.
5In no event will the authors be held liable for any damages arising from the use of this software.
6Permission is granted to anyone to use this software for any purpose,
7including commercial applications, and to alter it and redistribute it freely,
8subject to the following restrictions:
9
101. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
112. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
123. This notice may not be removed or altered from any source distribution.
13*/
14
15#ifndef BT_MATRIX3x3_H
16#define BT_MATRIX3x3_H
17
18#include "btVector3.h"
19#include "btQuaternion.h"
20#include <stdio.h>
21
22#ifdef BT_USE_SSE
23//const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
24//const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
25#define vMPPP (_mm_set_ps(+0.0f, +0.0f, +0.0f, -0.0f))
26#endif
27
28#if defined(BT_USE_SSE)
29#define v0000 (_mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f))
30#define v1000 (_mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f))
31#define v0100 (_mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f))
32#define v0010 (_mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f))
33#elif defined(BT_USE_NEON)
34const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0000) = {0.0f, 0.0f, 0.0f, 0.0f};
35const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
36const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
37const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
38#endif
39
40#ifdef BT_USE_DOUBLE_PRECISION
41#define btMatrix3x3Data btMatrix3x3DoubleData
42#else
43#define btMatrix3x3Data btMatrix3x3FloatData
44#endif //BT_USE_DOUBLE_PRECISION
45
50{
52 btVector3 m_el[3];
53
54public:
57
58 // explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
59
61 explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
62 /*
63 template <typename btScalar>
64 Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
65 {
66 setEulerYPR(yaw, pitch, roll);
67 }
68 */
70 btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
71 const btScalar& yx, const btScalar& yy, const btScalar& yz,
72 const btScalar& zx, const btScalar& zy, const btScalar& zz)
73 {
74 setValue(xx, xy, xz,
75 yx, yy, yz,
76 zx, zy, zz);
77 }
78
79#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
80 SIMD_FORCE_INLINE btMatrix3x3(const btSimdFloat4 v0, const btSimdFloat4 v1, const btSimdFloat4 v2)
81 {
82 m_el[0].mVec128 = v0;
83 m_el[1].mVec128 = v1;
84 m_el[2].mVec128 = v2;
85 }
86
87 SIMD_FORCE_INLINE btMatrix3x3(const btVector3& v0, const btVector3& v1, const btVector3& v2)
88 {
89 m_el[0] = v0;
90 m_el[1] = v1;
91 m_el[2] = v2;
92 }
93
94 // Copy constructor
96 {
97 m_el[0].mVec128 = rhs.m_el[0].mVec128;
98 m_el[1].mVec128 = rhs.m_el[1].mVec128;
99 m_el[2].mVec128 = rhs.m_el[2].mVec128;
100 }
101
102 // Assignment Operator
103 SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
104 {
105 m_el[0].mVec128 = m.m_el[0].mVec128;
106 m_el[1].mVec128 = m.m_el[1].mVec128;
107 m_el[2].mVec128 = m.m_el[2].mVec128;
108
109 return *this;
110 }
111
112#else
113
116 {
117 m_el[0] = other.m_el[0];
118 m_el[1] = other.m_el[1];
119 m_el[2] = other.m_el[2];
120 }
121
124 {
125 m_el[0] = other.m_el[0];
126 m_el[1] = other.m_el[1];
127 m_el[2] = other.m_el[2];
128 return *this;
129 }
130
132 {
133 m_el[0] = v0;
134 m_el[1] = v1;
135 m_el[2] = v2;
136 }
137
138#endif
139
143 {
144 return btVector3(m_el[0][i], m_el[1][i], m_el[2][i]);
145 }
146
150 {
151 btFullAssert(0 <= i && i < 3);
152 return m_el[i];
153 }
154
158 {
159 btFullAssert(0 <= i && i < 3);
160 return m_el[i];
161 }
162
166 {
167 btFullAssert(0 <= i && i < 3);
168 return m_el[i];
169 }
170
174 btMatrix3x3& operator*=(const btMatrix3x3& m);
175
180
185
189 {
190 m_el[0].setValue(m[0], m[4], m[8]);
191 m_el[1].setValue(m[1], m[5], m[9]);
192 m_el[2].setValue(m[2], m[6], m[10]);
193 }
204 void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz,
205 const btScalar& yx, const btScalar& yy, const btScalar& yz,
206 const btScalar& zx, const btScalar& zy, const btScalar& zz)
207 {
208 m_el[0].setValue(xx, xy, xz);
209 m_el[1].setValue(yx, yy, yz);
210 m_el[2].setValue(zx, zy, zz);
211 }
212
216 {
217 btScalar d = q.length2();
218 btFullAssert(d != btScalar(0.0));
219 btScalar s = btScalar(2.0) / d;
220
221#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
222 __m128 vs, Q = q.get128();
223 __m128i Qi = btCastfTo128i(Q);
224 __m128 Y, Z;
225 __m128 V1, V2, V3;
226 __m128 V11, V21, V31;
227 __m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
228 __m128i NQi = btCastfTo128i(NQ);
229
230 V1 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 2, 3))); // Y X Z W
231 V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0, 0, 1, 3)); // -X -X Y W
232 V3 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(2, 1, 0, 3))); // Z Y X W
233 V1 = _mm_xor_ps(V1, vMPPP); // change the sign of the first element
234
235 V11 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 1, 0, 3))); // Y Y X W
236 V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
237 V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0, 2, 0, 3)); // X Z -X -W
238
239 V2 = V2 * V1; //
240 V1 = V1 * V11; //
241 V3 = V3 * V31; //
242
243 V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2, 3, 1, 3)); // -Z -W Y W
244 V11 = V11 * V21; //
245 V21 = _mm_xor_ps(V21, vMPPP); // change the sign of the first element
246 V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3, 3, 1, 3)); // W W -Y -W
247 V31 = _mm_xor_ps(V31, vMPPP); // change the sign of the first element
248 Y = btCastiTo128f(_mm_shuffle_epi32(NQi, BT_SHUFFLE(3, 2, 0, 3))); // -W -Z -X -W
249 Z = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 1, 3))); // Y X Y W
250
251 vs = _mm_load_ss(&s);
252 V21 = V21 * Y;
253 V31 = V31 * Z;
254
255 V1 = V1 + V11;
256 V2 = V2 + V21;
257 V3 = V3 + V31;
258
259 vs = bt_splat3_ps(vs, 0);
260 // s ready
261 V1 = V1 * vs;
262 V2 = V2 * vs;
263 V3 = V3 * vs;
264
265 V1 = V1 + v1000;
266 V2 = V2 + v0100;
267 V3 = V3 + v0010;
268
269 m_el[0] = V1;
270 m_el[1] = V2;
271 m_el[2] = V3;
272#else
273 btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
274 btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
275 btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
276 btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
277 setValue(
278 btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
279 xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
280 xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
281#endif
282 }
283
289 void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
290 {
291 setEulerZYX(roll, pitch, yaw);
292 }
293
303 void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
304 {
306 btScalar ci(btCos(eulerX));
307 btScalar cj(btCos(eulerY));
308 btScalar ch(btCos(eulerZ));
309 btScalar si(btSin(eulerX));
310 btScalar sj(btSin(eulerY));
311 btScalar sh(btSin(eulerZ));
312 btScalar cc = ci * ch;
313 btScalar cs = ci * sh;
314 btScalar sc = si * ch;
315 btScalar ss = si * sh;
316
317 setValue(cj * ch, sj * sc - cs, sj * cc + ss,
318 cj * sh, sj * ss + cc, sj * cs - sc,
319 -sj, cj * si, cj * ci);
320 }
321
324 {
325#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
326 m_el[0] = v1000;
327 m_el[1] = v0100;
328 m_el[2] = v0010;
329#else
330 setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
331 btScalar(0.0), btScalar(1.0), btScalar(0.0),
332 btScalar(0.0), btScalar(0.0), btScalar(1.0));
333#endif
334 }
335
337 void setZero()
338 {
339#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
340 m_el[0] = v0000;
341 m_el[1] = v0000;
342 m_el[2] = v0000;
343#else
344 setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0),
345 btScalar(0.0), btScalar(0.0), btScalar(0.0),
346 btScalar(0.0), btScalar(0.0), btScalar(0.0));
347#endif
348 }
349
350 static const btMatrix3x3& getIdentity()
351 {
352#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
353 static const btMatrix3x3
354 identityMatrix(v1000, v0100, v0010);
355#else
356 static const btMatrix3x3
357 identityMatrix(
358 btScalar(1.0), btScalar(0.0), btScalar(0.0),
359 btScalar(0.0), btScalar(1.0), btScalar(0.0),
360 btScalar(0.0), btScalar(0.0), btScalar(1.0));
361#endif
362 return identityMatrix;
363 }
364
368 {
369#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
370 __m128 v0 = m_el[0].mVec128;
371 __m128 v1 = m_el[1].mVec128;
372 __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
373 __m128* vm = (__m128*)m;
374 __m128 vT;
375
376 v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
377
378 vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
379 v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
380
381 v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3)); // y0 y1 y2 0
382 v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3)); // x0 x1 x2 0
383 v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
384
385 vm[0] = v0;
386 vm[1] = v1;
387 vm[2] = v2;
388#elif defined(BT_USE_NEON)
389 // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
390 static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
391 float32x4_t* vm = (float32x4_t*)m;
392 float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
393 float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f)); // {x2 0 }, {y2 0}
394 float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
395 float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
396 float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
397 float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q); // z0 z1 z2 0
398
399 vm[0] = v0;
400 vm[1] = v1;
401 vm[2] = v2;
402#else
403 m[0] = btScalar(m_el[0].x());
404 m[1] = btScalar(m_el[1].x());
405 m[2] = btScalar(m_el[2].x());
406 m[3] = btScalar(0.0);
407 m[4] = btScalar(m_el[0].y());
408 m[5] = btScalar(m_el[1].y());
409 m[6] = btScalar(m_el[2].y());
410 m[7] = btScalar(0.0);
411 m[8] = btScalar(m_el[0].z());
412 m[9] = btScalar(m_el[1].z());
413 m[10] = btScalar(m_el[2].z());
414 m[11] = btScalar(0.0);
415#endif
416 }
417
420 void getRotation(btQuaternion & q) const
421 {
422#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
423 btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
424 btScalar s, x;
425
426 union {
427 btSimdFloat4 vec;
428 btScalar f[4];
429 } temp;
430
431 if (trace > btScalar(0.0))
432 {
433 x = trace + btScalar(1.0);
434
435 temp.f[0] = m_el[2].y() - m_el[1].z();
436 temp.f[1] = m_el[0].z() - m_el[2].x();
437 temp.f[2] = m_el[1].x() - m_el[0].y();
438 temp.f[3] = x;
439 //temp.f[3]= s * btScalar(0.5);
440 }
441 else
442 {
443 int i, j, k;
444 if (m_el[0].x() < m_el[1].y())
445 {
446 if (m_el[1].y() < m_el[2].z())
447 {
448 i = 2;
449 j = 0;
450 k = 1;
451 }
452 else
453 {
454 i = 1;
455 j = 2;
456 k = 0;
457 }
458 }
459 else
460 {
461 if (m_el[0].x() < m_el[2].z())
462 {
463 i = 2;
464 j = 0;
465 k = 1;
466 }
467 else
468 {
469 i = 0;
470 j = 1;
471 k = 2;
472 }
473 }
474
475 x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
476
477 temp.f[3] = (m_el[k][j] - m_el[j][k]);
478 temp.f[j] = (m_el[j][i] + m_el[i][j]);
479 temp.f[k] = (m_el[k][i] + m_el[i][k]);
480 temp.f[i] = x;
481 //temp.f[i] = s * btScalar(0.5);
482 }
483
484 s = btSqrt(x);
485 q.set128(temp.vec);
486 s = btScalar(0.5) / s;
487
488 q *= s;
489#else
490 btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
491
492 btScalar temp[4];
493
494 if (trace > btScalar(0.0))
495 {
496 btScalar s = btSqrt(trace + btScalar(1.0));
497 temp[3] = (s * btScalar(0.5));
498 s = btScalar(0.5) / s;
499
500 temp[0] = ((m_el[2].y() - m_el[1].z()) * s);
501 temp[1] = ((m_el[0].z() - m_el[2].x()) * s);
502 temp[2] = ((m_el[1].x() - m_el[0].y()) * s);
503 }
504 else
505 {
506 int i = m_el[0].x() < m_el[1].y() ? (m_el[1].y() < m_el[2].z() ? 2 : 1) : (m_el[0].x() < m_el[2].z() ? 2 : 0);
507 int j = (i + 1) % 3;
508 int k = (i + 2) % 3;
509
510 btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
511 temp[i] = s * btScalar(0.5);
512 s = btScalar(0.5) / s;
513
514 temp[3] = (m_el[k][j] - m_el[j][k]) * s;
515 temp[j] = (m_el[j][i] + m_el[i][j]) * s;
516 temp[k] = (m_el[k][i] + m_el[i][k]) * s;
517 }
518 q.setValue(temp[0], temp[1], temp[2], temp[3]);
519#endif
520 }
521
526 void getEulerYPR(btScalar & yaw, btScalar & pitch, btScalar & roll) const
527 {
528 // first use the normal calculus
529 yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
530 pitch = btScalar(btAsin(-m_el[2].x()));
531 roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
532
533 // on pitch = +/-HalfPI
534 if (btFabs(pitch) == SIMD_HALF_PI)
535 {
536 if (yaw > 0)
537 yaw -= SIMD_PI;
538 else
539 yaw += SIMD_PI;
540
541 if (roll > 0)
542 roll -= SIMD_PI;
543 else
544 roll += SIMD_PI;
545 }
546 };
547
553 void getEulerZYX(btScalar & yaw, btScalar & pitch, btScalar & roll, unsigned int solution_number = 1) const
554 {
555 struct Euler
556 {
557 btScalar yaw;
558 btScalar pitch;
559 btScalar roll;
560 };
561
562 Euler euler_out;
563 Euler euler_out2; //second solution
564 //get the pointer to the raw data
565
566 // Check that pitch is not at a singularity
567 if (btFabs(m_el[2].x()) >= 1)
568 {
569 euler_out.yaw = 0;
570 euler_out2.yaw = 0;
571
572 // From difference of angles formula
573 btScalar delta = btAtan2(m_el[0].x(), m_el[0].z());
574 if (m_el[2].x() > 0) //gimbal locked up
575 {
576 euler_out.pitch = SIMD_PI / btScalar(2.0);
577 euler_out2.pitch = SIMD_PI / btScalar(2.0);
578 euler_out.roll = euler_out.pitch + delta;
579 euler_out2.roll = euler_out.pitch + delta;
580 }
581 else // gimbal locked down
582 {
583 euler_out.pitch = -SIMD_PI / btScalar(2.0);
584 euler_out2.pitch = -SIMD_PI / btScalar(2.0);
585 euler_out.roll = -euler_out.pitch + delta;
586 euler_out2.roll = -euler_out.pitch + delta;
587 }
588 }
589 else
590 {
591 euler_out.pitch = -btAsin(m_el[2].x());
592 euler_out2.pitch = SIMD_PI - euler_out.pitch;
593
594 euler_out.roll = btAtan2(m_el[2].y() / btCos(euler_out.pitch),
595 m_el[2].z() / btCos(euler_out.pitch));
596 euler_out2.roll = btAtan2(m_el[2].y() / btCos(euler_out2.pitch),
597 m_el[2].z() / btCos(euler_out2.pitch));
598
599 euler_out.yaw = btAtan2(m_el[1].x() / btCos(euler_out.pitch),
600 m_el[0].x() / btCos(euler_out.pitch));
601 euler_out2.yaw = btAtan2(m_el[1].x() / btCos(euler_out2.pitch),
602 m_el[0].x() / btCos(euler_out2.pitch));
603 }
604
605 if (solution_number == 1)
606 {
607 yaw = euler_out.yaw;
608 pitch = euler_out.pitch;
609 roll = euler_out.roll;
610 }
611 else
612 {
613 yaw = euler_out2.yaw;
614 pitch = euler_out2.pitch;
615 roll = euler_out2.roll;
616 }
617 }
618
623 {
624#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
625 return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
626#else
627 return btMatrix3x3(
628 m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
629 m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
630 m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
631#endif
632 }
633
635 btScalar determinant() const;
637 btMatrix3x3 adjoint() const;
639 btMatrix3x3 absolute() const;
641 btMatrix3x3 transpose() const;
643 btMatrix3x3 inverse() const;
644
649 {
650 btVector3 col1 = getColumn(0);
651 btVector3 col2 = getColumn(1);
652 btVector3 col3 = getColumn(2);
653
654 btScalar det = btDot(col1, btCross(col2, col3));
655 if (btFabs(det) > SIMD_EPSILON)
656 {
657 det = 1.0f / det;
658 }
659 btVector3 x;
660 x[0] = det * btDot(b, btCross(col2, col3));
661 x[1] = det * btDot(col1, btCross(b, col3));
662 x[2] = det * btDot(col1, btCross(col2, b));
663 return x;
664 }
665
666 btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
667 btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;
668
670 {
671 return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
672 }
674 {
675 return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
676 }
678 {
679 return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
680 }
681
688 SIMD_FORCE_INLINE void extractRotation(btQuaternion & q, btScalar tolerance = 1.0e-9, int maxIter = 100)
689 {
690 int iter = 0;
691 btScalar w;
692 const btMatrix3x3& A = *this;
693 for (iter = 0; iter < maxIter; iter++)
694 {
695 btMatrix3x3 R(q);
696 btVector3 omega = (R.getColumn(0).cross(A.getColumn(0)) + R.getColumn(1).cross(A.getColumn(1)) + R.getColumn(2).cross(A.getColumn(2))) * (btScalar(1.0) / btFabs(R.getColumn(0).dot(A.getColumn(0)) + R.getColumn(1).dot(A.getColumn(1)) + R.getColumn(2).dot(A.getColumn(2))) +
697 tolerance);
698 w = omega.norm();
699 if (w < tolerance)
700 break;
701 q = btQuaternion(btVector3((btScalar(1.0) / w) * omega), w) *
702 q;
703 q.normalize();
704 }
705 }
706
716 void diagonalize(btMatrix3x3 & rot, btScalar threshold, int maxSteps)
717 {
718 rot.setIdentity();
719 for (int step = maxSteps; step > 0; step--)
720 {
721 // find off-diagonal element [p][q] with largest magnitude
722 int p = 0;
723 int q = 1;
724 int r = 2;
725 btScalar max = btFabs(m_el[0][1]);
726 btScalar v = btFabs(m_el[0][2]);
727 if (v > max)
728 {
729 q = 2;
730 r = 1;
731 max = v;
732 }
733 v = btFabs(m_el[1][2]);
734 if (v > max)
735 {
736 p = 1;
737 q = 2;
738 r = 0;
739 max = v;
740 }
741
742 btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
743 if (max <= t)
744 {
745 if (max <= SIMD_EPSILON * t)
746 {
747 return;
748 }
749 step = 1;
750 }
751
752 // compute Jacobi rotation J which leads to a zero for element [p][q]
753 btScalar mpq = m_el[p][q];
754 btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
755 btScalar theta2 = theta * theta;
756 btScalar cos;
757 btScalar sin;
758 if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
759 {
760 t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
761 : 1 / (theta - btSqrt(1 + theta2));
762 cos = 1 / btSqrt(1 + t * t);
763 sin = cos * t;
764 }
765 else
766 {
767 // approximation for large theta-value, i.e., a nearly diagonal matrix
768 t = 1 / (theta * (2 + btScalar(0.5) / theta2));
769 cos = 1 - btScalar(0.5) * t * t;
770 sin = cos * t;
771 }
772
773 // apply rotation to matrix (this = J^T * this * J)
774 m_el[p][q] = m_el[q][p] = 0;
775 m_el[p][p] -= t * mpq;
776 m_el[q][q] += t * mpq;
777 btScalar mrp = m_el[r][p];
778 btScalar mrq = m_el[r][q];
779 m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
780 m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
781
782 // apply rotation to rot (rot = rot * J)
783 for (int i = 0; i < 3; i++)
784 {
785 btVector3& row = rot[i];
786 mrp = row[p];
787 mrq = row[q];
788 row[p] = cos * mrp - sin * mrq;
789 row[q] = cos * mrq + sin * mrp;
790 }
791 }
792 }
793
801 btScalar cofac(int r1, int c1, int r2, int c2) const
802 {
803 return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
804 }
805
806 void serialize(struct btMatrix3x3Data & dataOut) const;
807
808 void serializeFloat(struct btMatrix3x3FloatData & dataOut) const;
809
810 void deSerialize(const struct btMatrix3x3Data& dataIn);
811
812 void deSerializeFloat(const struct btMatrix3x3FloatData& dataIn);
813
814 void deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn);
815};
816
819{
820#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
821 __m128 rv00, rv01, rv02;
822 __m128 rv10, rv11, rv12;
823 __m128 rv20, rv21, rv22;
824 __m128 mv0, mv1, mv2;
825
826 rv02 = m_el[0].mVec128;
827 rv12 = m_el[1].mVec128;
828 rv22 = m_el[2].mVec128;
829
830 mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
831 mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
832 mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);
833
834 // rv0
835 rv00 = bt_splat_ps(rv02, 0);
836 rv01 = bt_splat_ps(rv02, 1);
837 rv02 = bt_splat_ps(rv02, 2);
838
839 rv00 = _mm_mul_ps(rv00, mv0);
840 rv01 = _mm_mul_ps(rv01, mv1);
841 rv02 = _mm_mul_ps(rv02, mv2);
842
843 // rv1
844 rv10 = bt_splat_ps(rv12, 0);
845 rv11 = bt_splat_ps(rv12, 1);
846 rv12 = bt_splat_ps(rv12, 2);
847
848 rv10 = _mm_mul_ps(rv10, mv0);
849 rv11 = _mm_mul_ps(rv11, mv1);
850 rv12 = _mm_mul_ps(rv12, mv2);
851
852 // rv2
853 rv20 = bt_splat_ps(rv22, 0);
854 rv21 = bt_splat_ps(rv22, 1);
855 rv22 = bt_splat_ps(rv22, 2);
856
857 rv20 = _mm_mul_ps(rv20, mv0);
858 rv21 = _mm_mul_ps(rv21, mv1);
859 rv22 = _mm_mul_ps(rv22, mv2);
860
861 rv00 = _mm_add_ps(rv00, rv01);
862 rv10 = _mm_add_ps(rv10, rv11);
863 rv20 = _mm_add_ps(rv20, rv21);
864
865 m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
866 m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
867 m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
868
869#elif defined(BT_USE_NEON)
870
871 float32x4_t rv0, rv1, rv2;
872 float32x4_t v0, v1, v2;
873 float32x4_t mv0, mv1, mv2;
874
875 v0 = m_el[0].mVec128;
876 v1 = m_el[1].mVec128;
877 v2 = m_el[2].mVec128;
878
879 mv0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
880 mv1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
881 mv2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
882
883 rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
884 rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
885 rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
886
887 rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
888 rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
889 rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
890
891 rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
892 rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
893 rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
894
895 m_el[0].mVec128 = rv0;
896 m_el[1].mVec128 = rv1;
897 m_el[2].mVec128 = rv2;
898#else
899 setValue(
900 m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
901 m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
902 m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
903#endif
904 return *this;
905}
906
909{
910#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
911 m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
912 m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
913 m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
914#else
915 setValue(
916 m_el[0][0] + m.m_el[0][0],
917 m_el[0][1] + m.m_el[0][1],
918 m_el[0][2] + m.m_el[0][2],
919 m_el[1][0] + m.m_el[1][0],
920 m_el[1][1] + m.m_el[1][1],
921 m_el[1][2] + m.m_el[1][2],
922 m_el[2][0] + m.m_el[2][0],
923 m_el[2][1] + m.m_el[2][1],
924 m_el[2][2] + m.m_el[2][2]);
925#endif
926 return *this;
927}
928
930operator*(const btMatrix3x3& m, const btScalar& k)
931{
932#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
933 __m128 vk = bt_splat_ps(_mm_load_ss((float*)&k), 0x80);
934 return btMatrix3x3(
935 _mm_mul_ps(m[0].mVec128, vk),
936 _mm_mul_ps(m[1].mVec128, vk),
937 _mm_mul_ps(m[2].mVec128, vk));
938#elif defined(BT_USE_NEON)
939 return btMatrix3x3(
940 vmulq_n_f32(m[0].mVec128, k),
941 vmulq_n_f32(m[1].mVec128, k),
942 vmulq_n_f32(m[2].mVec128, k));
943#else
944 return btMatrix3x3(
945 m[0].x() * k, m[0].y() * k, m[0].z() * k,
946 m[1].x() * k, m[1].y() * k, m[1].z() * k,
947 m[2].x() * k, m[2].y() * k, m[2].z() * k);
948#endif
949}
950
952operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
953{
954#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
955 return btMatrix3x3(
956 m1[0].mVec128 + m2[0].mVec128,
957 m1[1].mVec128 + m2[1].mVec128,
958 m1[2].mVec128 + m2[2].mVec128);
959#else
960 return btMatrix3x3(
961 m1[0][0] + m2[0][0],
962 m1[0][1] + m2[0][1],
963 m1[0][2] + m2[0][2],
964
965 m1[1][0] + m2[1][0],
966 m1[1][1] + m2[1][1],
967 m1[1][2] + m2[1][2],
968
969 m1[2][0] + m2[2][0],
970 m1[2][1] + m2[2][1],
971 m1[2][2] + m2[2][2]);
972#endif
973}
974
976operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
977{
978#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
979 return btMatrix3x3(
980 m1[0].mVec128 - m2[0].mVec128,
981 m1[1].mVec128 - m2[1].mVec128,
982 m1[2].mVec128 - m2[2].mVec128);
983#else
984 return btMatrix3x3(
985 m1[0][0] - m2[0][0],
986 m1[0][1] - m2[0][1],
987 m1[0][2] - m2[0][2],
988
989 m1[1][0] - m2[1][0],
990 m1[1][1] - m2[1][1],
991 m1[1][2] - m2[1][2],
992
993 m1[2][0] - m2[2][0],
994 m1[2][1] - m2[2][1],
995 m1[2][2] - m2[2][2]);
996#endif
997}
998
1001{
1002#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
1003 m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
1004 m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
1005 m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
1006#else
1007 setValue(
1008 m_el[0][0] - m.m_el[0][0],
1009 m_el[0][1] - m.m_el[0][1],
1010 m_el[0][2] - m.m_el[0][2],
1011 m_el[1][0] - m.m_el[1][0],
1012 m_el[1][1] - m.m_el[1][1],
1013 m_el[1][2] - m.m_el[1][2],
1014 m_el[2][0] - m.m_el[2][0],
1015 m_el[2][1] - m.m_el[2][1],
1016 m_el[2][2] - m.m_el[2][2]);
1017#endif
1018 return *this;
1019}
1020
1023{
1024 return btTriple((*this)[0], (*this)[1], (*this)[2]);
1025}
1026
1029{
1030#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1031 return btMatrix3x3(
1032 _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
1033 _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
1034 _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
1035#elif defined(BT_USE_NEON)
1036 return btMatrix3x3(
1037 (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
1038 (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
1039 (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
1040#else
1041 return btMatrix3x3(
1042 btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
1043 btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
1044 btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
1045#endif
1046}
1047
1050{
1051#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1052 __m128 v0 = m_el[0].mVec128;
1053 __m128 v1 = m_el[1].mVec128;
1054 __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
1055 __m128 vT;
1056
1057 v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
1058
1059 vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
1060 v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
1061
1062 v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3)); // y0 y1 y2 0
1063 v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3)); // x0 x1 x2 0
1064 v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
1065
1066 return btMatrix3x3(v0, v1, v2);
1067#elif defined(BT_USE_NEON)
1068 // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
1069 static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
1070 float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
1071 float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f)); // {x2 0 }, {y2 0}
1072 float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
1073 float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
1074 float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
1075 float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q); // z0 z1 z2 0
1076 return btMatrix3x3(v0, v1, v2);
1077#else
1078 return btMatrix3x3(m_el[0].x(), m_el[1].x(), m_el[2].x(),
1079 m_el[0].y(), m_el[1].y(), m_el[2].y(),
1080 m_el[0].z(), m_el[1].z(), m_el[2].z());
1081#endif
1082}
1083
1086{
1087 return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
1088 cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
1089 cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
1090}
1091
1094{
1095 btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
1096 btScalar det = (*this)[0].dot(co);
1097 //btFullAssert(det != btScalar(0.0));
1098 btAssert(det != btScalar(0.0));
1099 btScalar s = btScalar(1.0) / det;
1100 return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
1101 co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
1102 co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
1103}
1104
1107{
1108#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1109 // zeros w
1110 // static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
1111 __m128 row = m_el[0].mVec128;
1112 __m128 m0 = _mm_and_ps(m.getRow(0).mVec128, btvFFF0fMask);
1113 __m128 m1 = _mm_and_ps(m.getRow(1).mVec128, btvFFF0fMask);
1114 __m128 m2 = _mm_and_ps(m.getRow(2).mVec128, btvFFF0fMask);
1115 __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
1116 __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
1117 __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
1118 row = m_el[1].mVec128;
1119 r0 = _mm_add_ps(r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
1120 r1 = _mm_add_ps(r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
1121 r2 = _mm_add_ps(r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
1122 row = m_el[2].mVec128;
1123 r0 = _mm_add_ps(r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
1124 r1 = _mm_add_ps(r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
1125 r2 = _mm_add_ps(r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
1126 return btMatrix3x3(r0, r1, r2);
1127
1128#elif defined BT_USE_NEON
1129 // zeros w
1130 static const uint32x4_t xyzMask = (const uint32x4_t){static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0};
1131 float32x4_t m0 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(0).mVec128, xyzMask);
1132 float32x4_t m1 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(1).mVec128, xyzMask);
1133 float32x4_t m2 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(2).mVec128, xyzMask);
1134 float32x4_t row = m_el[0].mVec128;
1135 float32x4_t r0 = vmulq_lane_f32(m0, vget_low_f32(row), 0);
1136 float32x4_t r1 = vmulq_lane_f32(m0, vget_low_f32(row), 1);
1137 float32x4_t r2 = vmulq_lane_f32(m0, vget_high_f32(row), 0);
1138 row = m_el[1].mVec128;
1139 r0 = vmlaq_lane_f32(r0, m1, vget_low_f32(row), 0);
1140 r1 = vmlaq_lane_f32(r1, m1, vget_low_f32(row), 1);
1141 r2 = vmlaq_lane_f32(r2, m1, vget_high_f32(row), 0);
1142 row = m_el[2].mVec128;
1143 r0 = vmlaq_lane_f32(r0, m2, vget_low_f32(row), 0);
1144 r1 = vmlaq_lane_f32(r1, m2, vget_low_f32(row), 1);
1145 r2 = vmlaq_lane_f32(r2, m2, vget_high_f32(row), 0);
1146 return btMatrix3x3(r0, r1, r2);
1147#else
1148 return btMatrix3x3(
1149 m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
1150 m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
1151 m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
1152 m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
1153 m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
1154 m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
1155 m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
1156 m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
1157 m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
1158#endif
1159}
1160
1163{
1164#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1165 __m128 a0 = m_el[0].mVec128;
1166 __m128 a1 = m_el[1].mVec128;
1167 __m128 a2 = m_el[2].mVec128;
1168
1169 btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1170 __m128 mx = mT[0].mVec128;
1171 __m128 my = mT[1].mVec128;
1172 __m128 mz = mT[2].mVec128;
1173
1174 __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
1175 __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
1176 __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
1177 r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
1178 r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
1179 r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
1180 r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
1181 r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
1182 r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
1183 return btMatrix3x3(r0, r1, r2);
1184
1185#elif defined BT_USE_NEON
1186 float32x4_t a0 = m_el[0].mVec128;
1187 float32x4_t a1 = m_el[1].mVec128;
1188 float32x4_t a2 = m_el[2].mVec128;
1189
1190 btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1191 float32x4_t mx = mT[0].mVec128;
1192 float32x4_t my = mT[1].mVec128;
1193 float32x4_t mz = mT[2].mVec128;
1194
1195 float32x4_t r0 = vmulq_lane_f32(mx, vget_low_f32(a0), 0);
1196 float32x4_t r1 = vmulq_lane_f32(mx, vget_low_f32(a1), 0);
1197 float32x4_t r2 = vmulq_lane_f32(mx, vget_low_f32(a2), 0);
1198 r0 = vmlaq_lane_f32(r0, my, vget_low_f32(a0), 1);
1199 r1 = vmlaq_lane_f32(r1, my, vget_low_f32(a1), 1);
1200 r2 = vmlaq_lane_f32(r2, my, vget_low_f32(a2), 1);
1201 r0 = vmlaq_lane_f32(r0, mz, vget_high_f32(a0), 0);
1202 r1 = vmlaq_lane_f32(r1, mz, vget_high_f32(a1), 0);
1203 r2 = vmlaq_lane_f32(r2, mz, vget_high_f32(a2), 0);
1204 return btMatrix3x3(r0, r1, r2);
1205
1206#else
1207 return btMatrix3x3(
1208 m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
1209 m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
1210 m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
1211#endif
1212}
1213
1216{
1217#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
1218 return v.dot3(m[0], m[1], m[2]);
1219#else
1220 return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
1221#endif
1222}
1223
1226{
1227#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1228
1229 const __m128 vv = v.mVec128;
1230
1231 __m128 c0 = bt_splat_ps(vv, 0);
1232 __m128 c1 = bt_splat_ps(vv, 1);
1233 __m128 c2 = bt_splat_ps(vv, 2);
1234
1235 c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask));
1236 c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask));
1237 c0 = _mm_add_ps(c0, c1);
1238 c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask));
1239
1240 return btVector3(_mm_add_ps(c0, c2));
1241#elif defined(BT_USE_NEON)
1242 const float32x4_t vv = v.mVec128;
1243 const float32x2_t vlo = vget_low_f32(vv);
1244 const float32x2_t vhi = vget_high_f32(vv);
1245
1246 float32x4_t c0, c1, c2;
1247
1248 c0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
1249 c1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
1250 c2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
1251
1252 c0 = vmulq_lane_f32(c0, vlo, 0);
1253 c1 = vmulq_lane_f32(c1, vlo, 1);
1254 c2 = vmulq_lane_f32(c2, vhi, 0);
1255 c0 = vaddq_f32(c0, c1);
1256 c0 = vaddq_f32(c0, c2);
1257
1258 return btVector3(c0);
1259#else
1260 return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
1261#endif
1262}
1263
1265operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
1266{
1267#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1268
1269 __m128 m10 = m1[0].mVec128;
1270 __m128 m11 = m1[1].mVec128;
1271 __m128 m12 = m1[2].mVec128;
1272
1273 __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);
1274
1275 __m128 c0 = bt_splat_ps(m10, 0);
1276 __m128 c1 = bt_splat_ps(m11, 0);
1277 __m128 c2 = bt_splat_ps(m12, 0);
1278
1279 c0 = _mm_mul_ps(c0, m2v);
1280 c1 = _mm_mul_ps(c1, m2v);
1281 c2 = _mm_mul_ps(c2, m2v);
1282
1283 m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);
1284
1285 __m128 c0_1 = bt_splat_ps(m10, 1);
1286 __m128 c1_1 = bt_splat_ps(m11, 1);
1287 __m128 c2_1 = bt_splat_ps(m12, 1);
1288
1289 c0_1 = _mm_mul_ps(c0_1, m2v);
1290 c1_1 = _mm_mul_ps(c1_1, m2v);
1291 c2_1 = _mm_mul_ps(c2_1, m2v);
1292
1293 m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);
1294
1295 c0 = _mm_add_ps(c0, c0_1);
1296 c1 = _mm_add_ps(c1, c1_1);
1297 c2 = _mm_add_ps(c2, c2_1);
1298
1299 m10 = bt_splat_ps(m10, 2);
1300 m11 = bt_splat_ps(m11, 2);
1301 m12 = bt_splat_ps(m12, 2);
1302
1303 m10 = _mm_mul_ps(m10, m2v);
1304 m11 = _mm_mul_ps(m11, m2v);
1305 m12 = _mm_mul_ps(m12, m2v);
1306
1307 c0 = _mm_add_ps(c0, m10);
1308 c1 = _mm_add_ps(c1, m11);
1309 c2 = _mm_add_ps(c2, m12);
1310
1311 return btMatrix3x3(c0, c1, c2);
1312
1313#elif defined(BT_USE_NEON)
1314
1315 float32x4_t rv0, rv1, rv2;
1316 float32x4_t v0, v1, v2;
1317 float32x4_t mv0, mv1, mv2;
1318
1319 v0 = m1[0].mVec128;
1320 v1 = m1[1].mVec128;
1321 v2 = m1[2].mVec128;
1322
1323 mv0 = (float32x4_t)vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
1324 mv1 = (float32x4_t)vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
1325 mv2 = (float32x4_t)vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);
1326
1327 rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
1328 rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
1329 rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
1330
1331 rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
1332 rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
1333 rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
1334
1335 rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
1336 rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
1337 rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
1338
1339 return btMatrix3x3(rv0, rv1, rv2);
1340
1341#else
1342 return btMatrix3x3(
1343 m2.tdotx(m1[0]), m2.tdoty(m1[0]), m2.tdotz(m1[0]),
1344 m2.tdotx(m1[1]), m2.tdoty(m1[1]), m2.tdotz(m1[1]),
1345 m2.tdotx(m1[2]), m2.tdoty(m1[2]), m2.tdotz(m1[2]));
1346#endif
1347}
1348
1349/*
1350SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
1351return btMatrix3x3(
1352m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
1353m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
1354m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
1355m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
1356m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
1357m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
1358m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
1359m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
1360m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
1361}
1362*/
1363
1367{
1368#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1369
1370 __m128 c0, c1, c2;
1371
1372 c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
1373 c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
1374 c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
1375
1376 c0 = _mm_and_ps(c0, c1);
1377 c0 = _mm_and_ps(c0, c2);
1378
1379 int m = _mm_movemask_ps((__m128)c0);
1380 return (0x7 == (m & 0x7));
1381
1382#else
1383 return (m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
1384 m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
1385 m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2]);
1386#endif
1387}
1388
1391{
1393};
1394
1397{
1399};
1400
1402{
1403 for (int i = 0; i < 3; i++)
1404 m_el[i].serialize(dataOut.m_el[i]);
1405}
1406
1408{
1409 for (int i = 0; i < 3; i++)
1410 m_el[i].serializeFloat(dataOut.m_el[i]);
1411}
1412
1414{
1415 for (int i = 0; i < 3; i++)
1416 m_el[i].deSerialize(dataIn.m_el[i]);
1417}
1418
1420{
1421 for (int i = 0; i < 3; i++)
1422 m_el[i].deSerializeFloat(dataIn.m_el[i]);
1423}
1424
1426{
1427 for (int i = 0; i < 3; i++)
1428 m_el[i].deSerializeDouble(dataIn.m_el[i]);
1429}
1430
1431#endif //BT_MATRIX3x3_H
unsigned int uint32_t
btMatrix3x3 operator*(const btMatrix3x3 &m, const btScalar &k)
Definition: btMatrix3x3.h:930
#define btMatrix3x3Data
Definition: btMatrix3x3.h:43
bool operator==(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Equality operator between two matrices It will test all elements are equal.
Definition: btMatrix3x3.h:1366
btMatrix3x3 operator+(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:952
btMatrix3x3 operator-(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:976
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions.
Definition: btQuaternion.h:888
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion.
Definition: btQuaternion.h:909
btReducedVector & operator-=(btReducedVector &v1, const btReducedVector &v2)
btReducedVector & operator+=(btReducedVector &v1, const btReducedVector &v2)
#define SIMD_PI
Definition: btScalar.h:526
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floati...
Definition: btScalar.h:314
#define ATTRIBUTE_ALIGNED16(a)
Definition: btScalar.h:99
btScalar btSqrt(btScalar y)
Definition: btScalar.h:466
btScalar btAtan2(btScalar x, btScalar y)
Definition: btScalar.h:518
btScalar btSin(btScalar x)
Definition: btScalar.h:499
btScalar btFabs(btScalar x)
Definition: btScalar.h:497
#define SIMD_FORCE_INLINE
Definition: btScalar.h:98
#define btFullAssert(x)
Definition: btScalar.h:156
btScalar btCos(btScalar x)
Definition: btScalar.h:498
#define SIMD_EPSILON
Definition: btScalar.h:543
btScalar btAsin(btScalar x)
Definition: btScalar.h:509
#define SIMD_HALF_PI
Definition: btScalar.h:528
#define btAssert(x)
Definition: btScalar.h:153
btScalar btDot(const btVector3 &v1, const btVector3 &v2)
Return the dot product between two vectors.
Definition: btVector3.h:890
btVector3 btCross(const btVector3 &v1, const btVector3 &v2)
Return the cross product of two vectors.
Definition: btVector3.h:918
btScalar btTriple(const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
Definition: btVector3.h:924
The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with...
Definition: btMatrix3x3.h:50
void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
Set the matrix from euler angles YPR around ZYX axes.
Definition: btMatrix3x3.h:303
btMatrix3x3 adjoint() const
Return the adjoint of the matrix.
Definition: btMatrix3x3.h:1085
btMatrix3x3 inverse() const
Return the inverse of the matrix.
Definition: btMatrix3x3.h:1093
void getEulerYPR(btScalar &yaw, btScalar &pitch, btScalar &roll) const
Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR.
Definition: btMatrix3x3.h:526
void setFromOpenGLSubMatrix(const btScalar *m)
Set from the rotational part of a 4x4 OpenGL matrix.
Definition: btMatrix3x3.h:188
btVector3 solve33(const btVector3 &b) const
Solve A * x = b, where b is a column vector.
Definition: btMatrix3x3.h:648
btMatrix3x3 transpose() const
Return the transpose of the matrix.
Definition: btMatrix3x3.h:1049
void setEulerYPR(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the matrix from euler angles using YPR around YXZ respectively.
Definition: btMatrix3x3.h:289
void diagonalize(btMatrix3x3 &rot, btScalar threshold, int maxSteps)
diagonalizes this matrix by the Jacobi method.
Definition: btMatrix3x3.h:716
btMatrix3x3 & operator-=(const btMatrix3x3 &m)
Substractss by the target matrix on the right.
Definition: btMatrix3x3.h:1000
void getRotation(btQuaternion &q) const
Get the matrix represented as a quaternion.
Definition: btMatrix3x3.h:420
btMatrix3x3(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2)
Definition: btMatrix3x3.h:131
btMatrix3x3(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Constructor with row major formatting.
Definition: btMatrix3x3.h:70
void deSerializeFloat(const struct btMatrix3x3FloatData &dataIn)
Definition: btMatrix3x3.h:1419
btMatrix3x3 scaled(const btVector3 &s) const
Create a scaled copy of the matrix.
Definition: btMatrix3x3.h:622
btMatrix3x3 & operator=(const btMatrix3x3 &other)
Assignment Operator.
Definition: btMatrix3x3.h:123
btMatrix3x3(const btMatrix3x3 &other)
Copy constructor.
Definition: btMatrix3x3.h:115
void getEulerZYX(btScalar &yaw, btScalar &pitch, btScalar &roll, unsigned int solution_number=1) const
Get the matrix represented as euler angles around ZYX.
Definition: btMatrix3x3.h:553
static const btMatrix3x3 & getIdentity()
Definition: btMatrix3x3.h:350
btScalar tdotz(const btVector3 &v) const
Definition: btMatrix3x3.h:677
void setIdentity()
Set the matrix to the identity.
Definition: btMatrix3x3.h:323
btMatrix3x3 & operator+=(const btMatrix3x3 &m)
Adds by the target matrix on the right.
Definition: btMatrix3x3.h:908
btVector3 & operator[](int i)
Get a mutable reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:157
btScalar tdotx(const btVector3 &v) const
Definition: btMatrix3x3.h:669
void getOpenGLSubMatrix(btScalar *m) const
Fill the rotational part of an OpenGL matrix and clear the shear/perspective.
Definition: btMatrix3x3.h:367
btScalar determinant() const
Return the determinant of the matrix.
Definition: btMatrix3x3.h:1022
const btVector3 & operator[](int i) const
Get a const reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:165
btMatrix3x3 transposeTimes(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1106
void deSerializeDouble(const struct btMatrix3x3DoubleData &dataIn)
Definition: btMatrix3x3.h:1425
void serialize(struct btMatrix3x3Data &dataOut) const
Definition: btMatrix3x3.h:1401
void extractRotation(btQuaternion &q, btScalar tolerance=1.0e-9, int maxIter=100)
extractRotation is from "A robust method to extract the rotational part of deformations" See http://d...
Definition: btMatrix3x3.h:688
void setZero()
Set the matrix to the identity.
Definition: btMatrix3x3.h:337
btScalar tdoty(const btVector3 &v) const
Definition: btMatrix3x3.h:673
btVector3 m_el[3]
Data storage for the matrix, each vector is a row of the matrix.
Definition: btMatrix3x3.h:52
void serializeFloat(struct btMatrix3x3FloatData &dataOut) const
Definition: btMatrix3x3.h:1407
btVector3 getColumn(int i) const
Get a column of the matrix as a vector.
Definition: btMatrix3x3.h:142
btMatrix3x3 & operator*=(const btMatrix3x3 &m)
Multiply by the target matrix on the right.
Definition: btMatrix3x3.h:818
void deSerialize(const struct btMatrix3x3Data &dataIn)
Definition: btMatrix3x3.h:1413
void setValue(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Set the values of the matrix explicitly (row major)
Definition: btMatrix3x3.h:204
btMatrix3x3()
No initializaion constructor.
Definition: btMatrix3x3.h:56
void setRotation(const btQuaternion &q)
Set the matrix from a quaternion.
Definition: btMatrix3x3.h:215
const btVector3 & getRow(int i) const
Get a row of the matrix as a vector.
Definition: btMatrix3x3.h:149
btScalar cofac(int r1, int c1, int r2, int c2) const
Calculate the matrix cofactor.
Definition: btMatrix3x3.h:801
btMatrix3x3(const btQuaternion &q)
Constructor from Quaternion.
Definition: btMatrix3x3.h:61
btMatrix3x3 timesTranspose(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1162
btMatrix3x3 absolute() const
Return the matrix with all values non negative.
Definition: btMatrix3x3.h:1028
const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:119
const btScalar & z() const
Return the z value.
Definition: btQuadWord.h:117
const btScalar & y() const
Return the y value.
Definition: btQuadWord.h:115
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
Definition: btQuadWord.h:149
const btScalar & x() const
Return the x value.
Definition: btQuadWord.h:113
The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatr...
Definition: btQuaternion.h:50
btScalar length2() const
Return the length squared of the quaternion.
Definition: btQuaternion.h:364
btQuaternion & normalize()
Normalize the quaternion Such that x^2 + y^2 + z^2 +w^2 = 1.
Definition: btQuaternion.h:385
btVector3 can be used to represent 3D points and vectors.
Definition: btVector3.h:82
const btScalar & z() const
Return the z value.
Definition: btVector3.h:579
btVector3 cross(const btVector3 &v) const
Return the cross product between this and another vector.
Definition: btVector3.h:380
btScalar dot(const btVector3 &v) const
Return the dot product.
Definition: btVector3.h:229
btScalar norm() const
Return the norm (length) of the vector.
Definition: btVector3.h:263
btVector3 dot3(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2) const
Definition: btVector3.h:720
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Definition: btVector3.h:640
const btScalar & x() const
Return the x value.
Definition: btVector3.h:575
const btScalar & y() const
Return the y value.
Definition: btVector3.h:577
for serialization
Definition: btMatrix3x3.h:1397
btVector3DoubleData m_el[3]
Definition: btMatrix3x3.h:1398
for serialization
Definition: btMatrix3x3.h:1391
btVector3FloatData m_el[3]
Definition: btMatrix3x3.h:1392