#define vMPPP (_mm_set_ps(+0.0f, +0.0f, +0.0f, -0.0f))

#if defined(BT_USE_SSE)
#define v0000 (_mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f))
#define v1000 (_mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f))
#define v0100 (_mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f))
#define v0010 (_mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f))
#elif defined(BT_USE_NEON)
const float32x4_t ATTRIBUTE_ALIGNED16(v0000) = {0.0f, 0.0f, 0.0f, 0.0f};
const float32x4_t ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
const float32x4_t ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
const float32x4_t ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
#endif

#ifdef BT_USE_DOUBLE_PRECISION
#define btMatrix3x3Data btMatrix3x3DoubleData
#else
#define btMatrix3x3Data btMatrix3x3FloatData
#endif
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	m_el[0].mVec128 = rhs.m_el[0].mVec128;
	m_el[1].mVec128 = rhs.m_el[1].mVec128;
	m_el[2].mVec128 = rhs.m_el[2].mVec128;

	m_el[0].mVec128 = m.m_el[0].mVec128;
	m_el[1].mVec128 = m.m_el[1].mVec128;
	m_el[2].mVec128 = m.m_el[2].mVec128;

	m_el[0] = other.m_el[0];
	m_el[1] = other.m_el[1];
	m_el[2] = other.m_el[2];

	m_el[0] = other.m_el[0];
	m_el[1] = other.m_el[1];
	m_el[2] = other.m_el[2];
	return btVector3(m_el[0][i], m_el[1][i], m_el[2][i]);
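	// (getColumn gathers element i of each row, i.e. column i of this row-major matrix.)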
	m_el[2].setValue(m[2], m[6], m[10]);
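	// In a column-major OpenGL array, m[2], m[6] and m[10] form the third row
	// of the upper-left 3x3 rotation block.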
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	__m128 vs, Q = q.get128();
	__m128i Qi = btCastfTo128i(Q);
	__m128 Y, Z;
	__m128 V1, V2, V3;
	__m128 V11, V21, V31;
	__m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
	__m128i NQi = btCastfTo128i(NQ);

	V1 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 2, 3)));
	V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0, 0, 1, 3));
	V3 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(2, 1, 0, 3)));
	V1 = _mm_xor_ps(V1, vMPPP);

	V11 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 1, 0, 3)));
	V21 = _mm_unpackhi_ps(Q, Q);
	V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0, 2, 0, 3));

	V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2, 3, 1, 3));

	V21 = _mm_xor_ps(V21, vMPPP);
	V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3, 3, 1, 3));
	V31 = _mm_xor_ps(V31, vMPPP);
	Y = btCastiTo128f(_mm_shuffle_epi32(NQi, BT_SHUFFLE(3, 2, 0, 3)));
	Z = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 1, 3)));

	vs = _mm_load_ss(&s);

	vs = bt_splat3_ps(vs, 0);
	btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
	btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
	btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
	btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
	setValue(
		btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
		xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
		xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
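	// A minimal usage sketch (not part of the original listing; names as
	// declared in this header):
	//   btQuaternion q(btVector3(0, 0, 1), SIMD_HALF_PI);  // 90 degrees about Z
	//   btMatrix3x3 m;
	//   m.setRotation(q);
	//   btQuaternion q2;
	//   m.getRotation(q2);  // recovers q up to overall sign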
	setEulerZYX(roll, pitch, yaw);
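	// setEulerYPR (above) forwards to setEulerZYX with reordered arguments. In
	// setEulerZYX below, ci/cj/ch and si/sj/sh are the cosines and sines of
	// eulerX, eulerY and eulerZ, with cc = ci*ch, cs = ci*sh, sc = si*ch and
	// ss = si*sh computed just before this call in the full source.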
	setValue(cj * ch, sj * sc - cs, sj * cc + ss,
			 cj * sh, sj * ss + cc, sj * cs - sc,
			 -sj, cj * si, cj * ci);
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)

#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)

#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	static const btMatrix3x3 identityMatrix(v1000, v0100, v0010);

	return identityMatrix;
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	__m128 v0 = m_el[0].mVec128;
	__m128 v1 = m_el[1].mVec128;
	__m128 v2 = m_el[2].mVec128;
	__m128* vm = (__m128*)m;
	__m128 vT;

	v2 = _mm_and_ps(v2, btvFFF0fMask);

	vT = _mm_unpackhi_ps(v0, v1);
	v0 = _mm_unpacklo_ps(v0, v1);

	v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3));
	v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3));
	v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));

	vm[0] = v0;
	vm[1] = v1;
	vm[2] = v2;
#elif defined(BT_USE_NEON)
	static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
	float32x4_t* vm = (float32x4_t*)m;
	float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);
	float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));
	float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
	float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
	float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
	float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);

	vm[0] = v0;
	vm[1] = v1;
	vm[2] = v2;
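	// Both SIMD paths store the transpose: the rows of this matrix become the
	// columns of the column-major OpenGL array, with every fourth element zeroed.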
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();

		temp.f[0] = m_el[2].y() - m_el[1].z();
		temp.f[1] = m_el[0].z() - m_el[2].x();
		temp.f[2] = m_el[1].x() - m_el[0].y();

		if (m_el[0].x() < m_el[1].y())

			if (m_el[1].y() < m_el[2].z())

		if (m_el[0].x() < m_el[2].z())

		x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);

		temp.f[3] = (m_el[k][j] - m_el[j][k]);
		temp.f[j] = (m_el[j][i] + m_el[i][j]);
		temp.f[k] = (m_el[k][i] + m_el[i][k]);
	btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();

		temp[0] = ((m_el[2].y() - m_el[1].z()) * s);
		temp[1] = ((m_el[0].z() - m_el[2].x()) * s);
		temp[2] = ((m_el[1].x() - m_el[0].y()) * s);

		int i = m_el[0].x() < m_el[1].y() ? (m_el[1].y() < m_el[2].z() ? 2 : 1) : (m_el[0].x() < m_el[2].z() ? 2 : 0);

		temp[3] = (m_el[k][j] - m_el[j][k]) * s;
		temp[j] = (m_el[j][i] + m_el[i][j]) * s;
		temp[k] = (m_el[k][i] + m_el[i][k]) * s;

	q.setValue(temp[0], temp[1], temp[2], temp[3]);
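	// Both getRotation paths use the standard trace-based conversion: for a
	// positive trace, w = sqrt(trace + 1) / 2 and the off-diagonal differences
	// give x, y, z; otherwise the largest diagonal element selects the pivot
	// index i (with j, k its cyclic successors) so the square root stays well
	// conditioned.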
	if (btFabs(m_el[2].x()) >= 1)

			euler_out.roll = euler_out.pitch + delta;
			euler_out2.roll = euler_out.pitch + delta;

			euler_out.roll = -euler_out.pitch + delta;
			euler_out2.roll = -euler_out.pitch + delta;

		euler_out.pitch = -btAsin(m_el[2].x());
		euler_out2.pitch = SIMD_PI - euler_out.pitch;

		euler_out.roll = btAtan2(m_el[2].y() / btCos(euler_out.pitch),
								 m_el[2].z() / btCos(euler_out.pitch));
		euler_out2.roll = btAtan2(m_el[2].y() / btCos(euler_out2.pitch),
								  m_el[2].z() / btCos(euler_out2.pitch));

		euler_out.yaw = btAtan2(m_el[1].x() / btCos(euler_out.pitch),
								m_el[0].x() / btCos(euler_out.pitch));
		euler_out2.yaw = btAtan2(m_el[1].x() / btCos(euler_out2.pitch),
								 m_el[0].x() / btCos(euler_out2.pitch));

	if (solution_number == 1)
	{
		yaw = euler_out.yaw;
		pitch = euler_out.pitch;
		roll = euler_out.roll;
	}
	else
	{
		yaw = euler_out2.yaw;
		pitch = euler_out2.pitch;
		roll = euler_out2.roll;
	}
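	// Gimbal lock: when |m_el[2].x()| >= 1 the pitch is +/- pi/2 and yaw and
	// roll are no longer independent, so that branch pins yaw to zero and folds
	// the remaining freedom (delta) into roll. Otherwise two valid
	// (yaw, pitch, roll) triples exist; solution_number selects between them.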
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
#else
	return btMatrix3x3(
		m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
		m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
		m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
#endif
	return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();

	return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();

	return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
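	// tdotx/tdoty/tdotz dot v against a column of the matrix, i.e. against a
	// row of the transpose; they are the scalar building blocks of v * M.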
	for (iter = 0; iter < maxIter; iter++)

	for (int step = maxSteps; step > 0; step--)

	btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);

		t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
						 : 1 / (theta - btSqrt(1 + theta2));
		cos = 1 / btSqrt(1 + t * t);

		t = 1 / (theta * (2 + btScalar(0.5) / theta2));

	m_el[p][q] = m_el[q][p] = 0;
	m_el[p][p] -= t * mpq;
	m_el[q][q] += t * mpq;

		m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
		m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;

	for (int i = 0; i < 3; i++)

		row[p] = cos * mrp - sin * mrq;
		row[q] = cos * mrq + sin * mrp;
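	// One Jacobi rotation: theta = (m[q][q] - m[p][p]) / (2 m[p][q]) determines
	// the angle that zeroes the off-diagonal element m[p][q]; for large theta
	// the code uses the series form t = 1 / (theta * (2 + 0.5 / theta^2)) to
	// avoid overflow in theta^2. The same plane rotation is accumulated into rot.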
	return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	__m128 rv00, rv01, rv02;
	__m128 rv10, rv11, rv12;
	__m128 rv20, rv21, rv22;
	__m128 mv0, mv1, mv2;

	rv02 = m_el[0].mVec128;
	rv12 = m_el[1].mVec128;
	rv22 = m_el[2].mVec128;

	mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
	mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
	mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);

	rv00 = bt_splat_ps(rv02, 0);
	rv01 = bt_splat_ps(rv02, 1);
	rv02 = bt_splat_ps(rv02, 2);

	rv00 = _mm_mul_ps(rv00, mv0);
	rv01 = _mm_mul_ps(rv01, mv1);
	rv02 = _mm_mul_ps(rv02, mv2);

	rv10 = bt_splat_ps(rv12, 0);
	rv11 = bt_splat_ps(rv12, 1);
	rv12 = bt_splat_ps(rv12, 2);

	rv10 = _mm_mul_ps(rv10, mv0);
	rv11 = _mm_mul_ps(rv11, mv1);
	rv12 = _mm_mul_ps(rv12, mv2);

	rv20 = bt_splat_ps(rv22, 0);
	rv21 = bt_splat_ps(rv22, 1);
	rv22 = bt_splat_ps(rv22, 2);

	rv20 = _mm_mul_ps(rv20, mv0);
	rv21 = _mm_mul_ps(rv21, mv1);
	rv22 = _mm_mul_ps(rv22, mv2);

	rv00 = _mm_add_ps(rv00, rv01);
	rv10 = _mm_add_ps(rv10, rv11);
	rv20 = _mm_add_ps(rv20, rv21);

	m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
	m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
	m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
#elif defined(BT_USE_NEON)
	float32x4_t rv0, rv1, rv2;
	float32x4_t v0, v1, v2;
	float32x4_t mv0, mv1, mv2;

	v0 = m_el[0].mVec128;
	v1 = m_el[1].mVec128;
	v2 = m_el[2].mVec128;

	mv0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
	mv1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
	mv2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);

	m_el[0].mVec128 = rv0;
	m_el[1].mVec128 = rv1;
	m_el[2].mVec128 = rv2;
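	// Scalar equivalent of operator*= above: each new row is a combination of
	// m's rows weighted by the old row's components,
	//   row_i = old_i.x * m[0] + old_i.y * m[1] + old_i.z * m[2].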
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)

#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 vk = bt_splat_ps(_mm_load_ss((float*)&k), 0x80);
	return btMatrix3x3(
		_mm_mul_ps(m[0].mVec128, vk),
		_mm_mul_ps(m[1].mVec128, vk),
		_mm_mul_ps(m[2].mVec128, vk));
#elif defined(BT_USE_NEON)
	return btMatrix3x3(
		vmulq_n_f32(m[0].mVec128, k),
		vmulq_n_f32(m[1].mVec128, k),
		vmulq_n_f32(m[2].mVec128, k));
#else
	return btMatrix3x3(
		m[0].x() * k, m[0].y() * k, m[0].z() * k,
		m[1].x() * k, m[1].y() * k, m[1].z() * k,
		m[2].x() * k, m[2].y() * k, m[2].z() * k);
#endif
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return btMatrix3x3(
		m1[0].mVec128 + m2[0].mVec128,
		m1[1].mVec128 + m2[1].mVec128,
		m1[2].mVec128 + m2[2].mVec128);
#else
	return btMatrix3x3(
		m1[0][0] + m2[0][0], m1[0][1] + m2[0][1], m1[0][2] + m2[0][2],
		m1[1][0] + m2[1][0], m1[1][1] + m2[1][1], m1[1][2] + m2[1][2],
		m1[2][0] + m2[2][0], m1[2][1] + m2[2][1], m1[2][2] + m2[2][2]);
#endif
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return btMatrix3x3(
		m1[0].mVec128 - m2[0].mVec128,
		m1[1].mVec128 - m2[1].mVec128,
		m1[2].mVec128 - m2[2].mVec128);
#else
	return btMatrix3x3(
		m1[0][0] - m2[0][0], m1[0][1] - m2[0][1], m1[0][2] - m2[0][2],
		m1[1][0] - m2[1][0], m1[1][1] - m2[1][1], m1[1][2] - m2[1][2],
		m1[2][0] - m2[2][0], m1[2][1] - m2[2][1], m1[2][2] - m2[2][2]);
#endif
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)

	return btTriple((*this)[0], (*this)[1], (*this)[2]);
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	return btMatrix3x3(
		_mm_and_ps(m_el[0].mVec128, btvAbsfMask),
		_mm_and_ps(m_el[1].mVec128, btvAbsfMask),
		_mm_and_ps(m_el[2].mVec128, btvAbsfMask));
#elif defined(BT_USE_NEON)
	return btMatrix3x3(
		(float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
		(float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
		(float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 v0 = m_el[0].mVec128;
	__m128 v1 = m_el[1].mVec128;
	__m128 v2 = m_el[2].mVec128;
	__m128 vT;

	v2 = _mm_and_ps(v2, btvFFF0fMask);

	vT = _mm_unpackhi_ps(v0, v1);
	v0 = _mm_unpacklo_ps(v0, v1);

	v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3));
	v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3));
	v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));

	return btMatrix3x3(v0, v1, v2);
#elif defined(BT_USE_NEON)
	static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
	float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);
	float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));
	float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
	float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
	float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
	float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);

	return btMatrix3x3(v0, v1, v2);
	return btMatrix3x3(
		cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
		cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
		cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
	btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
	btScalar det = (*this)[0].dot(co);
	btAssert(det != btScalar(0.0));
	btScalar s = btScalar(1.0) / det;
	return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
					   co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
					   co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
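	// A minimal usage sketch (not part of the original listing):
	//   btMatrix3x3 M;
	//   M.setEulerZYX(0.1f, 0.2f, 0.3f);
	//   btMatrix3x3 I = M * M.inverse();  // ~identity, up to rounding
	// For a pure rotation matrix, transpose() is the cheaper inverse.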
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 row = m_el[0].mVec128;
	__m128 m0 = _mm_and_ps(m.getRow(0).mVec128, btvFFF0fMask);
	__m128 m1 = _mm_and_ps(m.getRow(1).mVec128, btvFFF0fMask);
	__m128 m2 = _mm_and_ps(m.getRow(2).mVec128, btvFFF0fMask);
	__m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
	__m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
	__m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
	row = m_el[1].mVec128;
	r0 = _mm_add_ps(r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
	row = m_el[2].mVec128;
	r0 = _mm_add_ps(r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
	return btMatrix3x3(r0, r1, r2);
#elif defined BT_USE_NEON
	static const uint32x4_t xyzMask = (const uint32x4_t){static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0};
	float32x4_t m0 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(0).mVec128, xyzMask);
	float32x4_t m1 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(1).mVec128, xyzMask);
	float32x4_t m2 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(2).mVec128, xyzMask);
	float32x4_t row = m_el[0].mVec128;
	float32x4_t r0 = vmulq_lane_f32(m0, vget_low_f32(row), 0);
	float32x4_t r1 = vmulq_lane_f32(m0, vget_low_f32(row), 1);
	float32x4_t r2 = vmulq_lane_f32(m0, vget_high_f32(row), 0);
	row = m_el[1].mVec128;
	r0 = vmlaq_lane_f32(r0, m1, vget_low_f32(row), 0);
	r1 = vmlaq_lane_f32(r1, m1, vget_low_f32(row), 1);
	r2 = vmlaq_lane_f32(r2, m1, vget_high_f32(row), 0);
	row = m_el[2].mVec128;
	r0 = vmlaq_lane_f32(r0, m2, vget_low_f32(row), 0);
	r1 = vmlaq_lane_f32(r1, m2, vget_low_f32(row), 1);
	r2 = vmlaq_lane_f32(r2, m2, vget_high_f32(row), 0);
	return btMatrix3x3(r0, r1, r2);
#else
	return btMatrix3x3(
		m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
		m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
		m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
		m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
		m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
		m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
		m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
		m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
		m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
#endif
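	// transposeTimes(m) computes (this^T) * m: entry (i, j) is column i of this
	// matrix dotted with column j of m. timesTranspose(m), below, is this * m^T.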
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 a0 = m_el[0].mVec128;
	__m128 a1 = m_el[1].mVec128;
	__m128 a2 = m_el[2].mVec128;

	btMatrix3x3 mT = m.transpose();
	__m128 mx = mT[0].mVec128;
	__m128 my = mT[1].mVec128;
	__m128 mz = mT[2].mVec128;

	__m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
	__m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
	__m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
	r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
	r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
	return btMatrix3x3(r0, r1, r2);
#elif defined BT_USE_NEON
	float32x4_t a0 = m_el[0].mVec128;
	float32x4_t a1 = m_el[1].mVec128;
	float32x4_t a2 = m_el[2].mVec128;

	btMatrix3x3 mT = m.transpose();
	float32x4_t mx = mT[0].mVec128;
	float32x4_t my = mT[1].mVec128;
	float32x4_t mz = mT[2].mVec128;

	float32x4_t r0 = vmulq_lane_f32(mx, vget_low_f32(a0), 0);
	float32x4_t r1 = vmulq_lane_f32(mx, vget_low_f32(a1), 0);
	float32x4_t r2 = vmulq_lane_f32(mx, vget_low_f32(a2), 0);
	r0 = vmlaq_lane_f32(r0, my, vget_low_f32(a0), 1);
	r1 = vmlaq_lane_f32(r1, my, vget_low_f32(a1), 1);
	r2 = vmlaq_lane_f32(r2, my, vget_low_f32(a2), 1);
	r0 = vmlaq_lane_f32(r0, mz, vget_high_f32(a0), 0);
	r1 = vmlaq_lane_f32(r1, mz, vget_high_f32(a1), 0);
	r2 = vmlaq_lane_f32(r2, mz, vget_high_f32(a2), 0);
	return btMatrix3x3(r0, r1, r2);
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return v.dot3(m[0], m[1], m[2]);
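	// Matrix * vector: each result component is a row of m dotted with v;
	// dot3 evaluates the three row dot products in a single SIMD pass.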
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	const __m128 vv = v.mVec128;

	__m128 c0 = bt_splat_ps(vv, 0);
	__m128 c1 = bt_splat_ps(vv, 1);
	__m128 c2 = bt_splat_ps(vv, 2);

	c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask));
	c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask));
	c0 = _mm_add_ps(c0, c1);
	c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask));

	return btVector3(_mm_add_ps(c0, c2));
#elif defined(BT_USE_NEON)
	const float32x4_t vv = v.mVec128;
	const float32x2_t vlo = vget_low_f32(vv);
	const float32x2_t vhi = vget_high_f32(vv);

	float32x4_t c0, c1, c2;

	c0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
	c1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
	c2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

	c0 = vmulq_lane_f32(c0, vlo, 0);
	c1 = vmulq_lane_f32(c1, vlo, 1);
	c2 = vmulq_lane_f32(c2, vhi, 0);
	c0 = vaddq_f32(c0, c1);
	c0 = vaddq_f32(c0, c2);

	return btVector3(c0);
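	// Vector * matrix treats v as a row vector:
	//   v * M = v.x * M[0] + v.y * M[1] + v.z * M[2],
	// i.e. v dotted with each column of M.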
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 m10 = m1[0].mVec128;
	__m128 m11 = m1[1].mVec128;
	__m128 m12 = m1[2].mVec128;

	__m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);

	__m128 c0 = bt_splat_ps(m10, 0);
	__m128 c1 = bt_splat_ps(m11, 0);
	__m128 c2 = bt_splat_ps(m12, 0);

	c0 = _mm_mul_ps(c0, m2v);
	c1 = _mm_mul_ps(c1, m2v);
	c2 = _mm_mul_ps(c2, m2v);

	m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);

	__m128 c0_1 = bt_splat_ps(m10, 1);
	__m128 c1_1 = bt_splat_ps(m11, 1);
	__m128 c2_1 = bt_splat_ps(m12, 1);

	c0_1 = _mm_mul_ps(c0_1, m2v);
	c1_1 = _mm_mul_ps(c1_1, m2v);
	c2_1 = _mm_mul_ps(c2_1, m2v);

	m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);

	c0 = _mm_add_ps(c0, c0_1);
	c1 = _mm_add_ps(c1, c1_1);
	c2 = _mm_add_ps(c2, c2_1);

	m10 = bt_splat_ps(m10, 2);
	m11 = bt_splat_ps(m11, 2);
	m12 = bt_splat_ps(m12, 2);

	m10 = _mm_mul_ps(m10, m2v);
	m11 = _mm_mul_ps(m11, m2v);
	m12 = _mm_mul_ps(m12, m2v);

	c0 = _mm_add_ps(c0, m10);
	c1 = _mm_add_ps(c1, m11);
	c2 = _mm_add_ps(c2, m12);

	return btMatrix3x3(c0, c1, c2);
#elif defined(BT_USE_NEON)
	float32x4_t rv0, rv1, rv2;
	float32x4_t v0, v1, v2;
	float32x4_t mv0, mv1, mv2;

	v0 = m1[0].mVec128;
	v1 = m1[1].mVec128;
	v2 = m1[2].mVec128;

	mv0 = (float32x4_t)vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
	mv1 = (float32x4_t)vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
	mv2 = (float32x4_t)vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);

	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);

	return btMatrix3x3(rv0, rv1, rv2);
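	// A minimal usage sketch (not part of the original listing): composing two
	// rotations; (A * B) * v applies B first, then A.
	//   btMatrix3x3 A, B;
	//   A.setEulerZYX(0, 0, SIMD_HALF_PI);         // yaw 90 degrees
	//   B.setEulerZYX(0, SIMD_HALF_PI * 0.5f, 0);  // pitch 45 degrees
	//   btMatrix3x3 C = A * B;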
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 c0, c1, c2;

	c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
	c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
	c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);

	c0 = _mm_and_ps(c0, c1);
	c0 = _mm_and_ps(c0, c2);

	int m = _mm_movemask_ps((__m128)c0);
	return (0x7 == (m & 0x7));
#else
	return (m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
			m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
			m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2]);
#endif
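	// In the SIMD path above, masking the movemask result with 0x7 restricts
	// the test to lanes 0-2, so the unused w components cannot spoil equality.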
	for (int i = 0; i < 3; i++)

	for (int i = 0; i < 3; i++)

	for (int i = 0; i < 3; i++)

	for (int i = 0; i < 3; i++)

	for (int i = 0; i < 3; i++)
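	// The five loops above are the row-copy bodies of serialize(),
	// serializeFloat(), deSerialize(), deSerializeFloat() and
	// deSerializeDouble(), each iterating over the three rows of m_el.

Symbol reference (one-line briefs from the generated documentation):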
btMatrix3x3 operator*(const btMatrix3x3 &m, const btScalar &k)
bool operator==(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Equality operator between two matrices; it tests that all elements are equal.
btMatrix3x3 operator+(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
btMatrix3x3 operator-(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions.
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion.
btReducedVector & operator-=(btReducedVector &v1, const btReducedVector &v2)
btReducedVector & operator+=(btReducedVector &v1, const btReducedVector &v2)
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floating point precision.
#define ATTRIBUTE_ALIGNED16(a)
btScalar btSqrt(btScalar y)
btScalar btAtan2(btScalar x, btScalar y)
btScalar btSin(btScalar x)
btScalar btFabs(btScalar x)
#define SIMD_FORCE_INLINE
btScalar btCos(btScalar x)
btScalar btAsin(btScalar x)
btScalar btDot(const btVector3 &v1, const btVector3 &v2)
Return the dot product between two vectors.
btVector3 btCross(const btVector3 &v1, const btVector3 &v2)
Return the cross product of two vectors.
btScalar btTriple(const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with btQuaternion, btTransform and btVector3.
void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
Set the matrix from Euler angles YPR around ZYX axes.
btMatrix3x3 adjoint() const
Return the adjoint of the matrix.
btMatrix3x3 inverse() const
Return the inverse of the matrix.
void getEulerYPR(btScalar &yaw, btScalar &pitch, btScalar &roll) const
Get the matrix represented as Euler angles around YXZ; round-trips with setEulerYPR.
void setFromOpenGLSubMatrix(const btScalar *m)
Set from the rotational part of a 4x4 OpenGL matrix.
btVector3 solve33(const btVector3 &b) const
Solve A * x = b, where b is a column vector.
btMatrix3x3 transpose() const
Return the transpose of the matrix.
void setEulerYPR(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the matrix from Euler angles using YPR around YXZ respectively.
void diagonalize(btMatrix3x3 &rot, btScalar threshold, int maxSteps)
Diagonalizes this matrix by the Jacobi method.
btMatrix3x3 & operator-=(const btMatrix3x3 &m)
Subtracts the target matrix on the right.
void getRotation(btQuaternion &q) const
Get the matrix represented as a quaternion.
btMatrix3x3(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2)
btMatrix3x3(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Constructor with row-major formatting.
void deSerializeFloat(const struct btMatrix3x3FloatData &dataIn)
btMatrix3x3 scaled(const btVector3 &s) const
Create a scaled copy of the matrix.
btMatrix3x3 & operator=(const btMatrix3x3 &other)
Assignment Operator.
btMatrix3x3(const btMatrix3x3 &other)
Copy constructor.
void getEulerZYX(btScalar &yaw, btScalar &pitch, btScalar &roll, unsigned int solution_number=1) const
Get the matrix represented as Euler angles around ZYX.
static const btMatrix3x3 & getIdentity()
btScalar tdotz(const btVector3 &v) const
void setIdentity()
Set the matrix to the identity.
btMatrix3x3 & operator+=(const btMatrix3x3 &m)
Adds the target matrix on the right.
btVector3 & operator[](int i)
Get a mutable reference to a row of the matrix as a vector.
btScalar tdotx(const btVector3 &v) const
void getOpenGLSubMatrix(btScalar *m) const
Fill the rotational part of an OpenGL matrix and clear the shear/perspective.
btScalar determinant() const
Return the determinant of the matrix.
const btVector3 & operator[](int i) const
Get a const reference to a row of the matrix as a vector.
btMatrix3x3 transposeTimes(const btMatrix3x3 &m) const
void deSerializeDouble(const struct btMatrix3x3DoubleData &dataIn)
void serialize(struct btMatrix3x3Data &dataOut) const
void extractRotation(btQuaternion &q, btScalar tolerance=1.0e-9, int maxIter=100)
extractRotation is from "A robust method to extract the rotational part of deformations"; see http://d...
void setZero()
Set the matrix to zero.
btScalar tdoty(const btVector3 &v) const
btVector3 m_el[3]
Data storage for the matrix, each vector is a row of the matrix.
void serializeFloat(struct btMatrix3x3FloatData &dataOut) const
btVector3 getColumn(int i) const
Get a column of the matrix as a vector.
btMatrix3x3 & operator*=(const btMatrix3x3 &m)
Multiply by the target matrix on the right.
void deSerialize(const struct btMatrix3x3Data &dataIn)
void setValue(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Set the values of the matrix explicitly (row-major).
btMatrix3x3()
No-initialization constructor.
void setRotation(const btQuaternion &q)
Set the matrix from a quaternion.
const btVector3 & getRow(int i) const
Get a row of the matrix as a vector.
btScalar cofac(int r1, int c1, int r2, int c2) const
Calculate the matrix cofactor.
btMatrix3x3(const btQuaternion &q)
Constructor from Quaternion.
btMatrix3x3 timesTranspose(const btMatrix3x3 &m) const
btMatrix3x3 absolute() const
Return the matrix with all values non-negative.
const btScalar & w() const
Return the w value.
const btScalar & z() const
Return the z value.
const btScalar & y() const
Return the y value.
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
const btScalar & x() const
Return the x value.
The btQuaternion implements a quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform.
btScalar length2() const
Return the length squared of the quaternion.
btQuaternion & normalize()
Normalize the quaternion such that x^2 + y^2 + z^2 + w^2 = 1.
btVector3 can be used to represent 3D points and vectors.
const btScalar & z() const
Return the z value.
btVector3 cross(const btVector3 &v) const
Return the cross product between this and another vector.
btScalar dot(const btVector3 &v) const
Return the dot product.
btScalar norm() const
Return the norm (length) of the vector.
btVector3 dot3(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2) const
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
const btScalar & x() const
Return the x value.
const btScalar & y() const
Return the y value.
btVector3DoubleData m_el[3]
btVector3FloatData m_el[3]