// b3Vector3.h
/*
Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
  12. #ifndef B3_VECTOR3_H
  13. #define B3_VECTOR3_H
  14. //#include <stdint.h>
  15. #include "b3Scalar.h"
  16. #include "b3MinMax.h"
  17. #include "b3AlignedAllocator.h"
  18. #ifdef B3_USE_DOUBLE_PRECISION
  19. #define b3Vector3Data b3Vector3DoubleData
  20. #define b3Vector3DataName "b3Vector3DoubleData"
  21. #else
  22. #define b3Vector3Data b3Vector3FloatData
  23. #define b3Vector3DataName "b3Vector3FloatData"
  24. #endif //B3_USE_DOUBLE_PRECISION
  25. #if defined B3_USE_SSE
  26. //typedef uint32_t __m128i __attribute__ ((vector_size(16)));
  27. #ifdef _MSC_VER
  28. #pragma warning(disable : 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
  29. #endif
  30. #define B3_SHUFFLE(x, y, z, w) (((w) << 6 | (z) << 4 | (y) << 2 | (x)) & 0xff)
  31. //#define b3_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
  32. #define b3_pshufd_ps(_a, _mask) _mm_shuffle_ps((_a), (_a), (_mask))
  33. #define b3_splat3_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, 3))
  34. #define b3_splat_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, _i))
  35. #define b3v3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
  36. #define b3vAbsMask (_mm_set_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
  37. #define b3vFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
  38. #define b3v3AbsfMask b3CastiTo128f(b3v3AbsiMask)
  39. #define b3vFFF0fMask b3CastiTo128f(b3vFFF0Mask)
  40. #define b3vxyzMaskf b3vFFF0fMask
  41. #define b3vAbsfMask b3CastiTo128f(b3vAbsMask)
  42. const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
  43. const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
  44. const __m128 B3_ATTRIBUTE_ALIGNED16(b3vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
  45. const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
  46. #endif
  47. #ifdef B3_USE_NEON
  48. const float32x4_t B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
  49. const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0};
  50. const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
  51. const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3v3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
  52. #endif
  53. class b3Vector3;
  54. class b3Vector4;
  55. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  56. //#if defined (B3_USE_SSE) || defined (B3_USE_NEON)
  57. inline b3Vector3 b3MakeVector3(b3SimdFloat4 v);
  58. inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec);
  59. #endif
  60. inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z);
  61. inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w);
  62. inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w);
  63. /**@brief b3Vector3 can be used to represent 3D points and vectors.
  64. * It has an un-used w component to suit 16-byte alignment when b3Vector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user
  65. * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers
  66. */
  67. B3_ATTRIBUTE_ALIGNED16(class)
  68. b3Vector3
  69. {
  70. public:
  71. #if defined(B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
  72. union {
  73. b3SimdFloat4 mVec128;
  74. float m_floats[4];
  75. struct
  76. {
  77. float x, y, z, w;
  78. };
  79. };
  80. #else
  81. union {
  82. float m_floats[4];
  83. struct
  84. {
  85. float x, y, z, w;
  86. };
  87. };
  88. #endif
  89. public:
  90. B3_DECLARE_ALIGNED_ALLOCATOR();
  91. #if defined(B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
  92. /*B3_FORCE_INLINE b3Vector3()
  93. {
  94. }
  95. */
  96. B3_FORCE_INLINE b3SimdFloat4 get128() const
  97. {
  98. return mVec128;
  99. }
  100. B3_FORCE_INLINE void set128(b3SimdFloat4 v128)
  101. {
  102. mVec128 = v128;
  103. }
  104. #endif
  105. public:
  106. /**@brief Add a vector to this one
  107. * @param The vector to add to this one */
  108. B3_FORCE_INLINE b3Vector3& operator+=(const b3Vector3& v)
  109. {
  110. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  111. mVec128 = _mm_add_ps(mVec128, v.mVec128);
  112. #elif defined(B3_USE_NEON)
  113. mVec128 = vaddq_f32(mVec128, v.mVec128);
  114. #else
  115. m_floats[0] += v.m_floats[0];
  116. m_floats[1] += v.m_floats[1];
  117. m_floats[2] += v.m_floats[2];
  118. #endif
  119. return *this;
  120. }
  121. /**@brief Subtract a vector from this one
  122. * @param The vector to subtract */
  123. B3_FORCE_INLINE b3Vector3& operator-=(const b3Vector3& v)
  124. {
  125. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  126. mVec128 = _mm_sub_ps(mVec128, v.mVec128);
  127. #elif defined(B3_USE_NEON)
  128. mVec128 = vsubq_f32(mVec128, v.mVec128);
  129. #else
  130. m_floats[0] -= v.m_floats[0];
  131. m_floats[1] -= v.m_floats[1];
  132. m_floats[2] -= v.m_floats[2];
  133. #endif
  134. return *this;
  135. }
  136. /**@brief Scale the vector
  137. * @param s Scale factor */
  138. B3_FORCE_INLINE b3Vector3& operator*=(const b3Scalar& s)
  139. {
  140. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  141. __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
  142. vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
  143. mVec128 = _mm_mul_ps(mVec128, vs);
  144. #elif defined(B3_USE_NEON)
  145. mVec128 = vmulq_n_f32(mVec128, s);
  146. #else
  147. m_floats[0] *= s;
  148. m_floats[1] *= s;
  149. m_floats[2] *= s;
  150. #endif
  151. return *this;
  152. }
  153. /**@brief Inversely scale the vector
  154. * @param s Scale factor to divide by */
  155. B3_FORCE_INLINE b3Vector3& operator/=(const b3Scalar& s)
  156. {
  157. b3FullAssert(s != b3Scalar(0.0));
  158. #if 0 //defined(B3_USE_SSE_IN_API)
  159. // this code is not faster !
  160. __m128 vs = _mm_load_ss(&s);
  161. vs = _mm_div_ss(b3v1110, vs);
  162. vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
  163. mVec128 = _mm_mul_ps(mVec128, vs);
  164. return *this;
  165. #else
  166. return *this *= b3Scalar(1.0) / s;
  167. #endif
  168. }
  169. /**@brief Return the dot product
  170. * @param v The other vector in the dot product */
  171. B3_FORCE_INLINE b3Scalar dot(const b3Vector3& v) const
  172. {
  173. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  174. __m128 vd = _mm_mul_ps(mVec128, v.mVec128);
  175. __m128 z = _mm_movehl_ps(vd, vd);
  176. __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
  177. vd = _mm_add_ss(vd, y);
  178. vd = _mm_add_ss(vd, z);
  179. return _mm_cvtss_f32(vd);
  180. #elif defined(B3_USE_NEON)
  181. float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
  182. float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));
  183. x = vadd_f32(x, vget_high_f32(vd));
  184. return vget_lane_f32(x, 0);
  185. #else
  186. return m_floats[0] * v.m_floats[0] +
  187. m_floats[1] * v.m_floats[1] +
  188. m_floats[2] * v.m_floats[2];
  189. #endif
  190. }
  191. /**@brief Return the length of the vector squared */
  192. B3_FORCE_INLINE b3Scalar length2() const
  193. {
  194. return dot(*this);
  195. }
  196. /**@brief Return the length of the vector */
  197. B3_FORCE_INLINE b3Scalar length() const
  198. {
  199. return b3Sqrt(length2());
  200. }
  201. /**@brief Return the distance squared between the ends of this and another vector
  202. * This is symantically treating the vector like a point */
  203. B3_FORCE_INLINE b3Scalar distance2(const b3Vector3& v) const;
  204. /**@brief Return the distance between the ends of this and another vector
  205. * This is symantically treating the vector like a point */
  206. B3_FORCE_INLINE b3Scalar distance(const b3Vector3& v) const;
  207. B3_FORCE_INLINE b3Vector3& safeNormalize()
  208. {
  209. b3Scalar l2 = length2();
  210. //triNormal.normalize();
  211. if (l2 >= B3_EPSILON * B3_EPSILON)
  212. {
  213. (*this) /= b3Sqrt(l2);
  214. }
  215. else
  216. {
  217. setValue(1, 0, 0);
  218. }
  219. return *this;
  220. }
  221. /**@brief Normalize this vector
  222. * x^2 + y^2 + z^2 = 1 */
  223. B3_FORCE_INLINE b3Vector3& normalize()
  224. {
  225. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  226. // dot product first
  227. __m128 vd = _mm_mul_ps(mVec128, mVec128);
  228. __m128 z = _mm_movehl_ps(vd, vd);
  229. __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
  230. vd = _mm_add_ss(vd, y);
  231. vd = _mm_add_ss(vd, z);
  232. #if 0
  233. vd = _mm_sqrt_ss(vd);
  234. vd = _mm_div_ss(b3v1110, vd);
  235. vd = b3_splat_ps(vd, 0x80);
  236. mVec128 = _mm_mul_ps(mVec128, vd);
  237. #else
  238. // NR step 1/sqrt(x) - vd is x, y is output
  239. y = _mm_rsqrt_ss(vd); // estimate
  240. // one step NR
  241. z = b3v1_5;
  242. vd = _mm_mul_ss(vd, b3vHalf); // vd * 0.5
  243. //x2 = vd;
  244. vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
  245. vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
  246. z = _mm_sub_ss(z, vd); // 1.5 - vd * 0.5 * y0 * y0
  247. y = _mm_mul_ss(y, z); // y0 * (1.5 - vd * 0.5 * y0 * y0)
  248. y = b3_splat_ps(y, 0x80);
  249. mVec128 = _mm_mul_ps(mVec128, y);
  250. #endif
  251. return *this;
  252. #else
  253. return *this /= length();
  254. #endif
  255. }
  256. /**@brief Return a normalized version of this vector */
  257. B3_FORCE_INLINE b3Vector3 normalized() const;
  258. /**@brief Return a rotated version of this vector
  259. * @param wAxis The axis to rotate about
  260. * @param angle The angle to rotate by */
  261. B3_FORCE_INLINE b3Vector3 rotate(const b3Vector3& wAxis, const b3Scalar angle) const;
  262. /**@brief Return the angle between this and another vector
  263. * @param v The other vector */
  264. B3_FORCE_INLINE b3Scalar angle(const b3Vector3& v) const
  265. {
  266. b3Scalar s = b3Sqrt(length2() * v.length2());
  267. b3FullAssert(s != b3Scalar(0.0));
  268. return b3Acos(dot(v) / s);
  269. }
  270. /**@brief Return a vector will the absolute values of each element */
  271. B3_FORCE_INLINE b3Vector3 absolute() const
  272. {
  273. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  274. return b3MakeVector3(_mm_and_ps(mVec128, b3v3AbsfMask));
  275. #elif defined(B3_USE_NEON)
  276. return b3Vector3(vabsq_f32(mVec128));
  277. #else
  278. return b3MakeVector3(
  279. b3Fabs(m_floats[0]),
  280. b3Fabs(m_floats[1]),
  281. b3Fabs(m_floats[2]));
  282. #endif
  283. }
  284. /**@brief Return the cross product between this and another vector
  285. * @param v The other vector */
  286. B3_FORCE_INLINE b3Vector3 cross(const b3Vector3& v) const
  287. {
  288. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  289. __m128 T, V;
  290. T = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
  291. V = b3_pshufd_ps(v.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
  292. V = _mm_mul_ps(V, mVec128);
  293. T = _mm_mul_ps(T, v.mVec128);
  294. V = _mm_sub_ps(V, T);
  295. V = b3_pshufd_ps(V, B3_SHUFFLE(1, 2, 0, 3));
  296. return b3MakeVector3(V);
  297. #elif defined(B3_USE_NEON)
  298. float32x4_t T, V;
  299. // form (Y, Z, X, _) of mVec128 and v.mVec128
  300. float32x2_t Tlow = vget_low_f32(mVec128);
  301. float32x2_t Vlow = vget_low_f32(v.mVec128);
  302. T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
  303. V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);
  304. V = vmulq_f32(V, mVec128);
  305. T = vmulq_f32(T, v.mVec128);
  306. V = vsubq_f32(V, T);
  307. Vlow = vget_low_f32(V);
  308. // form (Y, Z, X, _);
  309. V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
  310. V = (float32x4_t)vandq_s32((int32x4_t)V, b3vFFF0Mask);
  311. return b3Vector3(V);
  312. #else
  313. return b3MakeVector3(
  314. m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
  315. m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
  316. m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
  317. #endif
  318. }
  319. B3_FORCE_INLINE b3Scalar triple(const b3Vector3& v1, const b3Vector3& v2) const
  320. {
  321. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  322. // cross:
  323. __m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
  324. __m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
  325. V = _mm_mul_ps(V, v1.mVec128);
  326. T = _mm_mul_ps(T, v2.mVec128);
  327. V = _mm_sub_ps(V, T);
  328. V = _mm_shuffle_ps(V, V, B3_SHUFFLE(1, 2, 0, 3));
  329. // dot:
  330. V = _mm_mul_ps(V, mVec128);
  331. __m128 z = _mm_movehl_ps(V, V);
  332. __m128 y = _mm_shuffle_ps(V, V, 0x55);
  333. V = _mm_add_ss(V, y);
  334. V = _mm_add_ss(V, z);
  335. return _mm_cvtss_f32(V);
  336. #elif defined(B3_USE_NEON)
  337. // cross:
  338. float32x4_t T, V;
  339. // form (Y, Z, X, _) of mVec128 and v.mVec128
  340. float32x2_t Tlow = vget_low_f32(v1.mVec128);
  341. float32x2_t Vlow = vget_low_f32(v2.mVec128);
  342. T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
  343. V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);
  344. V = vmulq_f32(V, v1.mVec128);
  345. T = vmulq_f32(T, v2.mVec128);
  346. V = vsubq_f32(V, T);
  347. Vlow = vget_low_f32(V);
  348. // form (Y, Z, X, _);
  349. V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
  350. // dot:
  351. V = vmulq_f32(mVec128, V);
  352. float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
  353. x = vadd_f32(x, vget_high_f32(V));
  354. return vget_lane_f32(x, 0);
  355. #else
  356. return m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
  357. m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
  358. m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
  359. #endif
  360. }
  361. /**@brief Return the axis with the smallest value
  362. * Note return values are 0,1,2 for x, y, or z */
  363. B3_FORCE_INLINE int minAxis() const
  364. {
  365. return m_floats[0] < m_floats[1] ? (m_floats[0] < m_floats[2] ? 0 : 2) : (m_floats[1] < m_floats[2] ? 1 : 2);
  366. }
  367. /**@brief Return the axis with the largest value
  368. * Note return values are 0,1,2 for x, y, or z */
  369. B3_FORCE_INLINE int maxAxis() const
  370. {
  371. return m_floats[0] < m_floats[1] ? (m_floats[1] < m_floats[2] ? 2 : 1) : (m_floats[0] < m_floats[2] ? 2 : 0);
  372. }
  373. B3_FORCE_INLINE int furthestAxis() const
  374. {
  375. return absolute().minAxis();
  376. }
  377. B3_FORCE_INLINE int closestAxis() const
  378. {
  379. return absolute().maxAxis();
  380. }
  381. B3_FORCE_INLINE void setInterpolate3(const b3Vector3& v0, const b3Vector3& v1, b3Scalar rt)
  382. {
  383. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  384. __m128 vrt = _mm_load_ss(&rt); // (rt 0 0 0)
  385. b3Scalar s = b3Scalar(1.0) - rt;
  386. __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
  387. vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
  388. __m128 r0 = _mm_mul_ps(v0.mVec128, vs);
  389. vrt = b3_pshufd_ps(vrt, 0x80); // (rt rt rt 0.0)
  390. __m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
  391. __m128 tmp3 = _mm_add_ps(r0, r1);
  392. mVec128 = tmp3;
  393. #elif defined(B3_USE_NEON)
  394. float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128);
  395. vl = vmulq_n_f32(vl, rt);
  396. mVec128 = vaddq_f32(vl, v0.mVec128);
  397. #else
  398. b3Scalar s = b3Scalar(1.0) - rt;
  399. m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
  400. m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
  401. m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
  402. //don't do the unused w component
  403. // m_co[3] = s * v0[3] + rt * v1[3];
  404. #endif
  405. }
  406. /**@brief Return the linear interpolation between this and another vector
  407. * @param v The other vector
  408. * @param t The ration of this to v (t = 0 => return this, t=1 => return other) */
  409. B3_FORCE_INLINE b3Vector3 lerp(const b3Vector3& v, const b3Scalar& t) const
  410. {
  411. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  412. __m128 vt = _mm_load_ss(&t); // (t 0 0 0)
  413. vt = b3_pshufd_ps(vt, 0x80); // (rt rt rt 0.0)
  414. __m128 vl = _mm_sub_ps(v.mVec128, mVec128);
  415. vl = _mm_mul_ps(vl, vt);
  416. vl = _mm_add_ps(vl, mVec128);
  417. return b3MakeVector3(vl);
  418. #elif defined(B3_USE_NEON)
  419. float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
  420. vl = vmulq_n_f32(vl, t);
  421. vl = vaddq_f32(vl, mVec128);
  422. return b3Vector3(vl);
  423. #else
  424. return b3MakeVector3(m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
  425. m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
  426. m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
  427. #endif
  428. }
  429. /**@brief Elementwise multiply this vector by the other
  430. * @param v The other vector */
  431. B3_FORCE_INLINE b3Vector3& operator*=(const b3Vector3& v)
  432. {
  433. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  434. mVec128 = _mm_mul_ps(mVec128, v.mVec128);
  435. #elif defined(B3_USE_NEON)
  436. mVec128 = vmulq_f32(mVec128, v.mVec128);
  437. #else
  438. m_floats[0] *= v.m_floats[0];
  439. m_floats[1] *= v.m_floats[1];
  440. m_floats[2] *= v.m_floats[2];
  441. #endif
  442. return *this;
  443. }
  444. /**@brief Return the x value */
  445. B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; }
  446. /**@brief Return the y value */
  447. B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; }
  448. /**@brief Return the z value */
  449. B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; }
  450. /**@brief Return the w value */
  451. B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; }
  452. /**@brief Set the x value */
  453. B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x; };
  454. /**@brief Set the y value */
  455. B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y; };
  456. /**@brief Set the z value */
  457. B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z; };
  458. /**@brief Set the w value */
  459. B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w; };
  460. //B3_FORCE_INLINE b3Scalar& operator[](int i) { return (&m_floats[0])[i]; }
  461. //B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; }
  462. ///operator b3Scalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
  463. B3_FORCE_INLINE operator b3Scalar*() { return &m_floats[0]; }
  464. B3_FORCE_INLINE operator const b3Scalar*() const { return &m_floats[0]; }
  465. B3_FORCE_INLINE bool operator==(const b3Vector3& other) const
  466. {
  467. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  468. return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
  469. #else
  470. return ((m_floats[3] == other.m_floats[3]) &&
  471. (m_floats[2] == other.m_floats[2]) &&
  472. (m_floats[1] == other.m_floats[1]) &&
  473. (m_floats[0] == other.m_floats[0]));
  474. #endif
  475. }
  476. B3_FORCE_INLINE bool operator!=(const b3Vector3& other) const
  477. {
  478. return !(*this == other);
  479. }
  480. /**@brief Set each element to the max of the current values and the values of another b3Vector3
  481. * @param other The other b3Vector3 to compare with
  482. */
  483. B3_FORCE_INLINE void setMax(const b3Vector3& other)
  484. {
  485. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  486. mVec128 = _mm_max_ps(mVec128, other.mVec128);
  487. #elif defined(B3_USE_NEON)
  488. mVec128 = vmaxq_f32(mVec128, other.mVec128);
  489. #else
  490. b3SetMax(m_floats[0], other.m_floats[0]);
  491. b3SetMax(m_floats[1], other.m_floats[1]);
  492. b3SetMax(m_floats[2], other.m_floats[2]);
  493. b3SetMax(m_floats[3], other.m_floats[3]);
  494. #endif
  495. }
  496. /**@brief Set each element to the min of the current values and the values of another b3Vector3
  497. * @param other The other b3Vector3 to compare with
  498. */
  499. B3_FORCE_INLINE void setMin(const b3Vector3& other)
  500. {
  501. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  502. mVec128 = _mm_min_ps(mVec128, other.mVec128);
  503. #elif defined(B3_USE_NEON)
  504. mVec128 = vminq_f32(mVec128, other.mVec128);
  505. #else
  506. b3SetMin(m_floats[0], other.m_floats[0]);
  507. b3SetMin(m_floats[1], other.m_floats[1]);
  508. b3SetMin(m_floats[2], other.m_floats[2]);
  509. b3SetMin(m_floats[3], other.m_floats[3]);
  510. #endif
  511. }
  512. B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
  513. {
  514. m_floats[0] = _x;
  515. m_floats[1] = _y;
  516. m_floats[2] = _z;
  517. m_floats[3] = b3Scalar(0.f);
  518. }
  519. void getSkewSymmetricMatrix(b3Vector3 * v0, b3Vector3 * v1, b3Vector3 * v2) const
  520. {
  521. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  522. __m128 V = _mm_and_ps(mVec128, b3vFFF0fMask);
  523. __m128 V0 = _mm_xor_ps(b3vMzeroMask, V);
  524. __m128 V2 = _mm_movelh_ps(V0, V);
  525. __m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);
  526. V0 = _mm_shuffle_ps(V0, V, 0xDB);
  527. V2 = _mm_shuffle_ps(V2, V, 0xF9);
  528. v0->mVec128 = V0;
  529. v1->mVec128 = V1;
  530. v2->mVec128 = V2;
  531. #else
  532. v0->setValue(0., -getZ(), getY());
  533. v1->setValue(getZ(), 0., -getX());
  534. v2->setValue(-getY(), getX(), 0.);
  535. #endif
  536. }
  537. void setZero()
  538. {
  539. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  540. mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
  541. #elif defined(B3_USE_NEON)
  542. int32x4_t vi = vdupq_n_s32(0);
  543. mVec128 = vreinterpretq_f32_s32(vi);
  544. #else
  545. setValue(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.));
  546. #endif
  547. }
  548. B3_FORCE_INLINE bool isZero() const
  549. {
  550. return m_floats[0] == b3Scalar(0) && m_floats[1] == b3Scalar(0) && m_floats[2] == b3Scalar(0);
  551. }
  552. B3_FORCE_INLINE bool fuzzyZero() const
  553. {
  554. return length2() < B3_EPSILON;
  555. }
  556. B3_FORCE_INLINE void serialize(struct b3Vector3Data & dataOut) const;
  557. B3_FORCE_INLINE void deSerialize(const struct b3Vector3Data& dataIn);
  558. B3_FORCE_INLINE void serializeFloat(struct b3Vector3FloatData & dataOut) const;
  559. B3_FORCE_INLINE void deSerializeFloat(const struct b3Vector3FloatData& dataIn);
  560. B3_FORCE_INLINE void serializeDouble(struct b3Vector3DoubleData & dataOut) const;
  561. B3_FORCE_INLINE void deSerializeDouble(const struct b3Vector3DoubleData& dataIn);
  562. /**@brief returns index of maximum dot product between this and vectors in array[]
  563. * @param array The other vectors
  564. * @param array_count The number of other vectors
  565. * @param dotOut The maximum dot product */
  566. B3_FORCE_INLINE long maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const;
  567. /**@brief returns index of minimum dot product between this and vectors in array[]
  568. * @param array The other vectors
  569. * @param array_count The number of other vectors
  570. * @param dotOut The minimum dot product */
  571. B3_FORCE_INLINE long minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const;
  572. /* create a vector as b3Vector3( this->dot( b3Vector3 v0 ), this->dot( b3Vector3 v1), this->dot( b3Vector3 v2 )) */
  573. B3_FORCE_INLINE b3Vector3 dot3(const b3Vector3& v0, const b3Vector3& v1, const b3Vector3& v2) const
  574. {
  575. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  576. __m128 a0 = _mm_mul_ps(v0.mVec128, this->mVec128);
  577. __m128 a1 = _mm_mul_ps(v1.mVec128, this->mVec128);
  578. __m128 a2 = _mm_mul_ps(v2.mVec128, this->mVec128);
  579. __m128 b0 = _mm_unpacklo_ps(a0, a1);
  580. __m128 b1 = _mm_unpackhi_ps(a0, a1);
  581. __m128 b2 = _mm_unpacklo_ps(a2, _mm_setzero_ps());
  582. __m128 r = _mm_movelh_ps(b0, b2);
  583. r = _mm_add_ps(r, _mm_movehl_ps(b2, b0));
  584. a2 = _mm_and_ps(a2, b3vxyzMaskf);
  585. r = _mm_add_ps(r, b3CastdTo128f(_mm_move_sd(b3CastfTo128d(a2), b3CastfTo128d(b1))));
  586. return b3MakeVector3(r);
  587. #elif defined(B3_USE_NEON)
  588. static const uint32x4_t xyzMask = (const uint32x4_t){-1, -1, -1, 0};
  589. float32x4_t a0 = vmulq_f32(v0.mVec128, this->mVec128);
  590. float32x4_t a1 = vmulq_f32(v1.mVec128, this->mVec128);
  591. float32x4_t a2 = vmulq_f32(v2.mVec128, this->mVec128);
  592. float32x2x2_t zLo = vtrn_f32(vget_high_f32(a0), vget_high_f32(a1));
  593. a2 = (float32x4_t)vandq_u32((uint32x4_t)a2, xyzMask);
  594. float32x2_t b0 = vadd_f32(vpadd_f32(vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0]);
  595. float32x2_t b1 = vpadd_f32(vpadd_f32(vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
  596. return b3Vector3(vcombine_f32(b0, b1));
  597. #else
  598. return b3MakeVector3(dot(v0), dot(v1), dot(v2));
  599. #endif
  600. }
  601. };
  602. /**@brief Return the sum of two vectors (Point symantics)*/
  603. B3_FORCE_INLINE b3Vector3
  604. operator+(const b3Vector3& v1, const b3Vector3& v2)
  605. {
  606. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  607. return b3MakeVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
  608. #elif defined(B3_USE_NEON)
  609. return b3MakeVector3(vaddq_f32(v1.mVec128, v2.mVec128));
  610. #else
  611. return b3MakeVector3(
  612. v1.m_floats[0] + v2.m_floats[0],
  613. v1.m_floats[1] + v2.m_floats[1],
  614. v1.m_floats[2] + v2.m_floats[2]);
  615. #endif
  616. }
  617. /**@brief Return the elementwise product of two vectors */
  618. B3_FORCE_INLINE b3Vector3
  619. operator*(const b3Vector3& v1, const b3Vector3& v2)
  620. {
  621. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  622. return b3MakeVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
  623. #elif defined(B3_USE_NEON)
  624. return b3MakeVector3(vmulq_f32(v1.mVec128, v2.mVec128));
  625. #else
  626. return b3MakeVector3(
  627. v1.m_floats[0] * v2.m_floats[0],
  628. v1.m_floats[1] * v2.m_floats[1],
  629. v1.m_floats[2] * v2.m_floats[2]);
  630. #endif
  631. }
  632. /**@brief Return the difference between two vectors */
  633. B3_FORCE_INLINE b3Vector3
  634. operator-(const b3Vector3& v1, const b3Vector3& v2)
  635. {
  636. #if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
  637. // without _mm_and_ps this code causes slowdown in Concave moving
  638. __m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
  639. return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask));
  640. #elif defined(B3_USE_NEON)
  641. float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
  642. return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
  643. #else
  644. return b3MakeVector3(
  645. v1.m_floats[0] - v2.m_floats[0],
  646. v1.m_floats[1] - v2.m_floats[1],
  647. v1.m_floats[2] - v2.m_floats[2]);
  648. #endif
  649. }
  650. /**@brief Return the negative of the vector */
  651. B3_FORCE_INLINE b3Vector3
  652. operator-(const b3Vector3& v)
  653. {
  654. #if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
  655. __m128 r = _mm_xor_ps(v.mVec128, b3vMzeroMask);
  656. return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask));
  657. #elif defined(B3_USE_NEON)
  658. return b3MakeVector3((b3SimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)b3vMzeroMask));
  659. #else
  660. return b3MakeVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
  661. #endif
  662. }
  663. /**@brief Return the vector scaled by s */
  664. B3_FORCE_INLINE b3Vector3
  665. operator*(const b3Vector3& v, const b3Scalar& s)
  666. {
  667. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  668. __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
  669. vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
  670. return b3MakeVector3(_mm_mul_ps(v.mVec128, vs));
  671. #elif defined(B3_USE_NEON)
  672. float32x4_t r = vmulq_n_f32(v.mVec128, s);
  673. return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
  674. #else
  675. return b3MakeVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
  676. #endif
  677. }
  678. /**@brief Return the vector scaled by s */
  679. B3_FORCE_INLINE b3Vector3
  680. operator*(const b3Scalar& s, const b3Vector3& v)
  681. {
  682. return v * s;
  683. }
  684. /**@brief Return the vector inversely scaled by s */
  685. B3_FORCE_INLINE b3Vector3
  686. operator/(const b3Vector3& v, const b3Scalar& s)
  687. {
  688. b3FullAssert(s != b3Scalar(0.0));
  689. #if 0 //defined(B3_USE_SSE_IN_API)
  690. // this code is not faster !
  691. __m128 vs = _mm_load_ss(&s);
  692. vs = _mm_div_ss(b3v1110, vs);
  693. vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
  694. return b3Vector3(_mm_mul_ps(v.mVec128, vs));
  695. #else
  696. return v * (b3Scalar(1.0) / s);
  697. #endif
  698. }
  699. /**@brief Return the vector inversely scaled by s */
  700. B3_FORCE_INLINE b3Vector3
  701. operator/(const b3Vector3& v1, const b3Vector3& v2)
  702. {
  703. #if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
  704. __m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
  705. vec = _mm_and_ps(vec, b3vFFF0fMask);
  706. return b3MakeVector3(vec);
  707. #elif defined(B3_USE_NEON)
  708. float32x4_t x, y, v, m;
  709. x = v1.mVec128;
  710. y = v2.mVec128;
  711. v = vrecpeq_f32(y); // v ~ 1/y
  712. m = vrecpsq_f32(y, v); // m = (2-v*y)
  713. v = vmulq_f32(v, m); // vv = v*m ~~ 1/y
  714. m = vrecpsq_f32(y, v); // mm = (2-vv*y)
  715. v = vmulq_f32(v, x); // x*vv
  716. v = vmulq_f32(v, m); // (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y
  717. return b3Vector3(v);
  718. #else
  719. return b3MakeVector3(
  720. v1.m_floats[0] / v2.m_floats[0],
  721. v1.m_floats[1] / v2.m_floats[1],
  722. v1.m_floats[2] / v2.m_floats[2]);
  723. #endif
  724. }
  725. /**@brief Return the dot product between two vectors */
  726. B3_FORCE_INLINE b3Scalar
  727. b3Dot(const b3Vector3& v1, const b3Vector3& v2)
  728. {
  729. return v1.dot(v2);
  730. }
  731. /**@brief Return the distance squared between two vectors */
  732. B3_FORCE_INLINE b3Scalar
  733. b3Distance2(const b3Vector3& v1, const b3Vector3& v2)
  734. {
  735. return v1.distance2(v2);
  736. }
  737. /**@brief Return the distance between two vectors */
  738. B3_FORCE_INLINE b3Scalar
  739. b3Distance(const b3Vector3& v1, const b3Vector3& v2)
  740. {
  741. return v1.distance(v2);
  742. }
  743. /**@brief Return the angle between two vectors */
  744. B3_FORCE_INLINE b3Scalar
  745. b3Angle(const b3Vector3& v1, const b3Vector3& v2)
  746. {
  747. return v1.angle(v2);
  748. }
  749. /**@brief Return the cross product of two vectors */
  750. B3_FORCE_INLINE b3Vector3
  751. b3Cross(const b3Vector3& v1, const b3Vector3& v2)
  752. {
  753. return v1.cross(v2);
  754. }
  755. B3_FORCE_INLINE b3Scalar
  756. b3Triple(const b3Vector3& v1, const b3Vector3& v2, const b3Vector3& v3)
  757. {
  758. return v1.triple(v2, v3);
  759. }
  760. /**@brief Return the linear interpolation between two vectors
  761. * @param v1 One vector
  762. * @param v2 The other vector
  763. * @param t The ration of this to v (t = 0 => return v1, t=1 => return v2) */
  764. B3_FORCE_INLINE b3Vector3
  765. b3Lerp(const b3Vector3& v1, const b3Vector3& v2, const b3Scalar& t)
  766. {
  767. return v1.lerp(v2, t);
  768. }
  769. B3_FORCE_INLINE b3Scalar b3Vector3::distance2(const b3Vector3& v) const
  770. {
  771. return (v - *this).length2();
  772. }
  773. B3_FORCE_INLINE b3Scalar b3Vector3::distance(const b3Vector3& v) const
  774. {
  775. return (v - *this).length();
  776. }
  777. B3_FORCE_INLINE b3Vector3 b3Vector3::normalized() const
  778. {
  779. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  780. b3Vector3 norm = *this;
  781. return norm.normalize();
  782. #else
  783. return *this / length();
  784. #endif
  785. }
  786. B3_FORCE_INLINE b3Vector3 b3Vector3::rotate(const b3Vector3& wAxis, const b3Scalar _angle) const
  787. {
  788. // wAxis must be a unit lenght vector
  789. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  790. __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
  791. b3Scalar ssin = b3Sin(_angle);
  792. __m128 C = wAxis.cross(b3MakeVector3(mVec128)).mVec128;
  793. O = _mm_and_ps(O, b3vFFF0fMask);
  794. b3Scalar scos = b3Cos(_angle);
  795. __m128 vsin = _mm_load_ss(&ssin); // (S 0 0 0)
  796. __m128 vcos = _mm_load_ss(&scos); // (S 0 0 0)
  797. __m128 Y = b3_pshufd_ps(O, 0xC9); // (Y Z X 0)
  798. __m128 Z = b3_pshufd_ps(O, 0xD2); // (Z X Y 0)
  799. O = _mm_add_ps(O, Y);
  800. vsin = b3_pshufd_ps(vsin, 0x80); // (S S S 0)
  801. O = _mm_add_ps(O, Z);
  802. vcos = b3_pshufd_ps(vcos, 0x80); // (S S S 0)
  803. vsin = vsin * C;
  804. O = O * wAxis.mVec128;
  805. __m128 X = mVec128 - O;
  806. O = O + vsin;
  807. vcos = vcos * X;
  808. O = O + vcos;
  809. return b3MakeVector3(O);
  810. #else
  811. b3Vector3 o = wAxis * wAxis.dot(*this);
  812. b3Vector3 _x = *this - o;
  813. b3Vector3 _y;
  814. _y = wAxis.cross(*this);
  815. return (o + _x * b3Cos(_angle) + _y * b3Sin(_angle));
  816. #endif
  817. }
  818. B3_FORCE_INLINE long b3Vector3::maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const
  819. {
  820. #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
  821. #if defined _WIN32 || defined(B3_USE_SSE)
  822. const long scalar_cutoff = 10;
  823. long b3_maxdot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut);
  824. #elif defined B3_USE_NEON
  825. const long scalar_cutoff = 4;
  826. extern long (*_maxdot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut);
  827. #endif
  828. if (array_count < scalar_cutoff)
  829. #else
  830. #endif //B3_USE_SSE || B3_USE_NEON
  831. {
  832. b3Scalar maxDot = -B3_INFINITY;
  833. int i = 0;
  834. int ptIndex = -1;
  835. for (i = 0; i < array_count; i++)
  836. {
  837. b3Scalar dot = array[i].dot(*this);
  838. if (dot > maxDot)
  839. {
  840. maxDot = dot;
  841. ptIndex = i;
  842. }
  843. }
  844. b3Assert(ptIndex >= 0);
  845. if (ptIndex < 0)
  846. {
  847. ptIndex = 0;
  848. }
  849. dotOut = maxDot;
  850. return ptIndex;
  851. }
  852. #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
  853. return b3_maxdot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut);
  854. #endif
  855. }
  856. B3_FORCE_INLINE long b3Vector3::minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const
  857. {
  858. #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
  859. #if defined B3_USE_SSE
  860. const long scalar_cutoff = 10;
  861. long b3_mindot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut);
  862. #elif defined B3_USE_NEON
  863. const long scalar_cutoff = 4;
  864. extern long (*b3_mindot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut);
  865. #else
  866. #error unhandled arch!
  867. #endif
  868. if (array_count < scalar_cutoff)
  869. #endif //B3_USE_SSE || B3_USE_NEON
  870. {
  871. b3Scalar minDot = B3_INFINITY;
  872. int i = 0;
  873. int ptIndex = -1;
  874. for (i = 0; i < array_count; i++)
  875. {
  876. b3Scalar dot = array[i].dot(*this);
  877. if (dot < minDot)
  878. {
  879. minDot = dot;
  880. ptIndex = i;
  881. }
  882. }
  883. dotOut = minDot;
  884. return ptIndex;
  885. }
  886. #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
  887. return b3_mindot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut);
  888. #endif
  889. }
  890. class b3Vector4 : public b3Vector3
  891. {
  892. public:
  893. B3_FORCE_INLINE b3Vector4 absolute4() const
  894. {
  895. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  896. return b3MakeVector4(_mm_and_ps(mVec128, b3vAbsfMask));
  897. #elif defined(B3_USE_NEON)
  898. return b3Vector4(vabsq_f32(mVec128));
  899. #else
  900. return b3MakeVector4(
  901. b3Fabs(m_floats[0]),
  902. b3Fabs(m_floats[1]),
  903. b3Fabs(m_floats[2]),
  904. b3Fabs(m_floats[3]));
  905. #endif
  906. }
  907. b3Scalar getW() const { return m_floats[3]; }
  908. B3_FORCE_INLINE int maxAxis4() const
  909. {
  910. int maxIndex = -1;
  911. b3Scalar maxVal = b3Scalar(-B3_LARGE_FLOAT);
  912. if (m_floats[0] > maxVal)
  913. {
  914. maxIndex = 0;
  915. maxVal = m_floats[0];
  916. }
  917. if (m_floats[1] > maxVal)
  918. {
  919. maxIndex = 1;
  920. maxVal = m_floats[1];
  921. }
  922. if (m_floats[2] > maxVal)
  923. {
  924. maxIndex = 2;
  925. maxVal = m_floats[2];
  926. }
  927. if (m_floats[3] > maxVal)
  928. {
  929. maxIndex = 3;
  930. }
  931. return maxIndex;
  932. }
  933. B3_FORCE_INLINE int minAxis4() const
  934. {
  935. int minIndex = -1;
  936. b3Scalar minVal = b3Scalar(B3_LARGE_FLOAT);
  937. if (m_floats[0] < minVal)
  938. {
  939. minIndex = 0;
  940. minVal = m_floats[0];
  941. }
  942. if (m_floats[1] < minVal)
  943. {
  944. minIndex = 1;
  945. minVal = m_floats[1];
  946. }
  947. if (m_floats[2] < minVal)
  948. {
  949. minIndex = 2;
  950. minVal = m_floats[2];
  951. }
  952. if (m_floats[3] < minVal)
  953. {
  954. minIndex = 3;
  955. minVal = m_floats[3];
  956. }
  957. return minIndex;
  958. }
  959. B3_FORCE_INLINE int closestAxis4() const
  960. {
  961. return absolute4().maxAxis4();
  962. }
  963. /**@brief Set x,y,z and zero w
  964. * @param x Value of x
  965. * @param y Value of y
  966. * @param z Value of z
  967. */
  968. /* void getValue(b3Scalar *m) const
  969. {
  970. m[0] = m_floats[0];
  971. m[1] = m_floats[1];
  972. m[2] =m_floats[2];
  973. }
  974. */
  975. /**@brief Set the values
  976. * @param x Value of x
  977. * @param y Value of y
  978. * @param z Value of z
  979. * @param w Value of w
  980. */
  981. B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
  982. {
  983. m_floats[0] = _x;
  984. m_floats[1] = _y;
  985. m_floats[2] = _z;
  986. m_floats[3] = _w;
  987. }
  988. };
  989. ///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
  990. B3_FORCE_INLINE void b3SwapScalarEndian(const b3Scalar& sourceVal, b3Scalar& destVal)
  991. {
  992. #ifdef B3_USE_DOUBLE_PRECISION
  993. unsigned char* dest = (unsigned char*)&destVal;
  994. unsigned char* src = (unsigned char*)&sourceVal;
  995. dest[0] = src[7];
  996. dest[1] = src[6];
  997. dest[2] = src[5];
  998. dest[3] = src[4];
  999. dest[4] = src[3];
  1000. dest[5] = src[2];
  1001. dest[6] = src[1];
  1002. dest[7] = src[0];
  1003. #else
  1004. unsigned char* dest = (unsigned char*)&destVal;
  1005. unsigned char* src = (unsigned char*)&sourceVal;
  1006. dest[0] = src[3];
  1007. dest[1] = src[2];
  1008. dest[2] = src[1];
  1009. dest[3] = src[0];
  1010. #endif //B3_USE_DOUBLE_PRECISION
  1011. }
  1012. ///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
  1013. B3_FORCE_INLINE void b3SwapVector3Endian(const b3Vector3& sourceVec, b3Vector3& destVec)
  1014. {
  1015. for (int i = 0; i < 4; i++)
  1016. {
  1017. b3SwapScalarEndian(sourceVec[i], destVec[i]);
  1018. }
  1019. }
  1020. ///b3UnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
  1021. B3_FORCE_INLINE void b3UnSwapVector3Endian(b3Vector3& vector)
  1022. {
  1023. b3Vector3 swappedVec;
  1024. for (int i = 0; i < 4; i++)
  1025. {
  1026. b3SwapScalarEndian(vector[i], swappedVec[i]);
  1027. }
  1028. vector = swappedVec;
  1029. }
  1030. template <class T>
  1031. B3_FORCE_INLINE void b3PlaneSpace1(const T& n, T& p, T& q)
  1032. {
  1033. if (b3Fabs(n[2]) > B3_SQRT12)
  1034. {
  1035. // choose p in y-z plane
  1036. b3Scalar a = n[1] * n[1] + n[2] * n[2];
  1037. b3Scalar k = b3RecipSqrt(a);
  1038. p[0] = 0;
  1039. p[1] = -n[2] * k;
  1040. p[2] = n[1] * k;
  1041. // set q = n x p
  1042. q[0] = a * k;
  1043. q[1] = -n[0] * p[2];
  1044. q[2] = n[0] * p[1];
  1045. }
  1046. else
  1047. {
  1048. // choose p in x-y plane
  1049. b3Scalar a = n[0] * n[0] + n[1] * n[1];
  1050. b3Scalar k = b3RecipSqrt(a);
  1051. p[0] = -n[1] * k;
  1052. p[1] = n[0] * k;
  1053. p[2] = 0;
  1054. // set q = n x p
  1055. q[0] = -n[2] * p[1];
  1056. q[1] = n[2] * p[0];
  1057. q[2] = a * k;
  1058. }
  1059. }
  /**@brief Plain single-precision record used by b3Vector3::serializeFloat / deSerializeFloat */
  struct b3Vector3FloatData
  {
      // Four floats, matching the four scalars the (de)serialize loops copy
      float m_floats[4];
  };
  /**@brief Plain double-precision record used by b3Vector3::serializeDouble / deSerializeDouble */
  struct b3Vector3DoubleData
  {
      // Four doubles, matching the four scalars the (de)serialize loops copy
      double m_floats[4];
  };
  1068. B3_FORCE_INLINE void b3Vector3::serializeFloat(struct b3Vector3FloatData& dataOut) const
  1069. {
  1070. ///could also do a memcpy, check if it is worth it
  1071. for (int i = 0; i < 4; i++)
  1072. dataOut.m_floats[i] = float(m_floats[i]);
  1073. }
  1074. B3_FORCE_INLINE void b3Vector3::deSerializeFloat(const struct b3Vector3FloatData& dataIn)
  1075. {
  1076. for (int i = 0; i < 4; i++)
  1077. m_floats[i] = b3Scalar(dataIn.m_floats[i]);
  1078. }
  1079. B3_FORCE_INLINE void b3Vector3::serializeDouble(struct b3Vector3DoubleData& dataOut) const
  1080. {
  1081. ///could also do a memcpy, check if it is worth it
  1082. for (int i = 0; i < 4; i++)
  1083. dataOut.m_floats[i] = double(m_floats[i]);
  1084. }
  1085. B3_FORCE_INLINE void b3Vector3::deSerializeDouble(const struct b3Vector3DoubleData& dataIn)
  1086. {
  1087. for (int i = 0; i < 4; i++)
  1088. m_floats[i] = b3Scalar(dataIn.m_floats[i]);
  1089. }
  1090. B3_FORCE_INLINE void b3Vector3::serialize(struct b3Vector3Data& dataOut) const
  1091. {
  1092. ///could also do a memcpy, check if it is worth it
  1093. for (int i = 0; i < 4; i++)
  1094. dataOut.m_floats[i] = m_floats[i];
  1095. }
  1096. B3_FORCE_INLINE void b3Vector3::deSerialize(const struct b3Vector3Data& dataIn)
  1097. {
  1098. for (int i = 0; i < 4; i++)
  1099. m_floats[i] = dataIn.m_floats[i];
  1100. }
  1101. inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z)
  1102. {
  1103. b3Vector3 tmp;
  1104. tmp.setValue(x, y, z);
  1105. return tmp;
  1106. }
  1107. inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w)
  1108. {
  1109. b3Vector3 tmp;
  1110. tmp.setValue(x, y, z);
  1111. tmp.w = w;
  1112. return tmp;
  1113. }
  1114. inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w)
  1115. {
  1116. b3Vector4 tmp;
  1117. tmp.setValue(x, y, z, w);
  1118. return tmp;
  1119. }
  1120. #if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
  1121. inline b3Vector3 b3MakeVector3(b3SimdFloat4 v)
  1122. {
  1123. b3Vector3 tmp;
  1124. tmp.set128(v);
  1125. return tmp;
  1126. }
  1127. inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec)
  1128. {
  1129. b3Vector4 tmp;
  1130. tmp.set128(vec);
  1131. return tmp;
  1132. }
  1133. #endif
  1134. #endif //B3_VECTOR3_H