// bezier_curve.h
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "../common/default.h"
  5. //#include "../common/scene_curves.h"
  6. #include "../common/context.h"
  7. namespace embree
  8. {
  9. class BezierBasis
  10. {
  11. public:
  12. template<typename T>
  13. static __forceinline Vec4<T> eval(const T& u)
  14. {
  15. const T t1 = u;
  16. const T t0 = 1.0f-t1;
  17. const T B0 = t0 * t0 * t0;
  18. const T B1 = 3.0f * t1 * (t0 * t0);
  19. const T B2 = 3.0f * (t1 * t1) * t0;
  20. const T B3 = t1 * t1 * t1;
  21. return Vec4<T>(B0,B1,B2,B3);
  22. }
  23. template<typename T>
  24. static __forceinline Vec4<T> derivative(const T& u)
  25. {
  26. const T t1 = u;
  27. const T t0 = 1.0f-t1;
  28. const T B0 = -(t0*t0);
  29. const T B1 = madd(-2.0f,t0*t1,t0*t0);
  30. const T B2 = msub(+2.0f,t0*t1,t1*t1);
  31. const T B3 = +(t1*t1);
  32. return T(3.0f)*Vec4<T>(B0,B1,B2,B3);
  33. }
  34. template<typename T>
  35. static __forceinline Vec4<T> derivative2(const T& u)
  36. {
  37. const T t1 = u;
  38. const T t0 = 1.0f-t1;
  39. const T B0 = t0;
  40. const T B1 = madd(-2.0f,t0,t1);
  41. const T B2 = madd(-2.0f,t1,t0);
  42. const T B3 = t1;
  43. return T(6.0f)*Vec4<T>(B0,B1,B2,B3);
  44. }
  45. };
  /* Lookup tables of cubic Bezier basis weights (c0..c3) and basis
   * derivative weights (d0..d3). Every table has (N+1) x (N+1) entries;
   * the curve evaluators in this file read them as table[size][ofs].
   * The table contents are produced by the out-of-line constructor. */
  struct PrecomputedBezierBasis
  {
    enum { N = 16 };

    /* one weight table */
    using Table = float[N+1][N+1];

    /* leaves all tables uninitialized */
    PrecomputedBezierBasis() {}

    /* fills the tables; defined outside this header.
     * NOTE(review): 'shift' presumably offsets the sampled parameter
     * positions -- confirm against the implementation. */
    PrecomputedBezierBasis(int shift);

    /* basis for bezier evaluation */
    Table c0;
    Table c1;
    Table c2;
    Table c3;

    /* basis for bezier derivative evaluation */
    Table d0;
    Table d1;
    Table d2;
    Table d3;
  };

  /* the two shared table instances (built with different 'shift' values,
   * judging by the constructor signature); defined in another translation unit */
  extern PrecomputedBezierBasis bezier_basis0;
  extern PrecomputedBezierBasis bezier_basis1;
  67. template<typename V>
  68. struct LinearBezierCurve
  69. {
  70. V v0,v1;
  71. __forceinline LinearBezierCurve () {}
  72. __forceinline LinearBezierCurve (const LinearBezierCurve& other)
  73. : v0(other.v0), v1(other.v1) {}
  74. __forceinline LinearBezierCurve& operator= (const LinearBezierCurve& other) {
  75. v0 = other.v0; v1 = other.v1; return *this;
  76. }
  77. __forceinline LinearBezierCurve (const V& v0, const V& v1)
  78. : v0(v0), v1(v1) {}
  79. __forceinline V begin() const { return v0; }
  80. __forceinline V end () const { return v1; }
  81. bool hasRoot() const;
  82. friend embree_ostream operator<<(embree_ostream cout, const LinearBezierCurve& a) {
  83. return cout << "LinearBezierCurve (" << a.v0 << ", " << a.v1 << ")";
  84. }
  85. };
  86. template<> __forceinline bool LinearBezierCurve<Interval1f>::hasRoot() const {
  87. return numRoots(v0,v1);
  88. }
  89. template<typename V>
  90. struct QuadraticBezierCurve
  91. {
  92. V v0,v1,v2;
  93. __forceinline QuadraticBezierCurve () {}
  94. __forceinline QuadraticBezierCurve (const QuadraticBezierCurve& other)
  95. : v0(other.v0), v1(other.v1), v2(other.v2) {}
  96. __forceinline QuadraticBezierCurve& operator= (const QuadraticBezierCurve& other) {
  97. v0 = other.v0; v1 = other.v1; v2 = other.v2; return *this;
  98. }
  99. __forceinline QuadraticBezierCurve (const V& v0, const V& v1, const V& v2)
  100. : v0(v0), v1(v1), v2(v2) {}
  101. __forceinline V begin() const { return v0; }
  102. __forceinline V end () const { return v2; }
  103. __forceinline V interval() const {
  104. return merge(v0,v1,v2);
  105. }
  106. __forceinline BBox<V> bounds() const {
  107. return merge(BBox<V>(v0),BBox<V>(v1),BBox<V>(v2));
  108. }
  109. friend embree_ostream operator<<(embree_ostream cout, const QuadraticBezierCurve& a) {
  110. return cout << "QuadraticBezierCurve ( (" << a.u.lower << ", " << a.u.upper << "), " << a.v0 << ", " << a.v1 << ", " << a.v2 << ")";
  111. }
  112. };
  113. typedef QuadraticBezierCurve<float> QuadraticBezierCurve1f;
  114. typedef QuadraticBezierCurve<Vec2fa> QuadraticBezierCurve2fa;
  115. typedef QuadraticBezierCurve<Vec3fa> QuadraticBezierCurve3fa;
  116. template<typename Vertex>
  117. struct CubicBezierCurve
  118. {
  119. Vertex v0,v1,v2,v3;
  120. __forceinline CubicBezierCurve() {}
  121. template<typename T1>
  122. __forceinline CubicBezierCurve (const CubicBezierCurve<T1>& other)
  123. : v0(other.v0), v1(other.v1), v2(other.v2), v3(other.v3) {}
  124. __forceinline CubicBezierCurve& operator= (const CubicBezierCurve& other) {
  125. v0 = other.v0; v1 = other.v1; v2 = other.v2; v3 = other.v3; return *this;
  126. }
  127. __forceinline CubicBezierCurve(const Vertex& v0, const Vertex& v1, const Vertex& v2, const Vertex& v3)
  128. : v0(v0), v1(v1), v2(v2), v3(v3) {}
  129. __forceinline Vertex begin() const {
  130. return v0;
  131. }
  132. __forceinline Vertex end() const {
  133. return v3;
  134. }
  135. __forceinline Vertex center() const {
  136. return 0.25f*(v0+v1+v2+v3);
  137. }
  138. __forceinline Vertex begin_direction() const {
  139. return v1-v0;
  140. }
  141. __forceinline Vertex end_direction() const {
  142. return v3-v2;
  143. }
  144. __forceinline CubicBezierCurve<float> xfm(const Vertex& dx) const {
  145. return CubicBezierCurve<float>(dot(v0,dx),dot(v1,dx),dot(v2,dx),dot(v3,dx));
  146. }
  147. __forceinline CubicBezierCurve<vfloatx> vxfm(const Vertex& dx) const {
  148. return CubicBezierCurve<vfloatx>(dot(v0,dx),dot(v1,dx),dot(v2,dx),dot(v3,dx));
  149. }
  150. __forceinline CubicBezierCurve<float> xfm(const Vertex& dx, const Vertex& p) const {
  151. return CubicBezierCurve<float>(dot(v0-p,dx),dot(v1-p,dx),dot(v2-p,dx),dot(v3-p,dx));
  152. }
  153. __forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space) const
  154. {
  155. const Vec3fa q0 = xfmVector(space,v0);
  156. const Vec3fa q1 = xfmVector(space,v1);
  157. const Vec3fa q2 = xfmVector(space,v2);
  158. const Vec3fa q3 = xfmVector(space,v3);
  159. return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3);
  160. }
  161. __forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p) const
  162. {
  163. const Vec3fa q0 = xfmVector(space,v0-p);
  164. const Vec3fa q1 = xfmVector(space,v1-p);
  165. const Vec3fa q2 = xfmVector(space,v2-p);
  166. const Vec3fa q3 = xfmVector(space,v3-p);
  167. return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3);
  168. }
  169. __forceinline CubicBezierCurve<Vec3ff> xfm_pr(const LinearSpace3fa& space, const Vec3fa& p) const
  170. {
  171. const Vec3ff q0(xfmVector(space,(Vec3fa)v0-p), v0.w);
  172. const Vec3ff q1(xfmVector(space,(Vec3fa)v1-p), v1.w);
  173. const Vec3ff q2(xfmVector(space,(Vec3fa)v2-p), v2.w);
  174. const Vec3ff q3(xfmVector(space,(Vec3fa)v3-p), v3.w);
  175. return CubicBezierCurve<Vec3ff>(q0,q1,q2,q3);
  176. }
  177. __forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p, const float s) const
  178. {
  179. const Vec3fa q0 = xfmVector(space,s*(v0-p));
  180. const Vec3fa q1 = xfmVector(space,s*(v1-p));
  181. const Vec3fa q2 = xfmVector(space,s*(v2-p));
  182. const Vec3fa q3 = xfmVector(space,s*(v3-p));
  183. return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3);
  184. }
  185. __forceinline int maxRoots() const;
  186. __forceinline BBox<Vertex> bounds() const {
  187. return merge(BBox<Vertex>(v0),BBox<Vertex>(v1),BBox<Vertex>(v2),BBox<Vertex>(v3));
  188. }
  189. __forceinline friend CubicBezierCurve operator +( const CubicBezierCurve& a, const CubicBezierCurve& b ) {
  190. return CubicBezierCurve(a.v0+b.v0,a.v1+b.v1,a.v2+b.v2,a.v3+b.v3);
  191. }
  192. __forceinline friend CubicBezierCurve operator -( const CubicBezierCurve& a, const CubicBezierCurve& b ) {
  193. return CubicBezierCurve(a.v0-b.v0,a.v1-b.v1,a.v2-b.v2,a.v3-b.v3);
  194. }
  195. __forceinline friend CubicBezierCurve operator -( const CubicBezierCurve& a, const Vertex& b ) {
  196. return CubicBezierCurve(a.v0-b,a.v1-b,a.v2-b,a.v3-b);
  197. }
  198. __forceinline friend CubicBezierCurve operator *( const Vertex& a, const CubicBezierCurve& b ) {
  199. return CubicBezierCurve(a*b.v0,a*b.v1,a*b.v2,a*b.v3);
  200. }
  201. __forceinline friend CubicBezierCurve cmadd( const Vertex& a, const CubicBezierCurve& b, const CubicBezierCurve& c) {
  202. return CubicBezierCurve(madd(a,b.v0,c.v0),madd(a,b.v1,c.v1),madd(a,b.v2,c.v2),madd(a,b.v3,c.v3));
  203. }
  204. __forceinline friend CubicBezierCurve clerp ( const CubicBezierCurve& a, const CubicBezierCurve& b, const Vertex& t ) {
  205. return cmadd((Vertex(1.0f)-t),a,t*b);
  206. }
  207. __forceinline friend CubicBezierCurve merge ( const CubicBezierCurve& a, const CubicBezierCurve& b ) {
  208. return CubicBezierCurve(merge(a.v0,b.v0),merge(a.v1,b.v1),merge(a.v2,b.v2),merge(a.v3,b.v3));
  209. }
  210. __forceinline void split(CubicBezierCurve& left, CubicBezierCurve& right, const float t = 0.5f) const
  211. {
  212. const Vertex p00 = v0;
  213. const Vertex p01 = v1;
  214. const Vertex p02 = v2;
  215. const Vertex p03 = v3;
  216. const Vertex p10 = lerp(p00,p01,t);
  217. const Vertex p11 = lerp(p01,p02,t);
  218. const Vertex p12 = lerp(p02,p03,t);
  219. const Vertex p20 = lerp(p10,p11,t);
  220. const Vertex p21 = lerp(p11,p12,t);
  221. const Vertex p30 = lerp(p20,p21,t);
  222. new (&left ) CubicBezierCurve(p00,p10,p20,p30);
  223. new (&right) CubicBezierCurve(p30,p21,p12,p03);
  224. }
  225. __forceinline CubicBezierCurve<Vec2vfx> split() const
  226. {
  227. const float u0 = 0.0f, u1 = 1.0f;
  228. const float dscale = (u1-u0)*(1.0f/(3.0f*(VSIZEX-1)));
  229. const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(VSIZEX-1)));
  230. Vec2vfx P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vfx(dscale);
  231. const Vec2vfx P3 = shift_right_1(P0);
  232. const Vec2vfx dP3du = shift_right_1(dP0du);
  233. const Vec2vfx P1 = P0 + dP0du;
  234. const Vec2vfx P2 = P3 - dP3du;
  235. return CubicBezierCurve<Vec2vfx>(P0,P1,P2,P3);
  236. }
  237. __forceinline CubicBezierCurve<Vec2vfx> split(const BBox1f& u) const
  238. {
  239. const float u0 = u.lower, u1 = u.upper;
  240. const float dscale = (u1-u0)*(1.0f/(3.0f*(VSIZEX-1)));
  241. const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(VSIZEX-1)));
  242. Vec2vfx P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vfx(dscale);
  243. const Vec2vfx P3 = shift_right_1(P0);
  244. const Vec2vfx dP3du = shift_right_1(dP0du);
  245. const Vec2vfx P1 = P0 + dP0du;
  246. const Vec2vfx P2 = P3 - dP3du;
  247. return CubicBezierCurve<Vec2vfx>(P0,P1,P2,P3);
  248. }
  249. __forceinline void eval(float t, Vertex& p, Vertex& dp) const
  250. {
  251. const Vertex p00 = v0;
  252. const Vertex p01 = v1;
  253. const Vertex p02 = v2;
  254. const Vertex p03 = v3;
  255. const Vertex p10 = lerp(p00,p01,t);
  256. const Vertex p11 = lerp(p01,p02,t);
  257. const Vertex p12 = lerp(p02,p03,t);
  258. const Vertex p20 = lerp(p10,p11,t);
  259. const Vertex p21 = lerp(p11,p12,t);
  260. const Vertex p30 = lerp(p20,p21,t);
  261. p = p30;
  262. dp = Vertex(3.0f)*(p21-p20);
  263. }
  264. #if 0
  265. __forceinline Vertex eval(float t) const
  266. {
  267. const Vertex p00 = v0;
  268. const Vertex p01 = v1;
  269. const Vertex p02 = v2;
  270. const Vertex p03 = v3;
  271. const Vertex p10 = lerp(p00,p01,t);
  272. const Vertex p11 = lerp(p01,p02,t);
  273. const Vertex p12 = lerp(p02,p03,t);
  274. const Vertex p20 = lerp(p10,p11,t);
  275. const Vertex p21 = lerp(p11,p12,t);
  276. const Vertex p30 = lerp(p20,p21,t);
  277. return p30;
  278. }
  279. #else
  280. __forceinline Vertex eval(const float t) const
  281. {
  282. const Vec4<float> b = BezierBasis::eval(t);
  283. return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
  284. }
  285. #endif
  286. __forceinline Vertex eval_dt(float t) const
  287. {
  288. const Vertex p00 = v1-v0;
  289. const Vertex p01 = v2-v1;
  290. const Vertex p02 = v3-v2;
  291. const Vertex p10 = lerp(p00,p01,t);
  292. const Vertex p11 = lerp(p01,p02,t);
  293. const Vertex p20 = lerp(p10,p11,t);
  294. return Vertex(3.0f)*p20;
  295. }
  296. __forceinline Vertex eval_du(const float t) const
  297. {
  298. const Vec4<float> b = BezierBasis::derivative(t);
  299. return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
  300. }
  301. __forceinline Vertex eval_dudu(const float t) const
  302. {
  303. const Vec4<float> b = BezierBasis::derivative2(t);
  304. return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
  305. }
  306. __forceinline void evalN(const vfloatx& t, Vec2vfx& p, Vec2vfx& dp) const
  307. {
  308. const Vec2vfx p00 = v0;
  309. const Vec2vfx p01 = v1;
  310. const Vec2vfx p02 = v2;
  311. const Vec2vfx p03 = v3;
  312. const Vec2vfx p10 = lerp(p00,p01,t);
  313. const Vec2vfx p11 = lerp(p01,p02,t);
  314. const Vec2vfx p12 = lerp(p02,p03,t);
  315. const Vec2vfx p20 = lerp(p10,p11,t);
  316. const Vec2vfx p21 = lerp(p11,p12,t);
  317. const Vec2vfx p30 = lerp(p20,p21,t);
  318. p = p30;
  319. dp = vfloatx(3.0f)*(p21-p20);
  320. }
  321. __forceinline void eval(const float t, Vertex& p, Vertex& dp, Vertex& ddp) const
  322. {
  323. const Vertex p00 = v0;
  324. const Vertex p01 = v1;
  325. const Vertex p02 = v2;
  326. const Vertex p03 = v3;
  327. const Vertex p10 = lerp(p00,p01,t);
  328. const Vertex p11 = lerp(p01,p02,t);
  329. const Vertex p12 = lerp(p02,p03,t);
  330. const Vertex p20 = lerp(p10,p11,t);
  331. const Vertex p21 = lerp(p11,p12,t);
  332. const Vertex p30 = lerp(p20,p21,t);
  333. p = p30;
  334. dp = 3.0f*(p21-p20);
  335. ddp = eval_dudu(t);
  336. }
  337. __forceinline CubicBezierCurve clip(const Interval1f& u1) const
  338. {
  339. Vertex f0,df0; eval(u1.lower,f0,df0);
  340. Vertex f1,df1; eval(u1.upper,f1,df1);
  341. float s = u1.upper-u1.lower;
  342. return CubicBezierCurve(f0,f0+s*(1.0f/3.0f)*df0,f1-s*(1.0f/3.0f)*df1,f1);
  343. }
  344. __forceinline QuadraticBezierCurve<Vertex> derivative() const
  345. {
  346. const Vertex q0 = 3.0f*(v1-v0);
  347. const Vertex q1 = 3.0f*(v2-v1);
  348. const Vertex q2 = 3.0f*(v3-v2);
  349. return QuadraticBezierCurve<Vertex>(q0,q1,q2);
  350. }
  351. __forceinline BBox<Vertex> derivative_bounds(const Interval1f& u1) const
  352. {
  353. Vertex f0,df0; eval(u1.lower,f0,df0);
  354. Vertex f3,df3; eval(u1.upper,f3,df3);
  355. const float s = u1.upper-u1.lower;
  356. const Vertex f1 = f0+s*(1.0f/3.0f)*df0;
  357. const Vertex f2 = f3-s*(1.0f/3.0f)*df3;
  358. const Vertex q0 = s*df0;
  359. const Vertex q1 = 3.0f*(f2-f1);
  360. const Vertex q2 = s*df3;
  361. return merge(BBox<Vertex>(q0),BBox<Vertex>(q1),BBox<Vertex>(q2));
  362. }
  363. template<int M>
  364. __forceinline Vec4vf<M> veval(const vfloat<M>& t) const
  365. {
  366. const Vec4vf<M> b = BezierBasis::eval(t);
  367. return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
  368. }
  369. template<int M>
  370. __forceinline Vec4vf<M> veval_du(const vfloat<M>& t) const
  371. {
  372. const Vec4vf<M> b = BezierBasis::derivative(t);
  373. return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
  374. }
  375. template<int M>
  376. __forceinline Vec4vf<M> veval_dudu(const vfloat<M>& t) const
  377. {
  378. const Vec4vf<M> b = BezierBasis::derivative2(t);
  379. return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
  380. }
  381. template<int M>
  382. __forceinline void veval(const vfloat<M>& t, Vec4vf<M>& p, Vec4vf<M>& dp) const
  383. {
  384. const Vec4vf<M> p00 = v0;
  385. const Vec4vf<M> p01 = v1;
  386. const Vec4vf<M> p02 = v2;
  387. const Vec4vf<M> p03 = v3;
  388. const Vec4vf<M> p10 = lerp(p00,p01,t);
  389. const Vec4vf<M> p11 = lerp(p01,p02,t);
  390. const Vec4vf<M> p12 = lerp(p02,p03,t);
  391. const Vec4vf<M> p20 = lerp(p10,p11,t);
  392. const Vec4vf<M> p21 = lerp(p11,p12,t);
  393. const Vec4vf<M> p30 = lerp(p20,p21,t);
  394. p = p30;
  395. dp = vfloat<M>(3.0f)*(p21-p20);
  396. }
  397. template<int M, typename Vec = Vec4vf<M>>
  398. __forceinline Vec eval0(const int ofs, const int size) const
  399. {
  400. assert(size <= PrecomputedBezierBasis::N);
  401. assert(ofs <= size);
  402. return madd(vfloat<M>::loadu(&bezier_basis0.c0[size][ofs]), Vec(v0),
  403. madd(vfloat<M>::loadu(&bezier_basis0.c1[size][ofs]), Vec(v1),
  404. madd(vfloat<M>::loadu(&bezier_basis0.c2[size][ofs]), Vec(v2),
  405. vfloat<M>::loadu(&bezier_basis0.c3[size][ofs]) * Vec(v3))));
  406. }
  407. template<int M, typename Vec = Vec4vf<M>>
  408. __forceinline Vec eval1(const int ofs, const int size) const
  409. {
  410. assert(size <= PrecomputedBezierBasis::N);
  411. assert(ofs <= size);
  412. return madd(vfloat<M>::loadu(&bezier_basis1.c0[size][ofs]), Vec(v0),
  413. madd(vfloat<M>::loadu(&bezier_basis1.c1[size][ofs]), Vec(v1),
  414. madd(vfloat<M>::loadu(&bezier_basis1.c2[size][ofs]), Vec(v2),
  415. vfloat<M>::loadu(&bezier_basis1.c3[size][ofs]) * Vec(v3))));
  416. }
  417. template<int M, typename Vec = Vec4vf<M>>
  418. __forceinline Vec derivative0(const int ofs, const int size) const
  419. {
  420. assert(size <= PrecomputedBezierBasis::N);
  421. assert(ofs <= size);
  422. return madd(vfloat<M>::loadu(&bezier_basis0.d0[size][ofs]), Vec(v0),
  423. madd(vfloat<M>::loadu(&bezier_basis0.d1[size][ofs]), Vec(v1),
  424. madd(vfloat<M>::loadu(&bezier_basis0.d2[size][ofs]), Vec(v2),
  425. vfloat<M>::loadu(&bezier_basis0.d3[size][ofs]) * Vec(v3))));
  426. }
  427. template<int M, typename Vec = Vec4vf<M>>
  428. __forceinline Vec derivative1(const int ofs, const int size) const
  429. {
  430. assert(size <= PrecomputedBezierBasis::N);
  431. assert(ofs <= size);
  432. return madd(vfloat<M>::loadu(&bezier_basis1.d0[size][ofs]), Vec(v0),
  433. madd(vfloat<M>::loadu(&bezier_basis1.d1[size][ofs]), Vec(v1),
  434. madd(vfloat<M>::loadu(&bezier_basis1.d2[size][ofs]), Vec(v2),
  435. vfloat<M>::loadu(&bezier_basis1.d3[size][ofs]) * Vec(v3))));
  436. }
  437. /* calculates bounds of bezier curve geometry */
  438. __forceinline BBox3fa accurateBounds() const
  439. {
  440. const int N = 7;
  441. const float scale = 1.0f/(3.0f*(N-1));
  442. Vec3vfx pl(pos_inf), pu(neg_inf);
  443. for (int i=0; i<=N; i+=VSIZEX)
  444. {
  445. vintx vi = vintx(i)+vintx(step);
  446. vboolx valid = vi <= vintx(N);
  447. const Vec3vfx p = eval0<VSIZEX,Vec3vf<VSIZEX>>(i,N);
  448. const Vec3vfx dp = derivative0<VSIZEX,Vec3vf<VSIZEX>>(i,N);
  449. const Vec3vfx pm = p-Vec3vfx(scale)*select(vi!=vintx(0),dp,Vec3vfx(zero));
  450. const Vec3vfx pp = p+Vec3vfx(scale)*select(vi!=vintx(N),dp,Vec3vfx(zero));
  451. pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min
  452. pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min
  453. }
  454. const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
  455. const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
  456. return BBox3fa(lower,upper);
  457. }
  458. /* calculates bounds of bezier curve geometry */
  459. __forceinline BBox3fa accurateRoundBounds() const
  460. {
  461. const int N = 7;
  462. const float scale = 1.0f/(3.0f*(N-1));
  463. Vec4vfx pl(pos_inf), pu(neg_inf);
  464. for (int i=0; i<=N; i+=VSIZEX)
  465. {
  466. vintx vi = vintx(i)+vintx(step);
  467. vboolx valid = vi <= vintx(N);
  468. const Vec4vfx p = eval0<VSIZEX>(i,N);
  469. const Vec4vfx dp = derivative0<VSIZEX>(i,N);
  470. const Vec4vfx pm = p-Vec4vfx(scale)*select(vi!=vintx(0),dp,Vec4vfx(zero));
  471. const Vec4vfx pp = p+Vec4vfx(scale)*select(vi!=vintx(N),dp,Vec4vfx(zero));
  472. pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min
  473. pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min
  474. }
  475. const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
  476. const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
  477. const float r_min = reduce_min(pl.w);
  478. const float r_max = reduce_max(pu.w);
  479. const Vec3fa upper_r = Vec3fa(max(abs(r_min),abs(r_max)));
  480. return enlarge(BBox3fa(lower,upper),upper_r);
  481. }
  482. /* calculates bounds when tessellated into N line segments */
  483. __forceinline BBox3fa accurateFlatBounds(int N) const
  484. {
  485. if (likely(N == 4))
  486. {
  487. const Vec4vf4 pi = eval0<4>(0,4);
  488. const Vec3fa lower(reduce_min(pi.x),reduce_min(pi.y),reduce_min(pi.z));
  489. const Vec3fa upper(reduce_max(pi.x),reduce_max(pi.y),reduce_max(pi.z));
  490. const Vec3fa upper_r = Vec3fa(reduce_max(abs(pi.w)));
  491. return enlarge(BBox3fa(min(lower,v3),max(upper,v3)),max(upper_r,Vec3fa(abs(v3.w))));
  492. }
  493. else
  494. {
  495. Vec3vfx pl(pos_inf), pu(neg_inf); vfloatx ru(0.0f);
  496. for (int i=0; i<N; i+=VSIZEX)
  497. {
  498. vboolx valid = vintx(i)+vintx(step) < vintx(N);
  499. const Vec4vfx pi = eval0<VSIZEX>(i,N);
  500. pl.x = select(valid,min(pl.x,pi.x),pl.x); // FIXME: use masked min
  501. pl.y = select(valid,min(pl.y,pi.y),pl.y);
  502. pl.z = select(valid,min(pl.z,pi.z),pl.z);
  503. pu.x = select(valid,max(pu.x,pi.x),pu.x); // FIXME: use masked min
  504. pu.y = select(valid,max(pu.y,pi.y),pu.y);
  505. pu.z = select(valid,max(pu.z,pi.z),pu.z);
  506. ru = select(valid,max(ru,abs(pi.w)),ru);
  507. }
  508. const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
  509. const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
  510. const Vec3fa upper_r(reduce_max(ru));
  511. return enlarge(BBox3fa(min(lower,v3),max(upper,v3)),max(upper_r,Vec3fa(abs(v3.w))));
  512. }
  513. }
  514. friend __forceinline embree_ostream operator<<(embree_ostream cout, const CubicBezierCurve& curve) {
  515. return cout << "CubicBezierCurve { v0 = " << curve.v0 << ", v1 = " << curve.v1 << ", v2 = " << curve.v2 << ", v3 = " << curve.v3 << " }";
  516. }
  517. };
  /* AVX variant of clip() for vfloat4 vertices: the lower half of every
   * 8-wide register evaluates the curve at u1.lower and the upper half at
   * u1.upper, so both end points are computed by one de Casteljau pass. */
#if defined(__AVX__)
  template<>
    __forceinline CubicBezierCurve<vfloat4> CubicBezierCurve<vfloat4>::clip(const Interval1f& u1) const
  {
    /* every control point duplicated into both halves */
    const vfloat8 c0 = vfloat8(v0);
    const vfloat8 c1 = vfloat8(v1);
    const vfloat8 c2 = vfloat8(v2);
    const vfloat8 c3 = vfloat8(v3);

    /* parameter: u1.lower in the first half, u1.upper in the second */
    const vfloat8 t(vfloat4(u1.lower),vfloat4(u1.upper));

    const vfloat8 l0 = lerp(c0,c1,t);
    const vfloat8 l1 = lerp(c1,c2,t);
    const vfloat8 l2 = lerp(c2,c3,t);
    const vfloat8 q0 = lerp(l0,l1,t);
    const vfloat8 q1 = lerp(l1,l2,t);
    const vfloat8 pos = lerp(q0,q1,t);
    const vfloat8 der = vfloat8(3.0f)*(q1-q0);

    /* split the halves back into the two end points and derivatives */
    const vfloat4 f0  = extract4<0>(pos);
    const vfloat4 f1  = extract4<1>(pos);
    const vfloat4 df0 = extract4<0>(der);
    const vfloat4 df1 = extract4<1>(der);

    const float s = u1.upper-u1.lower;
    return CubicBezierCurve(f0,f0+s*(1.0f/3.0f)*df0,f1-s*(1.0f/3.0f)*df1,f1);
  }
#endif
  541. template<typename Vertex> using BezierCurveT = CubicBezierCurve<Vertex>;
  542. typedef CubicBezierCurve<float> CubicBezierCurve1f;
  543. typedef CubicBezierCurve<Vec2fa> CubicBezierCurve2fa;
  544. typedef CubicBezierCurve<Vec3fa> CubicBezierCurve3fa;
  545. typedef CubicBezierCurve<Vec3fa> BezierCurve3fa;
  546. template<> __forceinline int CubicBezierCurve<float>::maxRoots() const
  547. {
  548. float eps = 1E-4f;
  549. bool neg0 = v0 <= 0.0f; bool zero0 = fabs(v0) < eps;
  550. bool neg1 = v1 <= 0.0f; bool zero1 = fabs(v1) < eps;
  551. bool neg2 = v2 <= 0.0f; bool zero2 = fabs(v2) < eps;
  552. bool neg3 = v3 <= 0.0f; bool zero3 = fabs(v3) < eps;
  553. return (neg0 != neg1 || zero0) + (neg1 != neg2 || zero1) + (neg2 != neg3 || zero2 || zero3);
  554. }
  555. template<> __forceinline int CubicBezierCurve<Interval1f>::maxRoots() const {
  556. return numRoots(v0,v1) + numRoots(v1,v2) + numRoots(v2,v3);
  557. }
  558. template<typename CurveGeometry>
  559. __forceinline CubicBezierCurve<Vec3ff> enlargeRadiusToMinWidth(const IntersectContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const CubicBezierCurve<Vec3ff>& curve)
  560. {
  561. return CubicBezierCurve<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0),
  562. enlargeRadiusToMinWidth(context,geom,ray_org,curve.v1),
  563. enlargeRadiusToMinWidth(context,geom,ray_org,curve.v2),
  564. enlargeRadiusToMinWidth(context,geom,ray_org,curve.v3));
  565. }
  566. }