// feature_adaptive_eval_grid.h
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "patch.h"
  5. #include "catmullclark_patch.h"
  6. #include "bspline_patch.h"
  7. #include "gregory_patch.h"
  8. #include "tessellation.h"
  9. namespace embree
  10. {
  11. namespace isa
  12. {
  13. struct FeatureAdaptiveEvalGrid
  14. {
  15. typedef CatmullClark1Ring3fa CatmullClarkRing;
  16. typedef CatmullClarkPatch3fa CatmullClarkPatch;
  17. typedef BilinearPatch3fa BilinearPatch;
  18. typedef BSplinePatch3fa BSplinePatch;
  19. typedef BezierPatch3fa BezierPatch;
  20. typedef GregoryPatch3fa GregoryPatch;
  21. private:
  22. const unsigned x0,x1;
  23. const unsigned y0,y1;
  24. const unsigned swidth,sheight;
  25. const float rcp_swidth, rcp_sheight;
  26. float* const Px;
  27. float* const Py;
  28. float* const Pz;
  29. float* const U;
  30. float* const V;
  31. float* const Nx;
  32. float* const Ny;
  33. float* const Nz;
  34. const unsigned dwidth;
  35. //const unsigned dheight;
  36. unsigned count;
  37. public:
  38. FeatureAdaptiveEvalGrid (const GeneralCatmullClarkPatch3fa& patch, unsigned subPatch,
  39. const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
  40. float* Px, float* Py, float* Pz, float* U, float* V,
  41. float* Nx, float* Ny, float* Nz,
  42. const unsigned dwidth, const unsigned dheight)
  43. : x0(x0), x1(x1), y0(y0), y1(y1), swidth(swidth), sheight(sheight), rcp_swidth(1.0f/(swidth-1.0f)), rcp_sheight(1.0f/(sheight-1.0f)),
  44. Px(Px), Py(Py), Pz(Pz), U(U), V(V), Nx(Nx), Ny(Ny), Nz(Nz), dwidth(dwidth), /*dheight(dheight),*/ count(0)
  45. {
  46. assert(swidth < (2<<20) && sheight < (2<<20));
  47. const BBox2f srange(Vec2f(0.0f,0.0f),Vec2f(float(swidth-1),float(sheight-1)));
  48. const BBox2f erange(Vec2f((float)x0,(float)y0),Vec2f((float)x1,(float)y1));
  49. /* convert into standard quad patch if possible */
  50. if (likely(patch.isQuadPatch()))
  51. {
  52. CatmullClarkPatch3fa qpatch; patch.init(qpatch);
  53. eval(qpatch, srange, erange, 0);
  54. assert(count == (x1-x0+1)*(y1-y0+1));
  55. return;
  56. }
  57. /* subdivide patch */
  58. unsigned N;
  59. array_t<CatmullClarkPatch3fa,GeneralCatmullClarkPatch3fa::SIZE> patches;
  60. patch.subdivide(patches,N);
  61. if (N == 4)
  62. {
  63. const Vec2f c = srange.center();
  64. const BBox2f srange0(srange.lower,c);
  65. const BBox2f srange1(Vec2f(c.x,srange.lower.y),Vec2f(srange.upper.x,c.y));
  66. const BBox2f srange2(c,srange.upper);
  67. const BBox2f srange3(Vec2f(srange.lower.x,c.y),Vec2f(c.x,srange.upper.y));
  68. #if PATCH_USE_GREGORY == 2
  69. BezierCurve3fa borders[GeneralCatmullClarkPatch3fa::SIZE]; patch.getLimitBorder(borders);
  70. BezierCurve3fa border0l,border0r; borders[0].subdivide(border0l,border0r);
  71. BezierCurve3fa border1l,border1r; borders[1].subdivide(border1l,border1r);
  72. BezierCurve3fa border2l,border2r; borders[2].subdivide(border2l,border2r);
  73. BezierCurve3fa border3l,border3r; borders[3].subdivide(border3l,border3r);
  74. GeneralCatmullClarkPatch3fa::fix_quad_ring_order(patches);
  75. eval(patches[0],srange0,intersect(srange0,erange),1,&border0l,nullptr,nullptr,&border3r);
  76. eval(patches[1],srange1,intersect(srange1,erange),1,&border0r,&border1l,nullptr,nullptr);
  77. eval(patches[2],srange2,intersect(srange2,erange),1,nullptr,&border1r,&border2l,nullptr);
  78. eval(patches[3],srange3,intersect(srange3,erange),1,nullptr,nullptr,&border2r,&border3l);
  79. #else
  80. GeneralCatmullClarkPatch3fa::fix_quad_ring_order(patches);
  81. eval(patches[0],srange0,intersect(srange0,erange),1);
  82. eval(patches[1],srange1,intersect(srange1,erange),1);
  83. eval(patches[2],srange2,intersect(srange2,erange),1);
  84. eval(patches[3],srange3,intersect(srange3,erange),1);
  85. #endif
  86. }
  87. else
  88. {
  89. assert(subPatch < N);
  90. #if PATCH_USE_GREGORY == 2
  91. BezierCurve3fa borders[2]; patch.getLimitBorder(borders,subPatch);
  92. BezierCurve3fa border0l,border0r; borders[0].subdivide(border0l,border0r);
  93. BezierCurve3fa border2l,border2r; borders[1].subdivide(border2l,border2r);
  94. eval(patches[subPatch], srange, erange, 1, &border0l, nullptr, nullptr, &border2r);
  95. #else
  96. eval(patches[subPatch], srange, erange, 1);
  97. #endif
  98. }
  99. assert(count == (x1-x0+1)*(y1-y0+1));
  100. }
  101. FeatureAdaptiveEvalGrid (const CatmullClarkPatch3fa& patch,
  102. const BBox2f& srange, const BBox2f& erange, const unsigned depth,
  103. const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
  104. float* Px, float* Py, float* Pz, float* U, float* V,
  105. float* Nx, float* Ny, float* Nz,
  106. const unsigned dwidth, const unsigned dheight)
  107. : x0(x0), x1(x1), y0(y0), y1(y1), swidth(swidth), sheight(sheight), rcp_swidth(1.0f/(swidth-1.0f)), rcp_sheight(1.0f/(sheight-1.0f)),
  108. Px(Px), Py(Py), Pz(Pz), U(U), V(V), Nx(Nx), Ny(Ny), Nz(Nz), dwidth(dwidth), /*dheight(dheight),*/ count(0)
  109. {
  110. eval(patch,srange,erange,depth);
  111. }
  112. template<typename Patch>
  113. void evalLocalGrid(const Patch& patch, const BBox2f& srange, const int lx0, const int lx1, const int ly0, const int ly1)
  114. {
  115. const float scale_x = rcp(srange.upper.x-srange.lower.x);
  116. const float scale_y = rcp(srange.upper.y-srange.lower.y);
  117. count += (lx1-lx0)*(ly1-ly0);
  118. #if 0
  119. for (unsigned iy=ly0; iy<ly1; iy++) {
  120. for (unsigned ix=lx0; ix<lx1; ix++) {
  121. const float lu = select(ix == swidth -1, float(1.0f), (float(ix)-srange.lower.x)*scale_x);
  122. const float lv = select(iy == sheight-1, float(1.0f), (float(iy)-srange.lower.y)*scale_y);
  123. const Vec3fa p = patch.eval(lu,lv);
  124. const float u = float(ix)*rcp_swidth;
  125. const float v = float(iy)*rcp_sheight;
  126. const int ofs = (iy-y0)*dwidth+(ix-x0);
  127. Px[ofs] = p.x;
  128. Py[ofs] = p.y;
  129. Pz[ofs] = p.z;
  130. U[ofs] = u;
  131. V[ofs] = v;
  132. }
  133. }
  134. #else
  135. foreach2(lx0,lx1,ly0,ly1,[&](const vboolx& valid, const vintx& ix, const vintx& iy) {
  136. const vfloatx lu = select(ix == swidth -1, vfloatx(1.0f), (vfloatx(ix)-srange.lower.x)*scale_x);
  137. const vfloatx lv = select(iy == sheight-1, vfloatx(1.0f), (vfloatx(iy)-srange.lower.y)*scale_y);
  138. const Vec3vfx p = patch.eval(lu,lv);
  139. Vec3vfx n = zero;
  140. if (unlikely(Nx != nullptr)) n = normalize_safe(patch.normal(lu,lv));
  141. const vfloatx u = vfloatx(ix)*rcp_swidth;
  142. const vfloatx v = vfloatx(iy)*rcp_sheight;
  143. const vintx ofs = (iy-y0)*dwidth+(ix-x0);
  144. if (likely(all(valid)) && all(iy==iy[0])) {
  145. const unsigned ofs2 = ofs[0];
  146. vfloatx::storeu(Px+ofs2,p.x);
  147. vfloatx::storeu(Py+ofs2,p.y);
  148. vfloatx::storeu(Pz+ofs2,p.z);
  149. vfloatx::storeu(U+ofs2,u);
  150. vfloatx::storeu(V+ofs2,v);
  151. if (unlikely(Nx != nullptr)) {
  152. vfloatx::storeu(Nx+ofs2,n.x);
  153. vfloatx::storeu(Ny+ofs2,n.y);
  154. vfloatx::storeu(Nz+ofs2,n.z);
  155. }
  156. } else {
  157. foreach_unique_index(valid,iy,[&](const vboolx& valid, const int iy0, const int j) {
  158. const unsigned ofs2 = ofs[j]-j;
  159. vfloatx::storeu(valid,Px+ofs2,p.x);
  160. vfloatx::storeu(valid,Py+ofs2,p.y);
  161. vfloatx::storeu(valid,Pz+ofs2,p.z);
  162. vfloatx::storeu(valid,U+ofs2,u);
  163. vfloatx::storeu(valid,V+ofs2,v);
  164. if (unlikely(Nx != nullptr)) {
  165. vfloatx::storeu(valid,Nx+ofs2,n.x);
  166. vfloatx::storeu(valid,Ny+ofs2,n.y);
  167. vfloatx::storeu(valid,Nz+ofs2,n.z);
  168. }
  169. });
  170. }
  171. });
  172. #endif
  173. }
  174. __forceinline bool final(const CatmullClarkPatch3fa& patch, const CatmullClarkRing::Type type, unsigned depth)
  175. {
  176. const unsigned max_eval_depth = (type & CatmullClarkRing::TYPE_CREASES) ? PATCH_MAX_EVAL_DEPTH_CREASE : PATCH_MAX_EVAL_DEPTH_IRREGULAR;
  177. //#if PATCH_MIN_RESOLUTION
  178. // return patch.isFinalResolution(PATCH_MIN_RESOLUTION) || depth>=max_eval_depth;
  179. //#else
  180. return depth>=max_eval_depth;
  181. //#endif
  182. }
  183. void eval(const CatmullClarkPatch3fa& patch, const BBox2f& srange, const BBox2f& erange, const unsigned depth,
  184. const BezierCurve3fa* border0 = nullptr, const BezierCurve3fa* border1 = nullptr, const BezierCurve3fa* border2 = nullptr, const BezierCurve3fa* border3 = nullptr)
  185. {
  186. if (erange.empty())
  187. return;
  188. int lx0 = (int) ceilf(erange.lower.x);
  189. int lx1 = (int) ceilf(erange.upper.x) + (erange.upper.x == x1 && (srange.lower.x < erange.upper.x || erange.upper.x == 0));
  190. int ly0 = (int) ceilf(erange.lower.y);
  191. int ly1 = (int) ceilf(erange.upper.y) + (erange.upper.y == y1 && (srange.lower.y < erange.upper.y || erange.upper.y == 0));
  192. if (lx0 >= lx1 || ly0 >= ly1) return;
  193. CatmullClarkPatch::Type ty = patch.type();
  194. if (unlikely(final(patch,ty,depth)))
  195. {
  196. if (ty & CatmullClarkRing::TYPE_REGULAR) {
  197. RegularPatch rpatch(patch,border0,border1,border2,border3);
  198. evalLocalGrid(rpatch,srange,lx0,lx1,ly0,ly1);
  199. return;
  200. } else {
  201. IrregularFillPatch ipatch(patch,border0,border1,border2,border3);
  202. evalLocalGrid(ipatch,srange,lx0,lx1,ly0,ly1);
  203. return;
  204. }
  205. }
  206. else if (ty & CatmullClarkRing::TYPE_REGULAR_CREASES) {
  207. assert(depth > 0);
  208. RegularPatch rpatch(patch,border0,border1,border2,border3);
  209. evalLocalGrid(rpatch,srange,lx0,lx1,ly0,ly1);
  210. return;
  211. }
  212. #if PATCH_USE_GREGORY == 2
  213. else if (ty & CatmullClarkRing::TYPE_GREGORY_CREASES) {
  214. assert(depth > 0);
  215. GregoryPatch gpatch(patch,border0,border1,border2,border3);
  216. evalLocalGrid(gpatch,srange,lx0,lx1,ly0,ly1);
  217. }
  218. #endif
  219. else
  220. {
  221. array_t<CatmullClarkPatch3fa,4> patches;
  222. patch.subdivide(patches);
  223. const Vec2f c = srange.center();
  224. const BBox2f srange0(srange.lower,c);
  225. const BBox2f srange1(Vec2f(c.x,srange.lower.y),Vec2f(srange.upper.x,c.y));
  226. const BBox2f srange2(c,srange.upper);
  227. const BBox2f srange3(Vec2f(srange.lower.x,c.y),Vec2f(c.x,srange.upper.y));
  228. eval(patches[0],srange0,intersect(srange0,erange),depth+1);
  229. eval(patches[1],srange1,intersect(srange1,erange),depth+1);
  230. eval(patches[2],srange2,intersect(srange2,erange),depth+1);
  231. eval(patches[3],srange3,intersect(srange3,erange),depth+1);
  232. }
  233. }
  234. };
  235. template<typename Eval, typename Patch>
  236. bool stitch_col(const Patch& patch, int subPatch,
  237. const bool right, const unsigned y0, const unsigned y1, const int fine_y, const int coarse_y,
  238. float* Px, float* Py, float* Pz, float* U, float* V, float* Nx, float* Ny, float* Nz, const unsigned dx0, const unsigned dwidth, const unsigned dheight)
  239. {
  240. assert(coarse_y <= fine_y);
  241. if (likely(fine_y == coarse_y))
  242. return false;
  243. const unsigned y0s = stitch(y0,fine_y,coarse_y);
  244. const unsigned y1s = stitch(y1,fine_y,coarse_y);
  245. const unsigned M = y1s-y0s+1 + VSIZEX;
  246. dynamic_large_stack_array(float,px,M,64*sizeof(float));
  247. dynamic_large_stack_array(float,py,M,64*sizeof(float));
  248. dynamic_large_stack_array(float,pz,M,64*sizeof(float));
  249. dynamic_large_stack_array(float,u,M,64*sizeof(float));
  250. dynamic_large_stack_array(float,v,M,64*sizeof(float));
  251. dynamic_large_stack_array(float,nx,M,64*sizeof(float));
  252. dynamic_large_stack_array(float,ny,M,64*sizeof(float));
  253. dynamic_large_stack_array(float,nz,M,64*sizeof(float));
  254. const bool has_Nxyz = Nx; assert(!Nx || (Ny && Nz));
  255. Eval(patch,subPatch, right,right, y0s,y1s, 2,coarse_y+1, px,py,pz,u,v,
  256. has_Nxyz ? (float*)nx : nullptr,has_Nxyz ? (float*)ny : nullptr ,has_Nxyz ? (float*)nz : nullptr, 1,4097);
  257. for (unsigned y=y0; y<=y1; y++)
  258. {
  259. const unsigned ys = stitch(y,fine_y,coarse_y)-y0s;
  260. Px[(y-y0)*dwidth+dx0] = px[ys];
  261. Py[(y-y0)*dwidth+dx0] = py[ys];
  262. Pz[(y-y0)*dwidth+dx0] = pz[ys];
  263. U [(y-y0)*dwidth+dx0] = u[ys];
  264. V [(y-y0)*dwidth+dx0] = v[ys];
  265. if (unlikely(has_Nxyz)) {
  266. Nx[(y-y0)*dwidth+dx0] = nx[ys];
  267. Ny[(y-y0)*dwidth+dx0] = ny[ys];
  268. Nz[(y-y0)*dwidth+dx0] = nz[ys];
  269. }
  270. }
  271. return true;
  272. }
  273. template<typename Eval, typename Patch>
  274. bool stitch_row(const Patch& patch, int subPatch,
  275. const bool bottom, const unsigned x0, const unsigned x1, const int fine_x, const int coarse_x,
  276. float* Px, float* Py, float* Pz, float* U, float* V, float* Nx, float* Ny, float* Nz, const unsigned dy0, const unsigned dwidth, const unsigned dheight)
  277. {
  278. assert(coarse_x <= fine_x);
  279. if (likely(fine_x == coarse_x))
  280. return false;
  281. const unsigned x0s = stitch(x0,fine_x,coarse_x);
  282. const unsigned x1s = stitch(x1,fine_x,coarse_x);
  283. const unsigned M = x1s-x0s+1 + VSIZEX;
  284. dynamic_large_stack_array(float,px,M,32*sizeof(float));
  285. dynamic_large_stack_array(float,py,M,32*sizeof(float));
  286. dynamic_large_stack_array(float,pz,M,32*sizeof(float));
  287. dynamic_large_stack_array(float,u,M,32*sizeof(float));
  288. dynamic_large_stack_array(float,v,M,32*sizeof(float));
  289. dynamic_large_stack_array(float,nx,M,32*sizeof(float));
  290. dynamic_large_stack_array(float,ny,M,32*sizeof(float));
  291. dynamic_large_stack_array(float,nz,M,32*sizeof(float));
  292. const bool has_Nxyz = Nx; assert(!Nx || (Ny && Nz));
  293. Eval(patch,subPatch, x0s,x1s, bottom,bottom, coarse_x+1,2, px,py,pz,u,v,
  294. has_Nxyz ? (float*)nx :nullptr, has_Nxyz ? (float*)ny : nullptr , has_Nxyz ? (float*)nz : nullptr, 4097,1);
  295. for (unsigned x=x0; x<=x1; x++)
  296. {
  297. const unsigned xs = stitch(x,fine_x,coarse_x)-x0s;
  298. Px[dy0*dwidth+x-x0] = px[xs];
  299. Py[dy0*dwidth+x-x0] = py[xs];
  300. Pz[dy0*dwidth+x-x0] = pz[xs];
  301. U [dy0*dwidth+x-x0] = u[xs];
  302. V [dy0*dwidth+x-x0] = v[xs];
  303. if (unlikely(has_Nxyz)) {
  304. Nx[dy0*dwidth+x-x0] = nx[xs];
  305. Ny[dy0*dwidth+x-x0] = ny[xs];
  306. Nz[dy0*dwidth+x-x0] = nz[xs];
  307. }
  308. }
  309. return true;
  310. }
  311. template<typename Eval, typename Patch>
  312. void feature_adaptive_eval_grid (const Patch& patch, unsigned subPatch, const float levels[4],
  313. const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
  314. float* Px, float* Py, float* Pz, float* U, float* V, float* Nx, float* Ny, float* Nz, const unsigned dwidth, const unsigned dheight)
  315. {
  316. bool sl = false, sr = false, st = false, sb = false;
  317. if (levels) {
  318. sl = x0 == 0 && stitch_col<Eval,Patch>(patch,subPatch,0,y0,y1,sheight-1,int(levels[3]), Px,Py,Pz,U,V,Nx,Ny,Nz, 0 ,dwidth,dheight);
  319. sr = x1 == swidth-1 && stitch_col<Eval,Patch>(patch,subPatch,1,y0,y1,sheight-1,int(levels[1]), Px,Py,Pz,U,V,Nx,Ny,Nz, x1-x0,dwidth,dheight);
  320. st = y0 == 0 && stitch_row<Eval,Patch>(patch,subPatch,0,x0,x1,swidth-1,int(levels[0]), Px,Py,Pz,U,V,Nx,Ny,Nz, 0 ,dwidth,dheight);
  321. sb = y1 == sheight-1 && stitch_row<Eval,Patch>(patch,subPatch,1,x0,x1,swidth-1,int(levels[2]), Px,Py,Pz,U,V,Nx,Ny,Nz, y1-y0,dwidth,dheight);
  322. }
  323. const unsigned ofs = st*dwidth+sl;
  324. Eval(patch,subPatch,x0+sl,x1-sr,y0+st,y1-sb, swidth,sheight, Px+ofs,Py+ofs,Pz+ofs,U+ofs,V+ofs,Nx?Nx+ofs:nullptr,Ny?Ny+ofs:nullptr,Nz?Nz+ofs:nullptr, dwidth,dheight);
  325. }
  326. }
  327. }