// gregory_patch_dense.h (4.2 KB)
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "gregory_patch.h"
  5. namespace embree
  6. {
  7. class __aligned(64) DenseGregoryPatch3fa
  8. {
  9. typedef Vec3fa Vec3fa_4x4[4][4];
  10. public:
  11. __forceinline DenseGregoryPatch3fa (const GregoryPatch3fa& patch)
  12. {
  13. for (size_t y=0; y<4; y++)
  14. for (size_t x=0; x<4; x++)
  15. matrix[y][x] = Vec3ff(patch.v[y][x], 0.0f);
  16. matrix[0][0].w = patch.f[0][0].x;
  17. matrix[0][1].w = patch.f[0][0].y;
  18. matrix[0][2].w = patch.f[0][0].z;
  19. matrix[0][3].w = 0.0f;
  20. matrix[1][0].w = patch.f[0][1].x;
  21. matrix[1][1].w = patch.f[0][1].y;
  22. matrix[1][2].w = patch.f[0][1].z;
  23. matrix[1][3].w = 0.0f;
  24. matrix[2][0].w = patch.f[1][1].x;
  25. matrix[2][1].w = patch.f[1][1].y;
  26. matrix[2][2].w = patch.f[1][1].z;
  27. matrix[2][3].w = 0.0f;
  28. matrix[3][0].w = patch.f[1][0].x;
  29. matrix[3][1].w = patch.f[1][0].y;
  30. matrix[3][2].w = patch.f[1][0].z;
  31. matrix[3][3].w = 0.0f;
  32. }
  33. __forceinline void extract_f_m(Vec3fa f_m[2][2]) const
  34. {
  35. f_m[0][0] = Vec3fa( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w );
  36. f_m[0][1] = Vec3fa( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w );
  37. f_m[1][1] = Vec3fa( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w );
  38. f_m[1][0] = Vec3fa( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w );
  39. }
  40. __forceinline Vec3fa eval(const float uu, const float vv) const
  41. {
  42. __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
  43. return GregoryPatch3fa::eval(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
  44. }
  45. __forceinline Vec3fa normal(const float uu, const float vv) const
  46. {
  47. __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
  48. return GregoryPatch3fa::normal(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
  49. }
  50. template<class T>
  51. __forceinline Vec3<T> eval(const T &uu, const T &vv) const
  52. {
  53. Vec3<T> f_m[2][2];
  54. f_m[0][0] = Vec3<T>( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w );
  55. f_m[0][1] = Vec3<T>( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w );
  56. f_m[1][1] = Vec3<T>( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w );
  57. f_m[1][0] = Vec3<T>( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w );
  58. return GregoryPatch3fa::eval_t(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
  59. }
  60. template<class T>
  61. __forceinline Vec3<T> normal(const T &uu, const T &vv) const
  62. {
  63. Vec3<T> f_m[2][2];
  64. f_m[0][0] = Vec3<T>( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w );
  65. f_m[0][1] = Vec3<T>( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w );
  66. f_m[1][1] = Vec3<T>( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w );
  67. f_m[1][0] = Vec3<T>( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w );
  68. return GregoryPatch3fa::normal_t(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
  69. }
  70. __forceinline void eval(const float u, const float v,
  71. Vec3fa* P, Vec3fa* dPdu, Vec3fa* dPdv, Vec3fa* ddPdudu, Vec3fa* ddPdvdv, Vec3fa* ddPdudv,
  72. const float dscale = 1.0f) const
  73. {
  74. __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
  75. if (P) {
  76. *P = GregoryPatch3fa::eval(*(Vec3fa_4x4*)&matrix,f_m,u,v);
  77. }
  78. if (dPdu) {
  79. assert(dPdu); *dPdu = GregoryPatch3fa::eval_du(*(Vec3fa_4x4*)&matrix,f_m,u,v)*dscale;
  80. assert(dPdv); *dPdv = GregoryPatch3fa::eval_dv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*dscale;
  81. }
  82. if (ddPdudu) {
  83. assert(ddPdudu); *ddPdudu = GregoryPatch3fa::eval_dudu(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale);
  84. assert(ddPdvdv); *ddPdvdv = GregoryPatch3fa::eval_dvdv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale);
  85. assert(ddPdudv); *ddPdudv = GregoryPatch3fa::eval_dudv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale);
  86. }
  87. }
  88. template<typename vbool, typename vfloat>
  89. __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv, float* P, float* dPdu, float* dPdv, const float dscale, const size_t dstride, const size_t N) const
  90. {
  91. __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
  92. GregoryPatch3fa::eval(matrix,f_m,valid,uu,vv,P,dPdu,dPdv,dscale,dstride,N);
  93. }
  94. private:
  95. Vec3ff matrix[4][4]; // f_p/m points are stored in 4th component
  96. };
  97. }