| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #pragma once | 
| 5 |  | 
| 6 | #include "gregory_patch.h" | 
| 7 |  | 
| 8 | namespace embree | 
| 9 | {   | 
| 10 |   class __aligned(64) DenseGregoryPatch3fa | 
| 11 |   { | 
| 12 |     typedef Vec3fa Vec3fa_4x4[4][4]; | 
| 13 |   public: | 
| 14 |  | 
| 15 |     __forceinline DenseGregoryPatch3fa (const GregoryPatch3fa& patch) | 
| 16 |     { | 
| 17 |       for (size_t y=0; y<4; y++) | 
| 18 | 	for (size_t x=0; x<4; x++) | 
| 19 | 	  matrix[y][x] = Vec3ff(patch.v[y][x], 0.0f); | 
| 20 |        | 
| 21 |       matrix[0][0].w = patch.f[0][0].x; | 
| 22 |       matrix[0][1].w = patch.f[0][0].y; | 
| 23 |       matrix[0][2].w = patch.f[0][0].z; | 
| 24 |       matrix[0][3].w = 0.0f; | 
| 25 |        | 
| 26 |       matrix[1][0].w = patch.f[0][1].x; | 
| 27 |       matrix[1][1].w = patch.f[0][1].y; | 
| 28 |       matrix[1][2].w = patch.f[0][1].z; | 
| 29 |       matrix[1][3].w = 0.0f; | 
| 30 |        | 
| 31 |       matrix[2][0].w = patch.f[1][1].x; | 
| 32 |       matrix[2][1].w = patch.f[1][1].y; | 
| 33 |       matrix[2][2].w = patch.f[1][1].z; | 
| 34 |       matrix[2][3].w = 0.0f; | 
| 35 |        | 
| 36 |       matrix[3][0].w = patch.f[1][0].x; | 
| 37 |       matrix[3][1].w = patch.f[1][0].y; | 
| 38 |       matrix[3][2].w = patch.f[1][0].z; | 
| 39 |       matrix[3][3].w = 0.0f; | 
| 40 |     } | 
| 41 |  | 
| 42 |     __forceinline void (Vec3fa f_m[2][2]) const | 
| 43 |     { | 
| 44 |       f_m[0][0] = Vec3fa( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w ); | 
| 45 |       f_m[0][1] = Vec3fa( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w ); | 
| 46 |       f_m[1][1] = Vec3fa( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w ); | 
| 47 |       f_m[1][0] = Vec3fa( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w );       | 
| 48 |     } | 
| 49 |  | 
| 50 |     __forceinline Vec3fa eval(const float uu, const float vv) const | 
| 51 |     { | 
| 52 |       __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m); | 
| 53 |       return GregoryPatch3fa::eval(matrix: *(Vec3fa_4x4*)&matrix,f: f_m,uu,vv); | 
| 54 |     } | 
| 55 |  | 
| 56 |     __forceinline Vec3fa normal(const float uu, const float vv) const | 
| 57 |     { | 
| 58 |       __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m); | 
| 59 |       return GregoryPatch3fa::normal(matrix: *(Vec3fa_4x4*)&matrix,f_m,uu,vv); | 
| 60 |     } | 
| 61 |  | 
| 62 |     template<class T> | 
| 63 |       __forceinline Vec3<T> eval(const T &uu, const T &vv) const  | 
| 64 |     { | 
| 65 |       Vec3<T> f_m[2][2]; | 
| 66 |       f_m[0][0] = Vec3<T>( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w ); | 
| 67 |       f_m[0][1] = Vec3<T>( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w ); | 
| 68 |       f_m[1][1] = Vec3<T>( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w ); | 
| 69 |       f_m[1][0] = Vec3<T>( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w ); | 
| 70 |       return GregoryPatch3fa::eval_t(*(Vec3fa_4x4*)&matrix,f_m,uu,vv); | 
| 71 |     } | 
| 72 |      | 
| 73 |     template<class T> | 
| 74 |       __forceinline Vec3<T> normal(const T &uu, const T &vv) const  | 
| 75 |     { | 
| 76 |       Vec3<T> f_m[2][2]; | 
| 77 |       f_m[0][0] = Vec3<T>( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w ); | 
| 78 |       f_m[0][1] = Vec3<T>( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w ); | 
| 79 |       f_m[1][1] = Vec3<T>( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w ); | 
| 80 |       f_m[1][0] = Vec3<T>( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w ); | 
| 81 |       return GregoryPatch3fa::normal_t(*(Vec3fa_4x4*)&matrix,f_m,uu,vv); | 
| 82 |     } | 
| 83 |  | 
| 84 |     __forceinline void eval(const float u, const float v,  | 
| 85 |                             Vec3fa* P, Vec3fa* dPdu, Vec3fa* dPdv, Vec3fa* ddPdudu, Vec3fa* ddPdvdv, Vec3fa* ddPdudv, | 
| 86 |                             const float dscale = 1.0f) const | 
| 87 |     { | 
| 88 |       __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m); | 
| 89 |       if (P) { | 
| 90 |         *P    = GregoryPatch3fa::eval(matrix: *(Vec3fa_4x4*)&matrix,f: f_m,uu: u,vv: v);  | 
| 91 |       } | 
| 92 |       if (dPdu) { | 
| 93 |         assert(dPdu); *dPdu = GregoryPatch3fa::eval_du(matrix: *(Vec3fa_4x4*)&matrix,f: f_m,uu: u,vv: v)*dscale;  | 
| 94 |         assert(dPdv); *dPdv = GregoryPatch3fa::eval_dv(matrix: *(Vec3fa_4x4*)&matrix,f: f_m,uu: u,vv: v)*dscale;  | 
| 95 |       } | 
| 96 |       if (ddPdudu) { | 
| 97 |         assert(ddPdudu); *ddPdudu = GregoryPatch3fa::eval_dudu(matrix: *(Vec3fa_4x4*)&matrix,f: f_m,uu: u,vv: v)*sqr(x: dscale);  | 
| 98 |         assert(ddPdvdv); *ddPdvdv = GregoryPatch3fa::eval_dvdv(matrix: *(Vec3fa_4x4*)&matrix,f: f_m,uu: u,vv: v)*sqr(x: dscale);  | 
| 99 |         assert(ddPdudv); *ddPdudv = GregoryPatch3fa::eval_dudv(matrix: *(Vec3fa_4x4*)&matrix,f: f_m,uu: u,vv: v)*sqr(x: dscale);  | 
| 100 |       } | 
| 101 |     } | 
| 102 |  | 
| 103 |     template<typename vbool, typename vfloat> | 
| 104 |     __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv, float* P, float* dPdu, float* dPdv, const float dscale, const size_t dstride, const size_t N) const  | 
| 105 |     { | 
| 106 |       __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m); | 
| 107 |       GregoryPatch3fa::eval(matrix,f_m,valid,uu,vv,P,dPdu,dPdv,dscale,dstride,N); | 
| 108 |     } | 
| 109 |  | 
| 110 |   private: | 
| 111 |     Vec3ff matrix[4][4]; // f_p/m points are stored in 4th component | 
| 112 |   }; | 
| 113 | } | 
| 114 |  |