| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #pragma once | 
| 5 |  | 
| 6 | namespace embree | 
| 7 | { | 
/* Adjusts a discrete tessellation level for feature-adaptive
 * pre-subdivision: the level is halved 'sublevel' times, rounded up to
 * the next integer with ceilf, and doubled 'sublevel' times again. The
 * result is thus the level rounded up at a granularity of 2^sublevel. */
__forceinline float adjustTessellationLevel(float l, const size_t sublevel)
{
  /* scale the level down by one factor of two per sublevel */
  size_t remaining = sublevel;
  while (remaining != 0) {
    l *= 0.5f;
    --remaining;
  }

  /* snap to the next integer at the reduced scale */
  float snapped = ceilf(l);

  /* undo the scaling */
  for (remaining = sublevel; remaining != 0; --remaining)
    snapped *= 2.0f;

  return snapped;
}
| 16 |    | 
/* Maps index x on a fine subdivision to an index on a coarse one by
 * evaluating (2*x+1)*coarse / (2*fine) with truncating integer
 * division. Callers in this file pass (rate-1) for 'fine' and 'coarse'.
 * 'fine' must be non-zero. */
__forceinline int stitch(const int x, const int fine, const int coarse)
{
  const int numerator   = (2*x + 1) * coarse;
  const int denominator = 2 * fine;
  return numerator / denominator;
}
| 20 |  | 
| 21 |   __forceinline void stitchGridEdges(const unsigned int low_rate, | 
| 22 |                                      const unsigned int high_rate, | 
| 23 |                                      const unsigned int x0, | 
| 24 |                                      const unsigned int x1, | 
| 25 | 				    float * __restrict__ const uv_array, | 
| 26 | 				    const unsigned int uv_array_step) | 
| 27 |   { | 
| 28 | #if 1 | 
| 29 |     const float inv_low_rate = rcp(x: (float)(low_rate-1)); | 
| 30 |     for (unsigned x=x0; x<=x1; x++) { | 
| 31 |       uv_array[(x-x0)*uv_array_step] = float(stitch(x,fine: high_rate-1,coarse: low_rate-1))*inv_low_rate; | 
| 32 |     } | 
| 33 |     if (unlikely(x1 == high_rate-1)) | 
| 34 |       uv_array[(x1-x0)*uv_array_step] = 1.0f; | 
| 35 | #else | 
| 36 |     assert(low_rate < high_rate); | 
| 37 |     assert(high_rate >= 2); | 
| 38 |      | 
| 39 |     const float inv_low_rate = rcp((float)(low_rate-1)); | 
| 40 |     const unsigned int dy = low_rate  - 1;  | 
| 41 |     const unsigned int dx = high_rate - 1; | 
| 42 |      | 
| 43 |     int p = 2*dy-dx;   | 
| 44 |      | 
| 45 |     unsigned int offset = 0; | 
| 46 |     unsigned int y = 0; | 
| 47 |     float value = 0.0f; | 
| 48 |     for(unsigned int x=0;x<high_rate-1; x++) // '<=' would be correct but we will leave the 1.0f at the end | 
| 49 |     { | 
| 50 |       uv_array[offset] = value; | 
| 51 |        | 
| 52 |       offset += uv_array_step;       | 
| 53 |       if (unlikely(p > 0)) | 
| 54 |       { | 
| 55 | 	y++; | 
| 56 | 	value = (float)y * inv_low_rate; | 
| 57 | 	p -= 2*dx; | 
| 58 |       } | 
| 59 |       p += 2*dy; | 
| 60 |     } | 
| 61 | #endif | 
| 62 |   } | 
| 63 |    | 
| 64 |   __forceinline void stitchUVGrid(const float edge_levels[4], | 
| 65 |                                   const unsigned int swidth, | 
| 66 |                                   const unsigned int sheight, | 
| 67 |                                   const unsigned int x0, | 
| 68 |                                   const unsigned int y0, | 
| 69 | 				  const unsigned int grid_u_res, | 
| 70 | 				  const unsigned int grid_v_res, | 
| 71 | 				  float * __restrict__ const u_array, | 
| 72 | 				  float * __restrict__ const v_array) | 
| 73 |   { | 
| 74 |     const unsigned int x1 = x0+grid_u_res-1; | 
| 75 |     const unsigned int y1 = y0+grid_v_res-1; | 
| 76 |     const unsigned int int_edge_points0 = (unsigned int)edge_levels[0] + 1; | 
| 77 |     const unsigned int int_edge_points1 = (unsigned int)edge_levels[1] + 1; | 
| 78 |     const unsigned int int_edge_points2 = (unsigned int)edge_levels[2] + 1; | 
| 79 |     const unsigned int int_edge_points3 = (unsigned int)edge_levels[3] + 1; | 
| 80 |      | 
| 81 |     if (unlikely(y0 == 0 && int_edge_points0 < swidth)) | 
| 82 |       stitchGridEdges(low_rate: int_edge_points0,high_rate: swidth,x0,x1,uv_array: u_array,uv_array_step: 1); | 
| 83 |      | 
| 84 |     if (unlikely(y1 == sheight-1 && int_edge_points2 < swidth)) | 
| 85 |       stitchGridEdges(low_rate: int_edge_points2,high_rate: swidth,x0,x1,uv_array: &u_array[(grid_v_res-1)*grid_u_res],uv_array_step: 1); | 
| 86 |      | 
| 87 |     if (unlikely(x0 == 0 && int_edge_points1 < sheight)) | 
| 88 |       stitchGridEdges(low_rate: int_edge_points1,high_rate: sheight,x0: y0,x1: y1,uv_array: &v_array[grid_u_res-1],uv_array_step: grid_u_res); | 
| 89 |      | 
| 90 |     if (unlikely(x1 == swidth-1 && int_edge_points3 < sheight)) | 
| 91 |       stitchGridEdges(low_rate: int_edge_points3,high_rate: sheight,x0: y0,x1: y1,uv_array: v_array,uv_array_step: grid_u_res);   | 
| 92 |   } | 
| 93 |    | 
| 94 |   __forceinline void gridUVTessellator(const float edge_levels[4],   | 
| 95 |                                        const unsigned int swidth, | 
| 96 |                                        const unsigned int sheight, | 
| 97 |                                        const unsigned int x0, | 
| 98 |                                        const unsigned int y0, | 
| 99 | 				       const unsigned int grid_u_res, | 
| 100 | 				       const unsigned int grid_v_res, | 
| 101 | 				       float * __restrict__ const u_array, | 
| 102 | 				       float * __restrict__ const v_array) | 
| 103 |   { | 
| 104 |     assert( grid_u_res >= 1); | 
| 105 |     assert( grid_v_res >= 1); | 
| 106 |     assert( edge_levels[0] >= 1.0f ); | 
| 107 |     assert( edge_levels[1] >= 1.0f ); | 
| 108 |     assert( edge_levels[2] >= 1.0f ); | 
| 109 |     assert( edge_levels[3] >= 1.0f ); | 
| 110 |      | 
| 111 | #if defined(__AVX__) | 
| 112 |     const vint8 grid_u_segments = vint8(swidth)-1; | 
| 113 |     const vint8 grid_v_segments = vint8(sheight)-1; | 
| 114 |      | 
| 115 |     const vfloat8 inv_grid_u_segments = rcp(vfloat8(grid_u_segments)); | 
| 116 |     const vfloat8 inv_grid_v_segments = rcp(vfloat8(grid_v_segments)); | 
| 117 |      | 
| 118 |     unsigned int index = 0; | 
| 119 |     vint8 v_i( zero ); | 
| 120 |     for (unsigned int y=0;y<grid_v_res;y++,index+=grid_u_res,v_i += 1) | 
| 121 |     { | 
| 122 |       vint8 u_i ( step ); | 
| 123 |        | 
| 124 |       const vbool8 m_v = v_i < grid_v_segments; | 
| 125 |        | 
| 126 |       for (unsigned int x=0;x<grid_u_res;x+=8, u_i += 8) | 
| 127 |       { | 
| 128 |         const vbool8 m_u = u_i < grid_u_segments; | 
| 129 | 	const vfloat8 u = select(m_u, vfloat8(x0+u_i) * inv_grid_u_segments, 1.0f); | 
| 130 | 	const vfloat8 v = select(m_v, vfloat8(y0+v_i) * inv_grid_v_segments, 1.0f); | 
| 131 | 	vfloat8::storeu(&u_array[index + x],u); | 
| 132 | 	vfloat8::storeu(&v_array[index + x],v);	    | 
| 133 |       } | 
| 134 |     }        | 
| 135 |  #else    | 
| 136 |     const vint4 grid_u_segments = vint4(swidth)-1; | 
| 137 |     const vint4 grid_v_segments = vint4(sheight)-1; | 
| 138 |      | 
| 139 |     const vfloat4 inv_grid_u_segments = rcp(a: vfloat4(grid_u_segments)); | 
| 140 |     const vfloat4 inv_grid_v_segments = rcp(a: vfloat4(grid_v_segments)); | 
| 141 |      | 
| 142 |     unsigned int index = 0; | 
| 143 |     vint4 v_i( zero ); | 
| 144 |     for (unsigned int y=0;y<grid_v_res;y++,index+=grid_u_res,v_i += 1) | 
| 145 |     { | 
| 146 |       vint4 u_i ( step ); | 
| 147 |        | 
| 148 |       const vbool4 m_v = v_i < grid_v_segments; | 
| 149 |        | 
| 150 |       for (unsigned int x=0;x<grid_u_res;x+=4, u_i += 4) | 
| 151 |       { | 
| 152 |         const vbool4 m_u = u_i < grid_u_segments; | 
| 153 | 	const vfloat4 u = select(m: m_u, t: vfloat4(x0+u_i) * inv_grid_u_segments, f: 1.0f); | 
| 154 | 	const vfloat4 v = select(m: m_v, t: vfloat4(y0+v_i) * inv_grid_v_segments, f: 1.0f); | 
| 155 |         vfloat4::storeu(ptr: &u_array[index + x],v: u); | 
| 156 | 	vfloat4::storeu(ptr: &v_array[index + x],v);	    | 
| 157 |       } | 
| 158 |     }        | 
| 159 | #endif | 
| 160 |   }  | 
| 161 | } | 
| 162 |  |