1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | namespace embree |
7 | { |
/* Adjusts a discrete tessellation level for feature-adaptive pre-subdivision.
 *
 * The level is halved once per sublevel, rounded up to the next whole number,
 * and then doubled once per sublevel again. The result is therefore l rounded
 * up to a multiple of 2^sublevel.
 *
 * l        - tessellation level to adjust
 * sublevel - number of halving / doubling steps to apply
 * returns  - the adjusted tessellation level
 */
__forceinline float adjustTessellationLevel(float l, const size_t sublevel)
{
  /* scale the level down by 2^sublevel */
  for (size_t i=0; i<sublevel; i++) l *= 0.5f;

  /* round up at the reduced scale ... */
  float r = ceilf(l);

  /* ... and scale the rounded value back up by 2^sublevel */
  for (size_t i=0; i<sublevel; i++) r *= 2.0f;
  return r;
}
16 | |
/* Maps point index x of a border with `fine` segments to a point index on an
 * edge with `coarse` segments: computes (2*x+1)*coarse / (2*fine) using
 * truncating integer division. */
__forceinline int stitch(const int x, const int fine, const int coarse)
{
  const int numerator   = coarse * (2*x + 1);
  const int denominator = fine * 2;
  return numerator / denominator;
}
20 | |
21 | __forceinline void stitchGridEdges(const unsigned int low_rate, |
22 | const unsigned int high_rate, |
23 | const unsigned int x0, |
24 | const unsigned int x1, |
25 | float * __restrict__ const uv_array, |
26 | const unsigned int uv_array_step) |
27 | { |
28 | #if 1 |
29 | const float inv_low_rate = rcp(x: (float)(low_rate-1)); |
30 | for (unsigned x=x0; x<=x1; x++) { |
31 | uv_array[(x-x0)*uv_array_step] = float(stitch(x,fine: high_rate-1,coarse: low_rate-1))*inv_low_rate; |
32 | } |
33 | if (unlikely(x1 == high_rate-1)) |
34 | uv_array[(x1-x0)*uv_array_step] = 1.0f; |
35 | #else |
36 | assert(low_rate < high_rate); |
37 | assert(high_rate >= 2); |
38 | |
39 | const float inv_low_rate = rcp((float)(low_rate-1)); |
40 | const unsigned int dy = low_rate - 1; |
41 | const unsigned int dx = high_rate - 1; |
42 | |
43 | int p = 2*dy-dx; |
44 | |
45 | unsigned int offset = 0; |
46 | unsigned int y = 0; |
47 | float value = 0.0f; |
48 | for(unsigned int x=0;x<high_rate-1; x++) // '<=' would be correct but we will leave the 1.0f at the end |
49 | { |
50 | uv_array[offset] = value; |
51 | |
52 | offset += uv_array_step; |
53 | if (unlikely(p > 0)) |
54 | { |
55 | y++; |
56 | value = (float)y * inv_low_rate; |
57 | p -= 2*dx; |
58 | } |
59 | p += 2*dy; |
60 | } |
61 | #endif |
62 | } |
63 | |
64 | __forceinline void stitchUVGrid(const float edge_levels[4], |
65 | const unsigned int swidth, |
66 | const unsigned int sheight, |
67 | const unsigned int x0, |
68 | const unsigned int y0, |
69 | const unsigned int grid_u_res, |
70 | const unsigned int grid_v_res, |
71 | float * __restrict__ const u_array, |
72 | float * __restrict__ const v_array) |
73 | { |
74 | const unsigned int x1 = x0+grid_u_res-1; |
75 | const unsigned int y1 = y0+grid_v_res-1; |
76 | const unsigned int int_edge_points0 = (unsigned int)edge_levels[0] + 1; |
77 | const unsigned int int_edge_points1 = (unsigned int)edge_levels[1] + 1; |
78 | const unsigned int int_edge_points2 = (unsigned int)edge_levels[2] + 1; |
79 | const unsigned int int_edge_points3 = (unsigned int)edge_levels[3] + 1; |
80 | |
81 | if (unlikely(y0 == 0 && int_edge_points0 < swidth)) |
82 | stitchGridEdges(low_rate: int_edge_points0,high_rate: swidth,x0,x1,uv_array: u_array,uv_array_step: 1); |
83 | |
84 | if (unlikely(y1 == sheight-1 && int_edge_points2 < swidth)) |
85 | stitchGridEdges(low_rate: int_edge_points2,high_rate: swidth,x0,x1,uv_array: &u_array[(grid_v_res-1)*grid_u_res],uv_array_step: 1); |
86 | |
87 | if (unlikely(x0 == 0 && int_edge_points1 < sheight)) |
88 | stitchGridEdges(low_rate: int_edge_points1,high_rate: sheight,x0: y0,x1: y1,uv_array: &v_array[grid_u_res-1],uv_array_step: grid_u_res); |
89 | |
90 | if (unlikely(x1 == swidth-1 && int_edge_points3 < sheight)) |
91 | stitchGridEdges(low_rate: int_edge_points3,high_rate: sheight,x0: y0,x1: y1,uv_array: v_array,uv_array_step: grid_u_res); |
92 | } |
93 | |
94 | __forceinline void gridUVTessellator(const float edge_levels[4], |
95 | const unsigned int swidth, |
96 | const unsigned int sheight, |
97 | const unsigned int x0, |
98 | const unsigned int y0, |
99 | const unsigned int grid_u_res, |
100 | const unsigned int grid_v_res, |
101 | float * __restrict__ const u_array, |
102 | float * __restrict__ const v_array) |
103 | { |
104 | assert( grid_u_res >= 1); |
105 | assert( grid_v_res >= 1); |
106 | assert( edge_levels[0] >= 1.0f ); |
107 | assert( edge_levels[1] >= 1.0f ); |
108 | assert( edge_levels[2] >= 1.0f ); |
109 | assert( edge_levels[3] >= 1.0f ); |
110 | |
111 | #if defined(__AVX__) |
112 | const vint8 grid_u_segments = vint8(swidth)-1; |
113 | const vint8 grid_v_segments = vint8(sheight)-1; |
114 | |
115 | const vfloat8 inv_grid_u_segments = rcp(vfloat8(grid_u_segments)); |
116 | const vfloat8 inv_grid_v_segments = rcp(vfloat8(grid_v_segments)); |
117 | |
118 | unsigned int index = 0; |
119 | vint8 v_i( zero ); |
120 | for (unsigned int y=0;y<grid_v_res;y++,index+=grid_u_res,v_i += 1) |
121 | { |
122 | vint8 u_i ( step ); |
123 | |
124 | const vbool8 m_v = v_i < grid_v_segments; |
125 | |
126 | for (unsigned int x=0;x<grid_u_res;x+=8, u_i += 8) |
127 | { |
128 | const vbool8 m_u = u_i < grid_u_segments; |
129 | const vfloat8 u = select(m_u, vfloat8(x0+u_i) * inv_grid_u_segments, 1.0f); |
130 | const vfloat8 v = select(m_v, vfloat8(y0+v_i) * inv_grid_v_segments, 1.0f); |
131 | vfloat8::storeu(&u_array[index + x],u); |
132 | vfloat8::storeu(&v_array[index + x],v); |
133 | } |
134 | } |
135 | #else |
136 | const vint4 grid_u_segments = vint4(swidth)-1; |
137 | const vint4 grid_v_segments = vint4(sheight)-1; |
138 | |
139 | const vfloat4 inv_grid_u_segments = rcp(a: vfloat4(grid_u_segments)); |
140 | const vfloat4 inv_grid_v_segments = rcp(a: vfloat4(grid_v_segments)); |
141 | |
142 | unsigned int index = 0; |
143 | vint4 v_i( zero ); |
144 | for (unsigned int y=0;y<grid_v_res;y++,index+=grid_u_res,v_i += 1) |
145 | { |
146 | vint4 u_i ( step ); |
147 | |
148 | const vbool4 m_v = v_i < grid_v_segments; |
149 | |
150 | for (unsigned int x=0;x<grid_u_res;x+=4, u_i += 4) |
151 | { |
152 | const vbool4 m_u = u_i < grid_u_segments; |
153 | const vfloat4 u = select(m: m_u, t: vfloat4(x0+u_i) * inv_grid_u_segments, f: 1.0f); |
154 | const vfloat4 v = select(m: m_v, t: vfloat4(y0+v_i) * inv_grid_v_segments, f: 1.0f); |
155 | vfloat4::storeu(ptr: &u_array[index + x],v: u); |
156 | vfloat4::storeu(ptr: &v_array[index + x],v); |
157 | } |
158 | } |
159 | #endif |
160 | } |
161 | } |
162 | |