grid_soa.h source code [qtquick3d/src/3rdparty/embree/kernels/geometry/grid_soa.h]

1	// Copyright 2009-2021 Intel Corporation
2	// SPDX-License-Identifier: Apache-2.0
3
4	#pragma once
5
6	#include "../common/ray.h"
7	#include "../common/scene_subdiv_mesh.h"
8	#include "../bvh/bvh.h"
9	#include "../subdiv/tessellation.h"
10	#include "../subdiv/tessellation_cache.h"
11	#include "subdivpatch1.h"
12
13	namespace embree
14	{
15	namespace isa
16	{
17	class GridSOA
18	{
19	public:
20
21	/! GridSOA constructor /
22	GridSOA(const SubdivPatch1Base* patches, const unsigned time_steps,
23	const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
24	const SubdivMesh* const geom, const size_t totalBvhBytes, const size_t gridBytes, BBox3fa* bounds_o = nullptr);
25
26	/! Subgrid creation /
27	template<typename Allocator>
28	static GridSOA* create(const SubdivPatch1Base* patches, const unsigned time_steps,
29	unsigned x0, unsigned x1, unsigned y0, unsigned y1,
30	const Scene* scene, Allocator& alloc, BBox3fa* bounds_o = nullptr)
31	{
32	const unsigned width = x1-x0+`1`;
33	const unsigned height = y1-y0+`1`;
34	const GridRange range(`0`,width-`1`,`0`,height-`1`);
35	size_t bvhBytes = `0`;
36	if (time_steps == `1`)
37	bvhBytes = getBVHBytes(range,nodeBytes: sizeof(BVH4::AABBNode),leafBytes: `0`);
38	else {
39	bvhBytes = (time_steps-`1`)getBVHBytes(range,nodeBytes: sizeof*(BVH4::AABBNodeMB),leafBytes: `0`);
40	bvhBytes += getTemporalBVHBytes(time_range: make_range(begin: `0`,end: int(time_steps-`1`)),nodeBytes: sizeof(BVH4::AABBNodeMB4D));
41	}
42	const size_t gridBytes = `4`size_t(width)size_t(height)*sizeof(float);
43	size_t rootBytes = time_steps*sizeof(BVH4::NodeRef);
44	#if !defined(__64BIT__)
45	rootBytes += `4`; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding.
46	#endif
47	void* data = alloc(offsetof(GridSOA,data)+bvhBytes+time_steps*gridBytes+rootBytes);
48	assert(data);
49	return new (data) GridSOA (patches,time_steps,x0,x1,y0,y1,patches->grid_u_res,patches->grid_v_res,scene->get<SubdivMesh>(i: patches->geomID()),bvhBytes,gridBytes,bounds_o);
50	}
51
52	/! Grid creation /
53	template<typename Allocator>
54	static GridSOA* create(const SubdivPatch1Base* const patches, const unsigned time_steps,
55	const Scene* scene, const Allocator& alloc, BBox3fa* bounds_o = nullptr)
56	{
57	return create(patches,time_steps,`0`,patches->grid_u_res-`1`,`0`,patches->grid_v_res-`1`,scene,alloc,bounds_o);
58	}
59
60	/! returns reference to root /
61	__forceinline BVH4::NodeRef& root(size_t t = `0`) { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }
62	__forceinline const BVH4::NodeRef& root(size_t t = `0`) const { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }
63
64	/! returns pointer to BVH array /
65	__forceinline char* bvhData() { return &data[`0`]; }
66	__forceinline const char* bvhData() const { return &data[`0`]; }
67
68	/! returns pointer to Grid array /
69	__forceinline float* gridData(size_t t = `0`) { return (float) &data[gridOffset + tgridBytes]; }
70	__forceinline const float* gridData(size_t t = `0`) const { return (float) &data[gridOffset + tgridBytes]; }
71
72	__forceinline void* encodeLeaf(size_t u, size_t v) {
73	return (void) (`16`(v * width + u + `1`)); // +1 to not create empty leaf
74	}
75	__forceinline float* decodeLeaf(size_t t, const void* ptr) {
76	return gridData(t) + (((size_t) (ptr) >> `4`) - `1`);
77	}
78
79	/! returns the size of the BVH over the grid in bytes /
80	static size_t getBVHBytes(const GridRange& range, const size_t nodeBytes, const size_t leafBytes);
81
82	/! returns the size of the temporal BVH over the time range BVHs /
83	static size_t getTemporalBVHBytes(const range<int> time_range, const size_t nodeBytes);
84
85	/! calculates bounding box of grid range /
86	__forceinline BBox3fa calculateBounds(size_t time, const GridRange& range) const
87	{
88	const float* const grid_array = gridData(t: time);
89	const float* const grid_x_array = grid_array + `0` * dim_offset;
90	const float* const grid_y_array = grid_array + `1` * dim_offset;
91	const float* const grid_z_array = grid_array + `2` * dim_offset;
92
93	/ compute the bounds just for the range! /
94	BBox3fa bounds( empty );
95	for (unsigned v = range.v_start; v<=range.v_end; v++)
96	{
97	for (unsigned u = range.u_start; u<=range.u_end; u++)
98	{
99	const float x = grid_x_array[ v * width + u];
100	const float y = grid_y_array[ v * width + u];
101	const float z = grid_z_array[ v * width + u];
102	bounds.extend( other: Vec3fa (x,y,z) );
103	}
104	}
105	assert(is_finite(bounds));
106	return bounds;
107	}
108
109	/! Evaluates grid over patch and builds BVH4 tree over the grid. /
110	std::pair<BVH4::NodeRef,BBox3fa> buildBVH(BBox3fa* bounds_o);
111
112	/! Create BVH4 tree over grid. /
113	std::pair<BVH4::NodeRef,BBox3fa> buildBVH(const GridRange& range, size_t& allocator);
114
115	/! Evaluates grid over patch and builds MSMBlur BVH4 tree over the grid. /
116	std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, BBox3fa* bounds_o);
117
118	/! Create MBlur BVH4 tree over grid. /
119	std::pair<BVH4::NodeRef,LBBox3fa> buildMBlurBVH(size_t time, const GridRange& range, size_t& allocator);
120
121	/! Create MSMBlur BVH4 tree over grid. /
122	std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, size_t& allocator, BBox3fa* bounds_o);
123
124	template<typename Loader>
125	struct MapUV
126	{
127	typedef typename Loader::vfloat vfloat;
128	const float* const grid_uv;
129	size_t line_offset;
130	size_t lines;
131
132	__forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines)
133	: grid_uv(grid_uv), line_offset(line_offset), lines(lines) {}
134
135	__forceinline void operator() (vfloat& u, vfloat& v, Vec3<vfloat>& Ng) const {
136	const Vec3<vfloat> tri_v012_uv = Loader::gather(grid_uv,line_offset,lines);
137	const Vec2<vfloat> uv0 = GridSOA::decodeUV(tri_v012_uv[`0`]);
138	const Vec2<vfloat> uv1 = GridSOA::decodeUV(tri_v012_uv[`1`]);
139	const Vec2<vfloat> uv2 = GridSOA::decodeUV(tri_v012_uv[`2`]);
140	const Vec2<vfloat> uv = u * uv1 + v * uv2 + (`1.0f`-u-v) * uv0;
141	u = uv[`0`];v = uv[`1`];
142	}
143	};
144
145	struct Gather2x3
146	{
147	enum { M = `4` };
148	typedef vbool4 vbool;
149	typedef vint4 vint;
150	typedef vfloat4 vfloat;
151
152	static __forceinline const Vec3vf4 gather(const float* const grid, const size_t line_offset, const size_t lines)
153	{
154	vfloat4 r0 = vfloat4::loadu(a: grid + `0`*line_offset);
155	vfloat4 r1 = vfloat4::loadu(a: grid + `1`line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid*
156	if (unlikely(line_offset == `2`))
157	{
158	r0 = shuffle<`0`,`1`,`1`,`1`>(v: r0);
159	r1 = shuffle<`0`,`1`,`1`,`1`>(v: r1);
160	}
161	return Vec3vf4 (unpacklo(a: r0,b: r1), // r00, r10, r01, r11
162	shuffle<`1`,`1`,`2`,`2`>(v: r0), // r01, r01, r02, r02
163	shuffle<`0`,`1`,`1`,`2`>(v: r1)); // r10, r11, r11, r12
164	}
165
166	static __forceinline void gather(const float* const grid_x,
167	const float* const grid_y,
168	const float* const grid_z,
169	const size_t line_offset,
170	const size_t lines,
171	Vec3vf4& v0_o,
172	Vec3vf4& v1_o,
173	Vec3vf4& v2_o)
174	{
175	const Vec3vf4 tri_v012_x = gather(grid: grid_x,line_offset,lines);
176	const Vec3vf4 tri_v012_y = gather(grid: grid_y,line_offset,lines);
177	const Vec3vf4 tri_v012_z = gather(grid: grid_z,line_offset,lines);
178	v0_o = Vec3vf4 (tri_v012_x [`0`],tri_v012_y [`0`],tri_v012_z [`0`]);
179	v1_o = Vec3vf4 (tri_v012_x [`1`],tri_v012_y [`1`],tri_v012_z [`1`]);
180	v2_o = Vec3vf4 (tri_v012_x [`2`],tri_v012_y [`2`],tri_v012_z [`2`]);
181	}
182	};
183
#if defined (__AVX__)
/*! Gathers three grid rows of three vertices each into 8-wide vectors, one
 *  triangle per lane: the lower four lanes cover rows 0/1 and the upper four
 *  lanes rows 1/2, each half using the same lane layout as Gather2x3. */
struct Gather3x3
{
  enum { M = 8 };
  typedef vbool8 vbool;
  typedef vint8 vint;
  typedef vfloat8 vfloat;

  /*! Gathers one float array.
   *  \param grid        first value of the top row
   *  \param line_offset distance in floats between consecutive rows
   *  \param lines       number of rows available; with 2 or fewer the third
   *                     row is not loaded and row 1 is reused instead
   *  \return vertex 0, 1 and 2 of every lane's triangle */
  static __forceinline const Vec3vf8 gather(const float* const grid, const size_t line_offset, const size_t lines)
  {
    vfloat4 ra = vfloat4::loadu(grid + 0*line_offset);
    vfloat4 rb = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
    vfloat4 rc;
    if (likely(lines > 2))
      rc = vfloat4::loadu(grid + 2*line_offset);
    else
      rc = rb;

    if (unlikely(line_offset == 2))
    {
      /* rows of only two vertices: reuse column 1 in place of the missing column 2 */
      ra = shuffle<0,1,1,1>(ra);
      rb = shuffle<0,1,1,1>(rb);
      rc = shuffle<0,1,1,1>(rc);
    }

    const vfloat8 r0 = vfloat8(ra,rb);
    const vfloat8 r1 = vfloat8(rb,rc);
    return Vec3vf8(unpacklo(r0,r1),       // r00, r10, r01, r11, r10, r20, r11, r21
                   shuffle<1,1,2,2>(r0),  // r01, r01, r02, r02, r11, r11, r12, r12
                   shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12, r20, r21, r21, r22
  }

  /*! Gathers the x, y and z arrays with the same layout and regroups the
   *  result per triangle vertex: v0_o, v1_o and v2_o each hold (x,y,z). */
  static __forceinline void gather(const float* const grid_x,
                                   const float* const grid_y,
                                   const float* const grid_z,
                                   const size_t line_offset,
                                   const size_t lines,
                                   Vec3vf8& v0_o,
                                   Vec3vf8& v1_o,
                                   Vec3vf8& v2_o)
  {
    const Vec3vf8 tri_v012_x = gather(grid_x,line_offset,lines);
    const Vec3vf8 tri_v012_y = gather(grid_y,line_offset,lines);
    const Vec3vf8 tri_v012_z = gather(grid_z,line_offset,lines);
    v0_o = Vec3vf8(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);
    v1_o = Vec3vf8(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
    v2_o = Vec3vf8(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
  }
};
#endif
234
235	template<typename vfloat>
236	static __forceinline Vec2<vfloat> decodeUV(const vfloat& uv)
237	{
238	typedef typename vfloat::Int vint;
239	const vint iu = asInt(uv) & `0xffff`;
240	const vint iv = srl(asInt(uv),`16`);
241	const vfloat u = (vfloat)iu * vfloat(`8.0f`/`0x10000`);
242	const vfloat v = (vfloat)iv * vfloat(`8.0f`/`0x10000`);
243	return Vec2<vfloat>(u,v);
244	}
245
246	__forceinline unsigned int geomID() const {
247	return _geomID;
248	}
249
250	__forceinline unsigned int primID() const {
251	return _primID;
252	}
253
254	public:
255	BVH4::NodeRef troot;
256	#if !defined(__64BIT__)
257	unsigned align1;
258	#endif
259	unsigned time_steps;
260	unsigned width;
261
262	unsigned height;
263	unsigned dim_offset;
264	unsigned _geomID;
265	unsigned _primID;
266
267	unsigned align2;
268	unsigned gridOffset;
269	unsigned gridBytes;
270	unsigned rootOffset;
271
272	char data[`1`]; //!< after the struct we first store the BVH, then the grid, and finally the roots
273	};
274	}
275	}
276

Provided by KDAB

Definitions

source code of qtquick3d/src/3rdparty/embree/kernels/geometry/grid_soa.h