/*
 * Copyright 2016-2021 Robert Konrad
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/*
 * At your option, you may choose to accept this material under either:
 * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_hlsl.hpp"
#include "GLSL.std.450.h"
#include <algorithm>
#include <assert.h>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

enum class ImageFormatNormalizedState
{
	None = 0,
	Unorm = 1,
	Snorm = 2
};

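// Classifies a SPIR-V image format as UNORM, SNORM or neither, for use when emitting typed UAV texel types.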
static ImageFormatNormalizedState image_format_to_normalized_state(ImageFormat fmt)
{
	switch (fmt)
	{
	case ImageFormatR8:
	case ImageFormatR16:
	case ImageFormatRg8:
	case ImageFormatRg16:
	case ImageFormatRgba8:
	case ImageFormatRgba16:
	case ImageFormatRgb10A2:
		return ImageFormatNormalizedState::Unorm;

	case ImageFormatR8Snorm:
	case ImageFormatR16Snorm:
	case ImageFormatRg8Snorm:
	case ImageFormatRg16Snorm:
	case ImageFormatRgba8Snorm:
	case ImageFormatRgba16Snorm:
		return ImageFormatNormalizedState::Snorm;

	default:
		break;
	}

	return ImageFormatNormalizedState::None;
}

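// Returns how many components a typed image format carries (1 to 4). Unknown formats conservatively report 4.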
static unsigned image_format_to_components(ImageFormat fmt)
{
	switch (fmt)
	{
	case ImageFormatR8:
	case ImageFormatR16:
	case ImageFormatR8Snorm:
	case ImageFormatR16Snorm:
	case ImageFormatR16f:
	case ImageFormatR32f:
	case ImageFormatR8i:
	case ImageFormatR16i:
	case ImageFormatR32i:
	case ImageFormatR8ui:
	case ImageFormatR16ui:
	case ImageFormatR32ui:
		return 1;

	case ImageFormatRg8:
	case ImageFormatRg16:
	case ImageFormatRg8Snorm:
	case ImageFormatRg16Snorm:
	case ImageFormatRg16f:
	case ImageFormatRg32f:
	case ImageFormatRg8i:
	case ImageFormatRg16i:
	case ImageFormatRg32i:
	case ImageFormatRg8ui:
	case ImageFormatRg16ui:
	case ImageFormatRg32ui:
		return 2;

	case ImageFormatR11fG11fB10f:
		return 3;

	case ImageFormatRgba8:
	case ImageFormatRgba16:
	case ImageFormatRgb10A2:
	case ImageFormatRgba8Snorm:
	case ImageFormatRgba16Snorm:
	case ImageFormatRgba16f:
	case ImageFormatRgba32f:
	case ImageFormatRgba8i:
	case ImageFormatRgba16i:
	case ImageFormatRgba32i:
	case ImageFormatRgba8ui:
	case ImageFormatRgba16ui:
	case ImageFormatRgba32ui:
	case ImageFormatRgb10a2ui:
		return 4;

	case ImageFormatUnknown:
		return 4; // Assume 4.

	default:
		SPIRV_CROSS_THROW("Unrecognized typed image format.");
	}
}

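// Maps a SPIR-V image format plus its sampled base type to the HLSL texel template type (e.g. "unorm float4"),
// throwing if the format and base type do not agree.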
static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype)
{
	switch (fmt)
	{
	case ImageFormatR8:
	case ImageFormatR16:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "unorm float";
	case ImageFormatRg8:
	case ImageFormatRg16:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "unorm float2";
	case ImageFormatRgba8:
	case ImageFormatRgba16:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "unorm float4";
	case ImageFormatRgb10A2:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "unorm float4";

	case ImageFormatR8Snorm:
	case ImageFormatR16Snorm:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "snorm float";
	case ImageFormatRg8Snorm:
	case ImageFormatRg16Snorm:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "snorm float2";
	case ImageFormatRgba8Snorm:
	case ImageFormatRgba16Snorm:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "snorm float4";

	case ImageFormatR16f:
	case ImageFormatR32f:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "float";
	case ImageFormatRg16f:
	case ImageFormatRg32f:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "float2";
	case ImageFormatRgba16f:
	case ImageFormatRgba32f:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "float4";

	case ImageFormatR11fG11fB10f:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "float3";

	case ImageFormatR8i:
	case ImageFormatR16i:
	case ImageFormatR32i:
		if (basetype != SPIRType::Int)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "int";
	case ImageFormatRg8i:
	case ImageFormatRg16i:
	case ImageFormatRg32i:
		if (basetype != SPIRType::Int)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "int2";
	case ImageFormatRgba8i:
	case ImageFormatRgba16i:
	case ImageFormatRgba32i:
		if (basetype != SPIRType::Int)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "int4";

	case ImageFormatR8ui:
	case ImageFormatR16ui:
	case ImageFormatR32ui:
		if (basetype != SPIRType::UInt)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "uint";
	case ImageFormatRg8ui:
	case ImageFormatRg16ui:
	case ImageFormatRg32ui:
		if (basetype != SPIRType::UInt)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "uint2";
	case ImageFormatRgba8ui:
	case ImageFormatRgba16ui:
	case ImageFormatRgba32ui:
		if (basetype != SPIRType::UInt)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "uint4";
	case ImageFormatRgb10a2ui:
		if (basetype != SPIRType::UInt)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "uint4";

	case ImageFormatUnknown:
		switch (basetype)
		{
		case SPIRType::Float:
			return "float4";
		case SPIRType::Int:
			return "int4";
		case SPIRType::UInt:
			return "uint4";
		default:
			SPIRV_CROSS_THROW("Unsupported base type for image.");
		}

	default:
		SPIRV_CROSS_THROW("Unrecognized typed image format.");
	}
}

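// Emits the SM 4.0+ resource type for an image or sampled image,
// e.g. Texture2D<float4>, RWTexture2D<unorm float4>, Buffer<uint4> or RasterizerOrdered variants.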
string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id)
{
	auto &imagetype = get<SPIRType>(type.image.type);
	const char *dim = nullptr;
	bool typed_load = false;
	uint32_t components = 4;

	bool force_image_srv = hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(id, DecorationNonWritable);

	switch (type.image.dim)
	{
	case Dim1D:
		typed_load = type.image.sampled == 2;
		dim = "1D";
		break;
	case Dim2D:
		typed_load = type.image.sampled == 2;
		dim = "2D";
		break;
	case Dim3D:
		typed_load = type.image.sampled == 2;
		dim = "3D";
		break;
	case DimCube:
		if (type.image.sampled == 2)
			SPIRV_CROSS_THROW("RWTextureCube does not exist in HLSL.");
		dim = "Cube";
		break;
	case DimRect:
		SPIRV_CROSS_THROW("Rectangle texture support is not yet implemented for HLSL."); // TODO
	case DimBuffer:
		if (type.image.sampled == 1)
			return join("Buffer<", type_to_glsl(imagetype), components, ">");
		else if (type.image.sampled == 2)
		{
			if (interlocked_resources.count(id))
				return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype),
				            ">");

			typed_load = !force_image_srv && type.image.sampled == 2;

			const char *rw = force_image_srv ? "" : "RW";
			return join(rw, "Buffer<",
			            typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
			                         join(type_to_glsl(imagetype), components),
			            ">");
		}
		else
			SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime.");
	case DimSubpassData:
		dim = "2D";
		typed_load = false;
		break;
	default:
		SPIRV_CROSS_THROW("Invalid dimension.");
	}
	const char *arrayed = type.image.arrayed ? "Array" : "";
	const char *ms = type.image.ms ? "MS" : "";
	const char *rw = typed_load && !force_image_srv ? "RW" : "";

	if (force_image_srv)
		typed_load = false;

	if (typed_load && interlocked_resources.count(id))
		rw = "RasterizerOrdered";

	return join(rw, "Texture", dim, ms, arrayed, "<",
	            typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
	                         join(type_to_glsl(imagetype), components),
	            ">");
}

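// Legacy (SM 3.0 and lower) path: images are expressed with GLSL-style sampler names such as sampler2D.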
string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type, uint32_t /*id*/)
{
	auto &imagetype = get<SPIRType>(type.image.type);
	string res;

	switch (imagetype.basetype)
	{
	case SPIRType::Int:
		res = "i";
		break;
	case SPIRType::UInt:
		res = "u";
		break;
	default:
		break;
	}

	if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
		return res + "subpassInput" + (type.image.ms ? "MS" : "");

	// If we're emulating subpassInput with samplers, force sampler2D
	// so we don't have to specify format.
	if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
	{
		// Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
		if (type.image.dim == DimBuffer && type.image.sampled == 1)
			res += "sampler";
		else
			res += type.image.sampled == 2 ? "image" : "texture";
	}
	else
		res += "sampler";

	switch (type.image.dim)
	{
	case Dim1D:
		res += "1D";
		break;
	case Dim2D:
		res += "2D";
		break;
	case Dim3D:
		res += "3D";
		break;
	case DimCube:
		res += "CUBE";
		break;

	case DimBuffer:
		res += "Buffer";
		break;

	case DimSubpassData:
		res += "2D";
		break;
	default:
		SPIRV_CROSS_THROW("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported.");
	}

	if (type.image.ms)
		res += "MS";
	if (type.image.arrayed)
		res += "Array";

	return res;
}

string CompilerHLSL::image_type_hlsl(const SPIRType &type, uint32_t id)
{
	if (hlsl_options.shader_model <= 30)
		return image_type_hlsl_legacy(type, id);
	else
		return image_type_hlsl_modern(type, id);
}

// The optional id parameter indicates the object whose type we are trying
// to find the description for. It is optional. Most type descriptions do not
// depend on a specific object's use of that type.
string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id)
{
	// Ignore the pointer type since GLSL doesn't have pointers.

	switch (type.basetype)
	{
	case SPIRType::Struct:
		// Need OpName lookup here to get a "sensible" name for a struct.
		if (backend.explicit_struct_type)
			return join("struct ", to_name(type.self));
		else
			return to_name(type.self);

	case SPIRType::Image:
	case SPIRType::SampledImage:
		return image_type_hlsl(type, id);

	case SPIRType::Sampler:
		return comparison_ids.count(id) ? "SamplerComparisonState" : "SamplerState";

	case SPIRType::Void:
		return "void";

	default:
		break;
	}

	if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return "bool";
		case SPIRType::Int:
			return backend.basic_int_type;
		case SPIRType::UInt:
			return backend.basic_uint_type;
		case SPIRType::AtomicCounter:
			return "atomic_uint";
		case SPIRType::Half:
			if (hlsl_options.enable_16bit_types)
				return "half";
			else
				return "min16float";
		case SPIRType::Short:
			if (hlsl_options.enable_16bit_types)
				return "int16_t";
			else
				return "min16int";
		case SPIRType::UShort:
			if (hlsl_options.enable_16bit_types)
				return "uint16_t";
			else
				return "min16uint";
		case SPIRType::Float:
			return "float";
		case SPIRType::Double:
			return "double";
		case SPIRType::Int64:
			if (hlsl_options.shader_model < 60)
				SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0.");
			return "int64_t";
		case SPIRType::UInt64:
			if (hlsl_options.shader_model < 60)
				SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0.");
			return "uint64_t";
		case SPIRType::AccelerationStructure:
			return "RaytracingAccelerationStructure";
		case SPIRType::RayQuery:
			return "RayQuery<RAY_FLAG_NONE>";
		default:
			return "???";
		}
	}
	else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return join("bool", type.vecsize);
		case SPIRType::Int:
			return join("int", type.vecsize);
		case SPIRType::UInt:
			return join("uint", type.vecsize);
		case SPIRType::Half:
			return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.vecsize);
		case SPIRType::Short:
			return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.vecsize);
		case SPIRType::UShort:
			return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.vecsize);
		case SPIRType::Float:
			return join("float", type.vecsize);
		case SPIRType::Double:
			return join("double", type.vecsize);
		case SPIRType::Int64:
			return join("i64vec", type.vecsize);
		case SPIRType::UInt64:
			return join("u64vec", type.vecsize);
		default:
			return "???";
		}
	}
	else
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return join("bool", type.columns, "x", type.vecsize);
		case SPIRType::Int:
			return join("int", type.columns, "x", type.vecsize);
		case SPIRType::UInt:
			return join("uint", type.columns, "x", type.vecsize);
		case SPIRType::Half:
			return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.columns, "x", type.vecsize);
		case SPIRType::Short:
			return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.columns, "x", type.vecsize);
		case SPIRType::UShort:
			return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.columns, "x", type.vecsize);
		case SPIRType::Float:
			return join("float", type.columns, "x", type.vecsize);
		case SPIRType::Double:
			return join("double", type.columns, "x", type.vecsize);
		// Matrix types not supported for int64/uint64.
		default:
			return "???";
		}
	}
}

void CompilerHLSL::emit_header()
{
	for (auto &header : header_lines)
		statement(header);

	if (header_lines.size() > 0)
	{
		statement("");
	}
}

void CompilerHLSL::emit_interface_block_globally(const SPIRVariable &var)
{
	add_resource_name(var.self);

	// The global copies of I/O variables should not contain interpolation qualifiers.
	// These are emitted inside the interface structs.
	auto &flags = ir.meta[var.self].decoration.decoration_flags;
	auto old_flags = flags;
	flags.reset();
	statement("static ", variable_decl(var), ";");
	flags = old_flags;
}

const char *CompilerHLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
{
	// Input and output variables are handled specially in HLSL backend.
	// The variables are declared as global, private variables, and do not need any qualifiers.
	if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
	    var.storage == StorageClassPushConstant)
	{
		return "uniform ";
	}

	return "";
}

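// Declares the active output builtins as members of the stage output struct, mapped to their HLSL system-value semantics.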
void CompilerHLSL::emit_builtin_outputs_in_struct()
{
	auto &execution = get_entry_point();

	bool legacy = hlsl_options.shader_model <= 30;
	active_output_builtins.for_each_bit([&](uint32_t i) {
		const char *type = nullptr;
		const char *semantic = nullptr;
		auto builtin = static_cast<BuiltIn>(i);
		switch (builtin)
		{
		case BuiltInPosition:
			type = is_position_invariant() && backend.support_precise_qualifier ? "precise float4" : "float4";
			semantic = legacy ? "POSITION" : "SV_Position";
			break;

		case BuiltInSampleMask:
			if (hlsl_options.shader_model < 41 || execution.model != ExecutionModelFragment)
				SPIRV_CROSS_THROW("Sample Mask output is only supported in PS 4.1 or higher.");
			type = "uint";
			semantic = "SV_Coverage";
			break;

		case BuiltInFragDepth:
			type = "float";
			if (legacy)
			{
				semantic = "DEPTH";
			}
			else
			{
				if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthGreater))
					semantic = "SV_DepthGreaterEqual";
				else if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthLess))
					semantic = "SV_DepthLessEqual";
				else
					semantic = "SV_Depth";
			}
			break;

		case BuiltInClipDistance:
		{
			static const char *types[] = { "float", "float2", "float3", "float4" };

			// HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors.
			if (execution.model == ExecutionModelMeshEXT)
			{
				if (clip_distance_count > 4)
					SPIRV_CROSS_THROW("Clip distance count > 4 not supported for mesh shaders.");

				if (clip_distance_count == 1)
				{
					// Avoids having to hack up access_chain code. Makes it trivially indexable.
					statement("float gl_ClipDistance[1] : SV_ClipDistance;");
				}
				else
				{
					// Replace array with vector directly, avoids any weird fixup path.
					statement(types[clip_distance_count - 1], " gl_ClipDistance : SV_ClipDistance;");
				}
			}
			else
			{
				for (uint32_t clip = 0; clip < clip_distance_count; clip += 4)
				{
					uint32_t to_declare = clip_distance_count - clip;
					if (to_declare > 4)
						to_declare = 4;

					uint32_t semantic_index = clip / 4;

					statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
					          " : SV_ClipDistance", semantic_index, ";");
				}
			}
			break;
		}

		case BuiltInCullDistance:
		{
			static const char *types[] = { "float", "float2", "float3", "float4" };

			// HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors.
			if (execution.model == ExecutionModelMeshEXT)
			{
				if (cull_distance_count > 4)
					SPIRV_CROSS_THROW("Cull distance count > 4 not supported for mesh shaders.");

				if (cull_distance_count == 1)
				{
					// Avoids having to hack up access_chain code. Makes it trivially indexable.
					statement("float gl_CullDistance[1] : SV_CullDistance;");
				}
				else
				{
					// Replace array with vector directly, avoids any weird fixup path.
					statement(types[cull_distance_count - 1], " gl_CullDistance : SV_CullDistance;");
				}
			}
			else
			{
				for (uint32_t cull = 0; cull < cull_distance_count; cull += 4)
				{
					uint32_t to_declare = cull_distance_count - cull;
					if (to_declare > 4)
						to_declare = 4;

					uint32_t semantic_index = cull / 4;

					statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
					          " : SV_CullDistance", semantic_index, ";");
				}
			}
			break;
		}

		case BuiltInPointSize:
			// If point_size_compat is enabled, just ignore PointSize.
			// PointSize does not exist in HLSL, but some code bases might want to be able to use these shaders,
			// even if it means working around the missing feature.
			if (legacy)
			{
				type = "float";
				semantic = "PSIZE";
			}
			else if (!hlsl_options.point_size_compat)
				SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
			break;

		case BuiltInLayer:
		case BuiltInPrimitiveId:
		case BuiltInViewportIndex:
		case BuiltInPrimitiveShadingRateKHR:
		case BuiltInCullPrimitiveEXT:
			// Per-primitive attributes are handled separately.
			break;

		case BuiltInPrimitivePointIndicesEXT:
		case BuiltInPrimitiveLineIndicesEXT:
		case BuiltInPrimitiveTriangleIndicesEXT:
			// Meshlet local-index buffer is handled separately.
			break;

		default:
			SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
		}

		if (type && semantic)
			statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";");
	});
}

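// Same idea as above, but for the per-primitive builtins (layer, primitive ID, viewport index, shading rate, cull flag).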
void CompilerHLSL::emit_builtin_primitive_outputs_in_struct()
{
	active_output_builtins.for_each_bit([&](uint32_t i) {
		const char *type = nullptr;
		const char *semantic = nullptr;
		auto builtin = static_cast<BuiltIn>(i);
		switch (builtin)
		{
		case BuiltInLayer:
		{
			if (hlsl_options.shader_model < 50)
				SPIRV_CROSS_THROW("Render target array index output is only supported in SM 5.0 or higher.");
			type = "uint";
			semantic = "SV_RenderTargetArrayIndex";
			break;
		}

		case BuiltInPrimitiveId:
			type = "uint";
			semantic = "SV_PrimitiveID";
			break;

		case BuiltInViewportIndex:
			type = "uint";
			semantic = "SV_ViewportArrayIndex";
			break;

		case BuiltInPrimitiveShadingRateKHR:
			type = "uint";
			semantic = "SV_ShadingRate";
			break;

		case BuiltInCullPrimitiveEXT:
			type = "bool";
			semantic = "SV_CullPrimitive";
			break;

		default:
			break;
		}

		if (type && semantic)
			statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";");
	});
}

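// Declares the active input builtins in the stage input struct with their HLSL semantics,
// validating shader model requirements as it goes.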
void CompilerHLSL::emit_builtin_inputs_in_struct()
{
	bool legacy = hlsl_options.shader_model <= 30;
	active_input_builtins.for_each_bit([&](uint32_t i) {
		const char *type = nullptr;
		const char *semantic = nullptr;
		auto builtin = static_cast<BuiltIn>(i);
		switch (builtin)
		{
		case BuiltInFragCoord:
			type = "float4";
			semantic = legacy ? "VPOS" : "SV_Position";
			break;

		case BuiltInVertexId:
		case BuiltInVertexIndex:
			if (legacy)
				SPIRV_CROSS_THROW("Vertex index not supported in SM 3.0 or lower.");
			type = "uint";
			semantic = "SV_VertexID";
			break;

		case BuiltInPrimitiveId:
			type = "uint";
			semantic = "SV_PrimitiveID";
			break;

		case BuiltInInstanceId:
		case BuiltInInstanceIndex:
			if (legacy)
				SPIRV_CROSS_THROW("Instance index not supported in SM 3.0 or lower.");
			type = "uint";
			semantic = "SV_InstanceID";
			break;

		case BuiltInSampleId:
			if (legacy)
				SPIRV_CROSS_THROW("Sample ID not supported in SM 3.0 or lower.");
			type = "uint";
			semantic = "SV_SampleIndex";
			break;

		case BuiltInSampleMask:
			if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
				SPIRV_CROSS_THROW("Sample Mask input is only supported in PS 5.0 or higher.");
			type = "uint";
			semantic = "SV_Coverage";
			break;

		case BuiltInGlobalInvocationId:
			type = "uint3";
			semantic = "SV_DispatchThreadID";
			break;

		case BuiltInLocalInvocationId:
			type = "uint3";
			semantic = "SV_GroupThreadID";
			break;

		case BuiltInLocalInvocationIndex:
			type = "uint";
			semantic = "SV_GroupIndex";
			break;

		case BuiltInWorkgroupId:
			type = "uint3";
			semantic = "SV_GroupID";
			break;

		case BuiltInFrontFacing:
			type = "bool";
			semantic = "SV_IsFrontFace";
			break;

		case BuiltInViewIndex:
			if (hlsl_options.shader_model < 61 || (get_entry_point().model != ExecutionModelVertex && get_entry_point().model != ExecutionModelFragment))
				SPIRV_CROSS_THROW("View Index input is only supported in VS and PS 6.1 or higher.");
			type = "uint";
			semantic = "SV_ViewID";
			break;

		case BuiltInNumWorkgroups:
		case BuiltInSubgroupSize:
		case BuiltInSubgroupLocalInvocationId:
		case BuiltInSubgroupEqMask:
		case BuiltInSubgroupLtMask:
		case BuiltInSubgroupLeMask:
		case BuiltInSubgroupGtMask:
		case BuiltInSubgroupGeMask:
			// Handled specially.
			break;

		case BuiltInBaseVertex:
			if (hlsl_options.shader_model >= 68)
			{
				type = "uint";
				semantic = "SV_StartVertexLocation";
			}
			break;

		case BuiltInBaseInstance:
			if (hlsl_options.shader_model >= 68)
			{
				type = "uint";
				semantic = "SV_StartInstanceLocation";
			}
			break;

		case BuiltInHelperInvocation:
			if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
				SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher.");
			break;

		case BuiltInClipDistance:
			// HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors.
			for (uint32_t clip = 0; clip < clip_distance_count; clip += 4)
			{
				uint32_t to_declare = clip_distance_count - clip;
				if (to_declare > 4)
					to_declare = 4;

				uint32_t semantic_index = clip / 4;

				static const char *types[] = { "float", "float2", "float3", "float4" };
				statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index,
				          " : SV_ClipDistance", semantic_index, ";");
			}
			break;

		case BuiltInCullDistance:
			// HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors.
			for (uint32_t cull = 0; cull < cull_distance_count; cull += 4)
			{
				uint32_t to_declare = cull_distance_count - cull;
				if (to_declare > 4)
					to_declare = 4;

				uint32_t semantic_index = cull / 4;

				static const char *types[] = { "float", "float2", "float3", "float4" };
				statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index,
				          " : SV_CullDistance", semantic_index, ";");
			}
			break;

		case BuiltInPointCoord:
			// PointCoord is not supported, but provide a way to just ignore that, similar to PointSize.
			if (hlsl_options.point_coord_compat)
				break;
			else
				SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");

		case BuiltInLayer:
			if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
				SPIRV_CROSS_THROW("Render target array index input is only supported in PS 5.0 or higher.");
			type = "uint";
			semantic = "SV_RenderTargetArrayIndex";
			break;

		case BuiltInBaryCoordKHR:
		case BuiltInBaryCoordNoPerspKHR:
			if (hlsl_options.shader_model < 61)
				SPIRV_CROSS_THROW("SM 6.1 is required for barycentrics.");
			type = builtin == BuiltInBaryCoordNoPerspKHR ? "noperspective float3" : "float3";
			if (active_input_builtins.get(BuiltInBaryCoordKHR) && active_input_builtins.get(BuiltInBaryCoordNoPerspKHR))
				semantic = builtin == BuiltInBaryCoordKHR ? "SV_Barycentrics0" : "SV_Barycentrics1";
			else
				semantic = "SV_Barycentrics";
			break;

		default:
			SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
		}

		if (type && semantic)
			statement(type, " ", builtin_to_glsl(builtin, StorageClassInput), " : ", semantic, ";");
	});
}

uint32_t CompilerHLSL::type_to_consumed_locations(const SPIRType &type) const
{
	// TODO: Need to verify correctness.
	uint32_t elements = 0;

	if (type.basetype == SPIRType::Struct)
	{
		for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
			elements += type_to_consumed_locations(get<SPIRType>(type.member_types[i]));
	}
	else
	{
		uint32_t array_multiplier = 1;
		for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
		{
			if (type.array_size_literal[i])
				array_multiplier *= type.array[i];
			else
				array_multiplier *= evaluate_constant_u32(type.array[i]);
		}
		elements += array_multiplier * type.columns;
	}
	return elements;
}

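// Translates SPIR-V interpolation decorations into HLSL qualifiers (nointerpolation, noperspective, centroid, ...).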
string CompilerHLSL::to_interpolation_qualifiers(const Bitset &flags)
{
	string res;
	//if (flags & (1ull << DecorationSmooth))
	//	res += "linear ";
	if (flags.get(DecorationFlat) || flags.get(DecorationPerVertexKHR))
		res += "nointerpolation ";
	if (flags.get(DecorationNoPerspective))
		res += "noperspective ";
	if (flags.get(DecorationCentroid))
		res += "centroid ";
	if (flags.get(DecorationPatch))
		res += "patch "; // Seems to be different in actual HLSL.
	if (flags.get(DecorationSample))
		res += "sample ";
	if (flags.get(DecorationInvariant) && backend.support_precise_qualifier)
		res += "precise "; // Not supported?

	return res;
}

std::string CompilerHLSL::to_semantic(uint32_t location, ExecutionModel em, StorageClass sc)
{
	if (em == ExecutionModelVertex && sc == StorageClassInput)
	{
		// We have a vertex attribute - we should look at remapping it if the user provided
		// vertex attribute hints.
		for (auto &attribute : remap_vertex_attributes)
			if (attribute.location == location)
				return attribute.semantic;
	}

	// Not a vertex attribute, or no remap_vertex_attributes entry.
	return join("TEXCOORD", location);
}

std::string CompilerHLSL::to_initializer_expression(const SPIRVariable &var)
{
	// We cannot emit static const initializer for block constants for practical reasons,
	// so just inline the initializer.
	// FIXME: There is a theoretical problem here if someone tries to composite extract
	// into this initializer since we don't declare it properly, but that is somewhat non-sensical.
	auto &type = get<SPIRType>(var.basetype);
	bool is_block = has_decoration(type.self, DecorationBlock);
	auto *c = maybe_get<SPIRConstant>(var.initializer);
	if (is_block && c)
		return constant_expression(*c);
	else
		return CompilerGLSL::to_initializer_expression(var);
}

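// Emits a single block member of an I/O struct with its semantic and records the locations it consumes.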
void CompilerHLSL::emit_interface_block_member_in_struct(const SPIRVariable &var, uint32_t member_index,
                                                         uint32_t location,
                                                         std::unordered_set<uint32_t> &active_locations)
{
	auto &execution = get_entry_point();
	auto type = get<SPIRType>(var.basetype);
	auto semantic = to_semantic(location, execution.model, var.storage);
	auto mbr_name = join(to_name(type.self), "_", to_member_name(type, member_index));
	auto &mbr_type = get<SPIRType>(type.member_types[member_index]);

	Bitset member_decorations = get_member_decoration_bitset(type.self, member_index);
	if (has_decoration(var.self, DecorationPerVertexKHR))
		member_decorations.set(DecorationPerVertexKHR);

	statement(to_interpolation_qualifiers(member_decorations),
	          type_to_glsl(mbr_type),
	          " ", mbr_name, type_to_array_glsl(mbr_type, var.self),
	          " : ", semantic, ";");

	// Structs and arrays should consume more locations.
	uint32_t consumed_locations = type_to_consumed_locations(mbr_type);
	for (uint32_t i = 0; i < consumed_locations; i++)
		active_locations.insert(location + i);
}

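// Emits a non-block I/O variable inside the stage input/output struct, picking a semantic
// and unrolling matrices for vertex inputs.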
void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unordered_set<uint32_t> &active_locations)
{
	auto &execution = get_entry_point();
	auto type = get<SPIRType>(var.basetype);

	string binding;
	bool use_location_number = true;
	bool need_matrix_unroll = false;
	bool legacy = hlsl_options.shader_model <= 30;
	if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
	{
		// Dual-source blending is achieved in HLSL by emitting to SV_Target0 and 1.
		uint32_t index = get_decoration(var.self, DecorationIndex);
		uint32_t location = get_decoration(var.self, DecorationLocation);

		if (index != 0 && location != 0)
			SPIRV_CROSS_THROW("Dual-source blending is only supported on MRT #0 in HLSL.");

		binding = join(legacy ? "COLOR" : "SV_Target", location + index);
		use_location_number = false;
		if (legacy) // COLOR must be a four-component vector on legacy shader model targets (HLSL ERR_COLOR_4COMP)
			type.vecsize = 4;
	}
	else if (var.storage == StorageClassInput && execution.model == ExecutionModelVertex)
	{
		need_matrix_unroll = true;
		if (legacy) // Inputs must be floating-point in legacy targets.
			type.basetype = SPIRType::Float;
	}

	const auto get_vacant_location = [&]() -> uint32_t {
		for (uint32_t i = 0; i < 64; i++)
			if (!active_locations.count(i))
				return i;
		SPIRV_CROSS_THROW("All locations from 0 to 63 are exhausted.");
	};

	auto name = to_name(var.self);
	if (use_location_number)
	{
		uint32_t location_number;

		// If an explicit location exists, use it with TEXCOORD[N] semantic.
		// Otherwise, pick a vacant location.
		if (has_decoration(var.self, DecorationLocation))
			location_number = get_decoration(var.self, DecorationLocation);
		else
			location_number = get_vacant_location();

		// Allow semantic remap if specified.
		auto semantic = to_semantic(location_number, execution.model, var.storage);

		if (need_matrix_unroll && type.columns > 1)
		{
			if (!type.array.empty())
				SPIRV_CROSS_THROW("Arrays of matrices used as input/output. This is not supported.");

			// Unroll matrices.
			for (uint32_t i = 0; i < type.columns; i++)
			{
				SPIRType newtype = type;
				newtype.columns = 1;

				string effective_semantic;
				if (hlsl_options.flatten_matrix_vertex_input_semantics)
					effective_semantic = to_semantic(location_number, execution.model, var.storage);
				else
					effective_semantic = join(semantic, "_", i);

				statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)),
				          variable_decl(newtype, join(name, "_", i)), " : ", effective_semantic, ";");
				active_locations.insert(location_number++);
			}
		}
		else
		{
			auto decl_type = type;
			if (execution.model == ExecutionModelMeshEXT || has_decoration(var.self, DecorationPerVertexKHR))
			{
				decl_type.array.erase(decl_type.array.begin());
				decl_type.array_size_literal.erase(decl_type.array_size_literal.begin());
			}
			statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(decl_type, name), " : ",
			          semantic, ";");

			// Structs and arrays should consume more locations.
			uint32_t consumed_locations = type_to_consumed_locations(decl_type);
			for (uint32_t i = 0; i < consumed_locations; i++)
				active_locations.insert(location_number + i);
		}
	}
	else
	{
		statement(variable_decl(type, name), " : ", binding, ";");
	}
}

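// Overrides builtin name translation for cases where HLSL needs an expression or a remapped global
// instead of the GLSL-style name.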
std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage)
{
	switch (builtin)
	{
	case BuiltInVertexId:
		return "gl_VertexID";
	case BuiltInInstanceId:
		return "gl_InstanceID";
	case BuiltInNumWorkgroups:
	{
		if (!num_workgroups_builtin)
			SPIRV_CROSS_THROW("NumWorkgroups builtin is used, but remap_num_workgroups_builtin() was not called. "
			                  "Cannot emit code for this builtin.");

		auto &var = get<SPIRVariable>(num_workgroups_builtin);
		auto &type = get<SPIRType>(var.basetype);
		auto ret = join(to_name(num_workgroups_builtin), "_", get_member_name(type.self, 0));
		ParsedIR::sanitize_underscores(ret);
		return ret;
	}
	case BuiltInPointCoord:
		// Crude hack, but there is no real alternative. This path is only enabled if point_coord_compat is set.
		return "float2(0.5f, 0.5f)";
	case BuiltInSubgroupLocalInvocationId:
		return "WaveGetLaneIndex()";
	case BuiltInSubgroupSize:
		return "WaveGetLaneCount()";
	case BuiltInHelperInvocation:
		return "IsHelperLane()";

	default:
		return CompilerGLSL::builtin_to_glsl(builtin, storage);
	}
}

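// Declares static globals for every builtin referenced by the shader so the generated entry point
// can copy data between them and the stage I/O structs.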
void CompilerHLSL::emit_builtin_variables()
{
	Bitset builtins = active_input_builtins;
	builtins.merge_or(active_output_builtins);

	std::unordered_map<uint32_t, ID> builtin_to_initializer;

	// We need to declare sample mask with the same type that module declares it.
	// Sample mask is somewhat special in that SPIR-V has an array, and we can copy that array, so we need to
	// match sign.
	SPIRType::BaseType sample_mask_in_basetype = SPIRType::Void;
	SPIRType::BaseType sample_mask_out_basetype = SPIRType::Void;

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		if (!is_builtin_variable(var))
			return;

		auto &type = this->get<SPIRType>(var.basetype);
		auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));

		if (var.storage == StorageClassInput && builtin == BuiltInSampleMask)
			sample_mask_in_basetype = type.basetype;
		else if (var.storage == StorageClassOutput && builtin == BuiltInSampleMask)
			sample_mask_out_basetype = type.basetype;

		if (var.initializer && var.storage == StorageClassOutput)
		{
			auto *c = this->maybe_get<SPIRConstant>(var.initializer);
			if (!c)
				return;

			if (type.basetype == SPIRType::Struct)
			{
				uint32_t member_count = uint32_t(type.member_types.size());
				for (uint32_t i = 0; i < member_count; i++)
				{
					if (has_member_decoration(type.self, i, DecorationBuiltIn))
					{
						builtin_to_initializer[get_member_decoration(type.self, i, DecorationBuiltIn)] =
						    c->subconstants[i];
					}
				}
			}
			else if (has_decoration(var.self, DecorationBuiltIn))
			{
				builtin_to_initializer[builtin] = var.initializer;
			}
		}
	});

	// Emit global variables for the interface variables which are statically used by the shader.
	builtins.for_each_bit([&](uint32_t i) {
		const char *type = nullptr;
		auto builtin = static_cast<BuiltIn>(i);
		uint32_t array_size = 0;

		string init_expr;
		auto init_itr = builtin_to_initializer.find(builtin);
		if (init_itr != builtin_to_initializer.end())
			init_expr = join(" = ", to_expression(init_itr->second));

		if (get_execution_model() == ExecutionModelMeshEXT)
		{
			if (builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
			    builtin == BuiltInCullDistance || builtin == BuiltInLayer || builtin == BuiltInPrimitiveId ||
			    builtin == BuiltInViewportIndex || builtin == BuiltInCullPrimitiveEXT ||
			    builtin == BuiltInPrimitiveShadingRateKHR || builtin == BuiltInPrimitivePointIndicesEXT ||
			    builtin == BuiltInPrimitiveLineIndicesEXT || builtin == BuiltInPrimitiveTriangleIndicesEXT)
			{
				return;
			}
		}

		switch (builtin)
		{
		case BuiltInFragCoord:
		case BuiltInPosition:
			type = "float4";
			break;

		case BuiltInFragDepth:
			type = "float";
			break;

		case BuiltInVertexId:
		case BuiltInVertexIndex:
		case BuiltInInstanceIndex:
			type = "int";
			if (hlsl_options.support_nonzero_base_vertex_base_instance || hlsl_options.shader_model >= 68)
				base_vertex_info.used = true;
			break;

		case BuiltInBaseVertex:
		case BuiltInBaseInstance:
			type = "int";
			base_vertex_info.used = true;
			break;

		case BuiltInInstanceId:
		case BuiltInSampleId:
			type = "int";
			break;

		case BuiltInPointSize:
			if (hlsl_options.point_size_compat || hlsl_options.shader_model <= 30)
			{
				// Just emit the global variable, it will be ignored.
				type = "float";
				break;
			}
			else
				SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin)));

		case BuiltInGlobalInvocationId:
		case BuiltInLocalInvocationId:
		case BuiltInWorkgroupId:
			type = "uint3";
			break;

		case BuiltInLocalInvocationIndex:
			type = "uint";
			break;

		case BuiltInFrontFacing:
			type = "bool";
			break;

		case BuiltInNumWorkgroups:
		case BuiltInPointCoord:
			// Handled specially.
			break;

		case BuiltInSubgroupLocalInvocationId:
		case BuiltInSubgroupSize:
			if (hlsl_options.shader_model < 60)
				SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops.");
			break;

		case BuiltInSubgroupEqMask:
		case BuiltInSubgroupLtMask:
		case BuiltInSubgroupLeMask:
		case BuiltInSubgroupGtMask:
		case BuiltInSubgroupGeMask:
			if (hlsl_options.shader_model < 60)
				SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops.");
			type = "uint4";
			break;

		case BuiltInHelperInvocation:
			if (hlsl_options.shader_model < 50)
				SPIRV_CROSS_THROW("Need SM 5.0 for Helper Invocation.");
			break;

		case BuiltInClipDistance:
			array_size = clip_distance_count;
			type = "float";
			break;

		case BuiltInCullDistance:
			array_size = cull_distance_count;
			type = "float";
			break;

		case BuiltInSampleMask:
			if (active_input_builtins.get(BuiltInSampleMask))
				type = sample_mask_in_basetype == SPIRType::UInt ? "uint" : "int";
			else
				type = sample_mask_out_basetype == SPIRType::UInt ? "uint" : "int";
			array_size = 1;
			break;

		case BuiltInPrimitiveId:
		case BuiltInViewIndex:
		case BuiltInLayer:
			type = "uint";
			break;

		case BuiltInViewportIndex:
		case BuiltInPrimitiveShadingRateKHR:
		case BuiltInPrimitiveLineIndicesEXT:
		case BuiltInCullPrimitiveEXT:
			type = "uint";
			break;

		case BuiltInBaryCoordKHR:
		case BuiltInBaryCoordNoPerspKHR:
			if (hlsl_options.shader_model < 61)
				SPIRV_CROSS_THROW("Need SM 6.1 for barycentrics.");
			type = "float3";
			break;

		default:
			SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin)));
		}

		StorageClass storage = active_input_builtins.get(i) ? StorageClassInput : StorageClassOutput;

		if (type)
		{
			if (array_size)
				statement("static ", type, " ", builtin_to_glsl(builtin, storage), "[", array_size, "]", init_expr, ";");
			else
				statement("static ", type, " ", builtin_to_glsl(builtin, storage), init_expr, ";");
		}

		// SampleMask can be both in and out with sample builtin, in this case we have already
		// declared the input variable and we need to add the output one now.
		if (builtin == BuiltInSampleMask && storage == StorageClassInput && this->active_output_builtins.get(i))
		{
			type = sample_mask_out_basetype == SPIRType::UInt ? "uint" : "int";
			if (array_size)
				statement("static ", type, " ", this->builtin_to_glsl(builtin, StorageClassOutput), "[", array_size, "]", init_expr, ";");
			else
				statement("static ", type, " ", this->builtin_to_glsl(builtin, StorageClassOutput), init_expr, ";");
		}
	});

	if (base_vertex_info.used && hlsl_options.shader_model < 68)
	{
		string binding_info;
		if (base_vertex_info.explicit_binding)
		{
			binding_info = join(" : register(b", base_vertex_info.register_index);
			if (base_vertex_info.register_space)
				binding_info += join(", space", base_vertex_info.register_space);
			binding_info += ")";
		}
		statement("cbuffer SPIRV_Cross_VertexInfo", binding_info);
		begin_scope();
		statement("int SPIRV_Cross_BaseVertex;");
		statement("int SPIRV_Cross_BaseInstance;");
		end_scope_decl();
		statement("");
	}
}

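// Lets the user pin the SPIRV_Cross_VertexInfo cbuffer (base vertex/instance emulation) to an explicit register and space.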
void CompilerHLSL::set_hlsl_aux_buffer_binding(HLSLAuxBinding binding, uint32_t register_index, uint32_t register_space)
{
	if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE)
	{
		base_vertex_info.explicit_binding = true;
		base_vertex_info.register_space = register_space;
		base_vertex_info.register_index = register_index;
	}
}

void CompilerHLSL::unset_hlsl_aux_buffer_binding(HLSLAuxBinding binding)
{
	if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE)
		base_vertex_info.explicit_binding = false;
}

bool CompilerHLSL::is_hlsl_aux_buffer_binding_used(HLSLAuxBinding binding) const
{
	if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE)
		return base_vertex_info.used;
	else
		return false;
}

void CompilerHLSL::emit_composite_constants()
{
	// HLSL cannot declare structs or arrays inline, so we must move them out to
	// global constants directly.
	bool emitted = false;

	ir.for_each_typed_id<SPIRConstant>([&](uint32_t, SPIRConstant &c) {
		if (c.specialization)
			return;

		auto &type = this->get<SPIRType>(c.constant_type);

		if (type.basetype == SPIRType::Struct && is_builtin_type(type))
			return;

		if (type.basetype == SPIRType::Struct || !type.array.empty())
		{
			add_resource_name(c.self);
			auto name = to_name(c.self);
			statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";");
			emitted = true;
		}
	});

	if (emitted)
		statement("");
}

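// Emits specialization constants (as overridable macros with fallback values), constant ops,
// plain struct types and undef variables.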
void CompilerHLSL::emit_specialization_constants_and_structs()
{
	bool emitted = false;
	SpecializationConstant wg_x, wg_y, wg_z;
	ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

	std::unordered_set<TypeID> io_block_types;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		if ((var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
		    !var.remapped_variable && type.pointer && !is_builtin_variable(var) &&
		    interface_variable_exists_in_entry_point(var.self) &&
		    has_decoration(type.self, DecorationBlock))
		{
			io_block_types.insert(type.self);
		}
	});

	auto loop_lock = ir.create_loop_hard_lock();
	for (auto &id_ : ir.ids_for_constant_undef_or_type)
	{
		auto &id = ir.ids[id_];

		if (id.get_type() == TypeConstant)
		{
			auto &c = id.get<SPIRConstant>();

			if (c.self == workgroup_size_id)
			{
				statement("static const uint3 gl_WorkGroupSize = ",
				          constant_expression(get<SPIRConstant>(workgroup_size_id)), ";");
				emitted = true;
			}
			else if (c.specialization)
			{
				auto &type = get<SPIRType>(c.constant_type);
				add_resource_name(c.self);
				auto name = to_name(c.self);

				if (has_decoration(c.self, DecorationSpecId))
				{
					// HLSL does not support specialization constants, so fall back to macros.
					c.specialization_constant_macro_name =
					    constant_value_macro_name(get_decoration(c.self, DecorationSpecId));

					statement("#ifndef ", c.specialization_constant_macro_name);
					statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c));
					statement("#endif");
					statement("static const ", variable_decl(type, name), " = ", c.specialization_constant_macro_name, ";");
				}
				else
					statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";");

				emitted = true;
			}
		}
		else if (id.get_type() == TypeConstantOp)
		{
			auto &c = id.get<SPIRConstantOp>();
			auto &type = get<SPIRType>(c.basetype);
			add_resource_name(c.self);
			auto name = to_name(c.self);
			statement("static const ", variable_decl(type, name), " = ", constant_op_expression(c), ";");
			emitted = true;
		}
		else if (id.get_type() == TypeType)
		{
			auto &type = id.get<SPIRType>();
			bool is_non_io_block = has_decoration(type.self, DecorationBlock) &&
			                       io_block_types.count(type.self) == 0;
			bool is_buffer_block = has_decoration(type.self, DecorationBufferBlock);
			if (type.basetype == SPIRType::Struct && type.array.empty() &&
			    !type.pointer && !is_non_io_block && !is_buffer_block)
			{
				if (emitted)
					statement("");
				emitted = false;

				emit_struct(type);
			}
		}
		else if (id.get_type() == TypeUndef)
		{
			auto &undef = id.get<SPIRUndef>();
			auto &type = this->get<SPIRType>(undef.basetype);
			// OpUndef can be void for some reason ...
			if (type.basetype == SPIRType::Void)
				return;

			string initializer;
			if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
				initializer = join(" = ", to_zero_initialized_expression(undef.basetype));

			statement("static ", variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
			emitted = true;
		}
	}

	if (emitted)
		statement("");
}

void CompilerHLSL::replace_illegal_names()
{
	static const unordered_set<string> keywords = {
		// Additional HLSL specific keywords.
		// From https://docs.microsoft.com/en-US/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-keywords
		"AppendStructuredBuffer", "asm", "asm_fragment",
		"BlendState", "bool", "break", "Buffer", "ByteAddressBuffer",
		"case", "cbuffer", "centroid", "class", "column_major", "compile",
		"compile_fragment", "CompileShader", "const", "continue", "ComputeShader",
		"ConsumeStructuredBuffer",
		"default", "DepthStencilState", "DepthStencilView", "discard", "do",
		"double", "DomainShader", "dword",
		"else", "export", "false", "float", "for", "fxgroup",
		"GeometryShader", "groupshared", "half", "HullShader",
		"indices", "if", "in", "inline", "inout", "InputPatch", "int", "interface",
		"line", "lineadj", "linear", "LineStream",
		"matrix", "min16float", "min10float", "min16int", "min16uint",
		"namespace", "nointerpolation", "noperspective", "NULL",
		"out", "OutputPatch",
		"payload", "packoffset", "pass", "pixelfragment", "PixelShader", "point",
		"PointStream", "precise", "RasterizerState", "RenderTargetView",
		"return", "register", "row_major", "RWBuffer", "RWByteAddressBuffer",
		"RWStructuredBuffer", "RWTexture1D", "RWTexture1DArray", "RWTexture2D",
		"RWTexture2DArray", "RWTexture3D", "sample", "sampler", "SamplerState",
		"SamplerComparisonState", "shared", "snorm", "stateblock", "stateblock_state",
		"static", "string", "struct", "switch", "StructuredBuffer", "tbuffer",
		"technique", "technique10", "technique11", "texture", "Texture1D",
		"Texture1DArray", "Texture2D", "Texture2DArray", "Texture2DMS", "Texture2DMSArray",
		"Texture3D", "TextureCube", "TextureCubeArray", "true", "typedef", "triangle",
		"triangleadj", "TriangleStream", "uint", "uniform", "unorm", "unsigned",
		"vector", "vertexfragment", "VertexShader", "vertices", "void", "volatile", "while",
	};

	CompilerGLSL::replace_illegal_names(keywords);
	CompilerGLSL::replace_illegal_names();
}

SPIRType::BaseType CompilerHLSL::get_builtin_basetype(BuiltIn builtin, SPIRType::BaseType default_type)
{
	switch (builtin)
	{
	case BuiltInSampleMask:
		// We declare sample mask array with module type, so always use default_type here.
		return default_type;
	default:
		return CompilerGLSL::get_builtin_basetype(builtin, default_type);
	}
}

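// Top-level resource emission: buffer blocks, push constants, uniforms, builtin globals and the stage I/O declarations.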
1614void CompilerHLSL::emit_resources()
1615{
1616 auto &execution = get_entry_point();
1617
1618 replace_illegal_names();
1619
1620 switch (execution.model)
1621 {
1622 case ExecutionModelGeometry:
1623 case ExecutionModelTessellationControl:
1624 case ExecutionModelTessellationEvaluation:
1625 case ExecutionModelMeshEXT:
1626 fixup_implicit_builtin_block_names(model: execution.model);
1627 break;
1628
1629 default:
1630 break;
1631 }
1632
1633 emit_specialization_constants_and_structs();
1634 emit_composite_constants();
1635
1636 bool emitted = false;
1637
1638 // Output UBOs and SSBOs
1639 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
1640 auto &type = this->get<SPIRType>(id: var.basetype);
1641
1642 bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform;
1643 bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock) ||
1644 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
1645
1646 if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
1647 has_block_flags)
1648 {
1649 emit_buffer_block(type: var);
1650 emitted = true;
1651 }
1652 });
1653
1654 // Output push constant blocks
1655 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
1656 auto &type = this->get<SPIRType>(id: var.basetype);
1657 if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
1658 !is_hidden_variable(var))
1659 {
1660 emit_push_constant_block(var);
1661 emitted = true;
1662 }
1663 });
1664
1665 if (execution.model == ExecutionModelVertex && hlsl_options.shader_model <= 30 &&
1666 active_output_builtins.get(bit: BuiltInPosition))
1667 {
1668 statement(ts: "uniform float4 gl_HalfPixel;");
1669 emitted = true;
1670 }
1671
1672 bool skip_separate_image_sampler = !combined_image_samplers.empty() || hlsl_options.shader_model <= 30;
1673
1674 // Output Uniform Constants (values, samplers, images, etc).
1675 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
1676 auto &type = this->get<SPIRType>(id: var.basetype);
1677
1678 // If we're remapping separate samplers and images, only emit the combined samplers.
1679 if (skip_separate_image_sampler)
1680 {
1681 // Sampler buffers are always used without a sampler, and they will also work in regular D3D.
1682 bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
1683 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
1684 bool separate_sampler = type.basetype == SPIRType::Sampler;
1685 if (!sampler_buffer && (separate_image || separate_sampler))
1686 return;
1687 }
1688
1689 if (var.storage != StorageClassFunction && !is_builtin_variable(var) && !var.remapped_variable &&
1690 type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter) &&
1691 !is_hidden_variable(var))
1692 {
1693 emit_uniform(var);
1694 emitted = true;
1695 }
1696 });
1697
1698 if (emitted)
1699 statement(ts: "");
1700 emitted = false;
1701
1702 // Emit builtin input and output variables here.
1703 emit_builtin_variables();
1704
1705 if (execution.model != ExecutionModelMeshEXT)
1706 {
1707 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
1708 auto &type = this->get<SPIRType>(id: var.basetype);
1709
1710 if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer &&
1711 (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) &&
1712 interface_variable_exists_in_entry_point(id: var.self))
1713 {
1714 // Builtin variables are handled separately.
1715 emit_interface_block_globally(var);
1716 emitted = true;
1717 }
1718 });
1719 }
1720
1721 if (emitted)
1722 statement(ts: "");
1723 emitted = false;
1724
1725 require_input = false;
1726 require_output = false;
1727 unordered_set<uint32_t> active_inputs;
1728 unordered_set<uint32_t> active_outputs;
1729
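	// Tracks one stage input or output to be emitted into the IO structs below.
	// For block IO, each block member is recorded individually via block_member_index.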
1730 struct IOVariable
1731 {
1732 const SPIRVariable *var;
1733 uint32_t location;
1734 uint32_t block_member_index;
1735 bool block;
1736 };
1737
1738 SmallVector<IOVariable> input_variables;
1739 SmallVector<IOVariable> output_variables;
1740
1741 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
1742 auto &type = this->get<SPIRType>(id: var.basetype);
1743 bool block = has_decoration(id: type.self, decoration: DecorationBlock);
1744
1745 if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
1746 return;
1747
1748 if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) &&
1749 interface_variable_exists_in_entry_point(id: var.self))
1750 {
1751 if (block)
1752 {
1753 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
1754 {
1755 uint32_t location = get_declared_member_location(var, mbr_idx: i, strip_array: false);
1756 if (var.storage == StorageClassInput)
1757 input_variables.push_back(t: { .var: &var, .location: location, .block_member_index: i, .block: true });
1758 else
1759 output_variables.push_back(t: { .var: &var, .location: location, .block_member_index: i, .block: true });
1760 }
1761 }
1762 else
1763 {
1764 uint32_t location = get_decoration(id: var.self, decoration: DecorationLocation);
1765 if (var.storage == StorageClassInput)
1766 input_variables.push_back(t: { .var: &var, .location: location, .block_member_index: 0, .block: false });
1767 else
1768 output_variables.push_back(t: { .var: &var, .location: location, .block_member_index: 0, .block: false });
1769 }
1770 }
1771 });
1772
1773 const auto variable_compare = [&](const IOVariable &a, const IOVariable &b) -> bool {
1774 		// Sort input and output variables by the following keys, from most to least robust:
1775 // - Location
1776 // - Variable has a location
1777 // - Name comparison
1778 // - Variable has a name
1779 // - Fallback: ID
1780 bool has_location_a = a.block || has_decoration(id: a.var->self, decoration: DecorationLocation);
1781 bool has_location_b = b.block || has_decoration(id: b.var->self, decoration: DecorationLocation);
1782
1783 if (has_location_a && has_location_b)
1784 return a.location < b.location;
1785 else if (has_location_a && !has_location_b)
1786 return true;
1787 else if (!has_location_a && has_location_b)
1788 return false;
1789
1790 const auto &name1 = to_name(id: a.var->self);
1791 const auto &name2 = to_name(id: b.var->self);
1792
1793 if (name1.empty() && name2.empty())
1794 return a.var->self < b.var->self;
1795 else if (name1.empty())
1796 return true;
1797 else if (name2.empty())
1798 return false;
1799
1800 return name1.compare(str: name2) < 0;
1801 };
1802
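	// These builtins never come from the stage input struct in HLSL (they are emulated in the
	// entry point or sourced elsewhere), so they must not force SPIRV_Cross_Input to be declared.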
1803 auto input_builtins = active_input_builtins;
1804 input_builtins.clear(bit: BuiltInNumWorkgroups);
1805 input_builtins.clear(bit: BuiltInPointCoord);
1806 input_builtins.clear(bit: BuiltInSubgroupSize);
1807 input_builtins.clear(bit: BuiltInSubgroupLocalInvocationId);
1808 input_builtins.clear(bit: BuiltInSubgroupEqMask);
1809 input_builtins.clear(bit: BuiltInSubgroupLtMask);
1810 input_builtins.clear(bit: BuiltInSubgroupLeMask);
1811 input_builtins.clear(bit: BuiltInSubgroupGtMask);
1812 input_builtins.clear(bit: BuiltInSubgroupGeMask);
1813
1814 if (!input_variables.empty() || !input_builtins.empty())
1815 {
1816 require_input = true;
1817 statement(ts: "struct SPIRV_Cross_Input");
1818
1819 begin_scope();
1820 sort(first: input_variables.begin(), last: input_variables.end(), comp: variable_compare);
1821 for (auto &var : input_variables)
1822 {
1823 if (var.block)
1824 emit_interface_block_member_in_struct(var: *var.var, member_index: var.block_member_index, location: var.location, active_locations&: active_inputs);
1825 else
1826 emit_interface_block_in_struct(var: *var.var, active_locations&: active_inputs);
1827 }
1828 emit_builtin_inputs_in_struct();
1829 end_scope_decl();
1830 statement(ts: "");
1831 }
1832
1833 const bool is_mesh_shader = execution.model == ExecutionModelMeshEXT;
1834 if (!output_variables.empty() || !active_output_builtins.empty())
1835 {
1836 sort(first: output_variables.begin(), last: output_variables.end(), comp: variable_compare);
1837 require_output = !is_mesh_shader;
1838
1839 statement(ts: is_mesh_shader ? "struct gl_MeshPerVertexEXT" : "struct SPIRV_Cross_Output");
1840 begin_scope();
1841 for (auto &var : output_variables)
1842 {
1843 if (is_per_primitive_variable(var: *var.var))
1844 continue;
1845 if (var.block && is_mesh_shader && var.block_member_index != 0)
1846 continue;
1847 if (var.block && !is_mesh_shader)
1848 emit_interface_block_member_in_struct(var: *var.var, member_index: var.block_member_index, location: var.location, active_locations&: active_outputs);
1849 else
1850 emit_interface_block_in_struct(var: *var.var, active_locations&: active_outputs);
1851 }
1852 emit_builtin_outputs_in_struct();
1853 if (!is_mesh_shader)
1854 emit_builtin_primitive_outputs_in_struct();
1855 end_scope_decl();
1856 statement(ts: "");
1857
1858 if (is_mesh_shader)
1859 {
1860 statement(ts: "struct gl_MeshPerPrimitiveEXT");
1861 begin_scope();
1862 for (auto &var : output_variables)
1863 {
1864 if (!is_per_primitive_variable(var: *var.var))
1865 continue;
1866 if (var.block && var.block_member_index != 0)
1867 continue;
1868
1869 emit_interface_block_in_struct(var: *var.var, active_locations&: active_outputs);
1870 }
1871 emit_builtin_primitive_outputs_in_struct();
1872 end_scope_decl();
1873 statement(ts: "");
1874 }
1875 }
1876
1877 // Global variables.
1878 for (auto global : global_variables)
1879 {
1880 auto &var = get<SPIRVariable>(id: global);
1881 if (is_hidden_variable(var, include_builtins: true))
1882 continue;
1883
1884 if (var.storage == StorageClassTaskPayloadWorkgroupEXT && is_mesh_shader)
1885 continue;
1886
1887 if (var.storage != StorageClassOutput)
1888 {
1889 if (!variable_is_lut(var))
1890 {
1891 add_resource_name(id: var.self);
1892
1893 const char *storage = nullptr;
1894 switch (var.storage)
1895 {
1896 case StorageClassWorkgroup:
1897 case StorageClassTaskPayloadWorkgroupEXT:
1898 storage = "groupshared";
1899 break;
1900
1901 default:
1902 storage = "static";
1903 break;
1904 }
1905
1906 string initializer;
1907 if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
1908 !var.initializer && !var.static_expression && type_can_zero_initialize(type: get_variable_data_type(var)))
1909 {
1910 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var)));
1911 }
1912 statement(ts&: storage, ts: " ", ts: variable_decl(variable: var), ts&: initializer, ts: ";");
1913
1914 emitted = true;
1915 }
1916 }
1917 }
1918
1919 if (emitted)
1920 statement(ts: "");
1921
1922 if (requires_op_fmod)
1923 {
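		// GLSL mod() uses floored semantics (x - y * floor(x / y)), while HLSL's fmod() truncates
		// toward zero, so emit explicit helpers with the GLSL behavior.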
1924 static const char *types[] = {
1925 "float",
1926 "float2",
1927 "float3",
1928 "float4",
1929 };
1930
1931 for (auto &type : types)
1932 {
1933 statement(ts&: type, ts: " mod(", ts&: type, ts: " x, ", ts&: type, ts: " y)");
1934 begin_scope();
1935 statement(ts: "return x - y * floor(x / y);");
1936 end_scope();
1937 statement(ts: "");
1938 }
1939 }
1940
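	// SRVs are always declared as <type>4, so one set of variants suffices; UAV declarations carry
	// the exact component count and unorm/snorm qualifier, so emit one set per combination.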
1941 emit_texture_size_variants(variant_mask: required_texture_size_variants.srv, vecsize_qualifier: "4", uav: false, type_qualifier: "");
1942 for (uint32_t norm = 0; norm < 3; norm++)
1943 {
1944 for (uint32_t comp = 0; comp < 4; comp++)
1945 {
1946 static const char *qualifiers[] = { "", "unorm ", "snorm " };
1947 static const char *vecsizes[] = { "", "2", "3", "4" };
1948 emit_texture_size_variants(variant_mask: required_texture_size_variants.uav[norm][comp], vecsize_qualifier: vecsizes[comp], uav: true,
1949 type_qualifier: qualifiers[norm]);
1950 }
1951 }
1952
1953 if (requires_fp16_packing)
1954 {
1955 // HLSL does not pack into a single word sadly :(
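		// f32tof16 stores the half-precision bits in the low 16 bits of each uint component,
		// so the two components have to be merged into one word manually.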
1956 statement(ts: "uint spvPackHalf2x16(float2 value)");
1957 begin_scope();
1958 statement(ts: "uint2 Packed = f32tof16(value);");
1959 statement(ts: "return Packed.x | (Packed.y << 16);");
1960 end_scope();
1961 statement(ts: "");
1962
1963 statement(ts: "float2 spvUnpackHalf2x16(uint value)");
1964 begin_scope();
1965 statement(ts: "return f16tof32(uint2(value & 0xffff, value >> 16));");
1966 end_scope();
1967 statement(ts: "");
1968 }
1969
1970 if (requires_uint2_packing)
1971 {
1972 statement(ts: "uint64_t spvPackUint2x32(uint2 value)");
1973 begin_scope();
1974 statement(ts: "return (uint64_t(value.y) << 32) | uint64_t(value.x);");
1975 end_scope();
1976 statement(ts: "");
1977
1978 statement(ts: "uint2 spvUnpackUint2x32(uint64_t value)");
1979 begin_scope();
1980 statement(ts: "uint2 Unpacked;");
1981 statement(ts: "Unpacked.x = uint(value & 0xffffffff);");
1982 statement(ts: "Unpacked.y = uint(value >> 32);");
1983 statement(ts: "return Unpacked;");
1984 end_scope();
1985 statement(ts: "");
1986 }
1987
1988 if (requires_explicit_fp16_packing)
1989 {
1990 // HLSL does not pack into a single word sadly :(
1991 statement(ts: "uint spvPackFloat2x16(min16float2 value)");
1992 begin_scope();
1993 statement(ts: "uint2 Packed = f32tof16(value);");
1994 statement(ts: "return Packed.x | (Packed.y << 16);");
1995 end_scope();
1996 statement(ts: "");
1997
1998 statement(ts: "min16float2 spvUnpackFloat2x16(uint value)");
1999 begin_scope();
2000 statement(ts: "return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));");
2001 end_scope();
2002 statement(ts: "");
2003 }
2004
2005 	// HLSL does not seem to have builtins for these operations, so roll them by hand ...
2006 if (requires_unorm8_packing)
2007 {
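		// x maps to bits 0-7 up through w in bits 24-31; e.g. float4(1.0, 0.25, 0.0, 1.0)
		// packs to 0xFF0040FFu (illustrative round-trip of the helpers emitted below).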
2008 statement(ts: "uint spvPackUnorm4x8(float4 value)");
2009 begin_scope();
2010 statement(ts: "uint4 Packed = uint4(round(saturate(value) * 255.0));");
2011 statement(ts: "return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24);");
2012 end_scope();
2013 statement(ts: "");
2014
2015 statement(ts: "float4 spvUnpackUnorm4x8(uint value)");
2016 begin_scope();
2017 statement(ts: "uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);");
2018 statement(ts: "return float4(Packed) / 255.0;");
2019 end_scope();
2020 statement(ts: "");
2021 }
2022
2023 if (requires_snorm8_packing)
2024 {
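		// Each component is clamped to [-1, 1], scaled by 127 and masked to 8 bits; unpacking
		// sign-extends each byte by shifting it into the top of an int and arithmetic-shifting back down.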
2025 statement(ts: "uint spvPackSnorm4x8(float4 value)");
2026 begin_scope();
2027 statement(ts: "int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff;");
2028 statement(ts: "return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24));");
2029 end_scope();
2030 statement(ts: "");
2031
2032 statement(ts: "float4 spvUnpackSnorm4x8(uint value)");
2033 begin_scope();
2034 statement(ts: "int SignedValue = int(value);");
2035 statement(ts: "int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24;");
2036 statement(ts: "return clamp(float4(Packed) / 127.0, -1.0, 1.0);");
2037 end_scope();
2038 statement(ts: "");
2039 }
2040
2041 if (requires_unorm16_packing)
2042 {
2043 statement(ts: "uint spvPackUnorm2x16(float2 value)");
2044 begin_scope();
2045 statement(ts: "uint2 Packed = uint2(round(saturate(value) * 65535.0));");
2046 statement(ts: "return Packed.x | (Packed.y << 16);");
2047 end_scope();
2048 statement(ts: "");
2049
2050 statement(ts: "float2 spvUnpackUnorm2x16(uint value)");
2051 begin_scope();
2052 statement(ts: "uint2 Packed = uint2(value & 0xffff, value >> 16);");
2053 statement(ts: "return float2(Packed) / 65535.0;");
2054 end_scope();
2055 statement(ts: "");
2056 }
2057
2058 if (requires_snorm16_packing)
2059 {
2060 statement(ts: "uint spvPackSnorm2x16(float2 value)");
2061 begin_scope();
2062 statement(ts: "int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff;");
2063 statement(ts: "return uint(Packed.x | (Packed.y << 16));");
2064 end_scope();
2065 statement(ts: "");
2066
2067 statement(ts: "float2 spvUnpackSnorm2x16(uint value)");
2068 begin_scope();
2069 statement(ts: "int SignedValue = int(value);");
2070 statement(ts: "int2 Packed = int2(SignedValue << 16, SignedValue) >> 16;");
2071 statement(ts: "return clamp(float2(Packed) / 32767.0, -1.0, 1.0);");
2072 end_scope();
2073 statement(ts: "");
2074 }
2075
2076 if (requires_bitfield_insert)
2077 {
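		// A shift by 32 is undefined, so the full mask is special-cased when Count == 32.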
2078 static const char *types[] = { "uint", "uint2", "uint3", "uint4" };
2079 for (auto &type : types)
2080 {
2081 statement(ts&: type, ts: " spvBitfieldInsert(", ts&: type, ts: " Base, ", ts&: type, ts: " Insert, uint Offset, uint Count)");
2082 begin_scope();
2083 statement(ts: "uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));");
2084 statement(ts: "return (Base & ~Mask) | ((Insert << Offset) & Mask);");
2085 end_scope();
2086 statement(ts: "");
2087 }
2088 }
2089
2090 if (requires_bitfield_extract)
2091 {
2092 static const char *unsigned_types[] = { "uint", "uint2", "uint3", "uint4" };
2093 for (auto &type : unsigned_types)
2094 {
2095 statement(ts&: type, ts: " spvBitfieldUExtract(", ts&: type, ts: " Base, uint Offset, uint Count)");
2096 begin_scope();
2097 statement(ts: "uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);");
2098 statement(ts: "return (Base >> Offset) & Mask;");
2099 end_scope();
2100 statement(ts: "");
2101 }
2102
2103 // In this overload, we will have to do sign-extension, which we will emulate by shifting up and down.
2104 static const char *signed_types[] = { "int", "int2", "int3", "int4" };
2105 for (auto &type : signed_types)
2106 {
2107 statement(ts&: type, ts: " spvBitfieldSExtract(", ts&: type, ts: " Base, int Offset, int Count)");
2108 begin_scope();
2109 statement(ts: "int Mask = Count == 32 ? -1 : ((1 << Count) - 1);");
2110 statement(ts&: type, ts: " Masked = (Base >> Offset) & Mask;");
2111 statement(ts: "int ExtendShift = (32 - Count) & 31;");
2112 statement(ts: "return (Masked << ExtendShift) >> ExtendShift;");
2113 end_scope();
2114 statement(ts: "");
2115 }
2116 }
2117
2118 if (requires_inverse_2x2)
2119 {
2120 statement(ts: "// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
2121 		statement(ts: "// adjoint and dividing by the determinant. The input matrix is not modified.");
2122 statement(ts: "float2x2 spvInverse(float2x2 m)");
2123 begin_scope();
2124 statement(ts: "float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)");
2125 statement_no_indent(ts: "");
2126 statement(ts: "// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
2127 statement(ts: "adj[0][0] = m[1][1];");
2128 statement(ts: "adj[0][1] = -m[0][1];");
2129 statement_no_indent(ts: "");
2130 statement(ts: "adj[1][0] = -m[1][0];");
2131 statement(ts: "adj[1][1] = m[0][0];");
2132 statement_no_indent(ts: "");
2133 statement(ts: "// Calculate the determinant as a combination of the cofactors of the first row.");
2134 statement(ts: "float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);");
2135 statement_no_indent(ts: "");
2136 statement(ts: "// Divide the classical adjoint matrix by the determinant.");
2137 		statement(ts: "// If determinant is zero, matrix is not invertible, so leave it unchanged.");
2138 statement(ts: "return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
2139 end_scope();
2140 statement(ts: "");
2141 }
2142
2143 if (requires_inverse_3x3)
2144 {
2145 statement(ts: "// Returns the determinant of a 2x2 matrix.");
2146 statement(ts: "float spvDet2x2(float a1, float a2, float b1, float b2)");
2147 begin_scope();
2148 statement(ts: "return a1 * b2 - b1 * a2;");
2149 end_scope();
2150 statement_no_indent(ts: "");
2151 statement(ts: "// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
2152 		statement(ts: "// adjoint and dividing by the determinant. The input matrix is not modified.");
2153 statement(ts: "float3x3 spvInverse(float3x3 m)");
2154 begin_scope();
2155 statement(ts: "float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)");
2156 statement_no_indent(ts: "");
2157 statement(ts: "// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
2158 statement(ts: "adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);");
2159 statement(ts: "adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);");
2160 statement(ts: "adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);");
2161 statement_no_indent(ts: "");
2162 statement(ts: "adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);");
2163 statement(ts: "adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);");
2164 statement(ts: "adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);");
2165 statement_no_indent(ts: "");
2166 statement(ts: "adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);");
2167 statement(ts: "adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);");
2168 statement(ts: "adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);");
2169 statement_no_indent(ts: "");
2170 statement(ts: "// Calculate the determinant as a combination of the cofactors of the first row.");
2171 statement(ts: "float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);");
2172 statement_no_indent(ts: "");
2173 statement(ts: "// Divide the classical adjoint matrix by the determinant.");
2174 		statement(ts: "// If determinant is zero, matrix is not invertible, so leave it unchanged.");
2175 statement(ts: "return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
2176 end_scope();
2177 statement(ts: "");
2178 }
2179
2180 if (requires_inverse_4x4)
2181 {
2182 if (!requires_inverse_3x3)
2183 {
2184 statement(ts: "// Returns the determinant of a 2x2 matrix.");
2185 statement(ts: "float spvDet2x2(float a1, float a2, float b1, float b2)");
2186 begin_scope();
2187 statement(ts: "return a1 * b2 - b1 * a2;");
2188 end_scope();
2189 statement(ts: "");
2190 }
2191
2192 statement(ts: "// Returns the determinant of a 3x3 matrix.");
2193 statement(ts: "float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, "
2194 "float c2, float c3)");
2195 begin_scope();
2196 statement(ts: "return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * "
2197 "spvDet2x2(a2, a3, "
2198 "b2, b3);");
2199 end_scope();
2200 statement_no_indent(ts: "");
2201 statement(ts: "// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
2202 		statement(ts: "// adjoint and dividing by the determinant. The input matrix is not modified.");
2203 statement(ts: "float4x4 spvInverse(float4x4 m)");
2204 begin_scope();
2205 statement(ts: "float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)");
2206 statement_no_indent(ts: "");
2207 statement(ts: "// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
2208 statement(
2209 ts: "adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
2210 "m[3][3]);");
2211 statement(
2212 ts: "adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
2213 "m[3][3]);");
2214 statement(
2215 ts: "adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], "
2216 "m[3][3]);");
2217 statement(
2218 ts: "adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], "
2219 "m[2][3]);");
2220 statement_no_indent(ts: "");
2221 statement(
2222 ts: "adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
2223 "m[3][3]);");
2224 statement(
2225 ts: "adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
2226 "m[3][3]);");
2227 statement(
2228 ts: "adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], "
2229 "m[3][3]);");
2230 statement(
2231 ts: "adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], "
2232 "m[2][3]);");
2233 statement_no_indent(ts: "");
2234 statement(
2235 ts: "adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
2236 "m[3][3]);");
2237 statement(
2238 ts: "adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
2239 "m[3][3]);");
2240 statement(
2241 ts: "adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], "
2242 "m[3][3]);");
2243 statement(
2244 ts: "adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], "
2245 "m[2][3]);");
2246 statement_no_indent(ts: "");
2247 statement(
2248 ts: "adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
2249 "m[3][2]);");
2250 statement(
2251 ts: "adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
2252 "m[3][2]);");
2253 statement(
2254 ts: "adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], "
2255 "m[3][2]);");
2256 statement(
2257 ts: "adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], "
2258 "m[2][2]);");
2259 statement_no_indent(ts: "");
2260 statement(ts: "// Calculate the determinant as a combination of the cofactors of the first row.");
2261 statement(ts: "float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] "
2262 "* m[3][0]);");
2263 statement_no_indent(ts: "");
2264 statement(ts: "// Divide the classical adjoint matrix by the determinant.");
2265 		statement(ts: "// If determinant is zero, matrix is not invertible, so leave it unchanged.");
2266 statement(ts: "return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
2267 end_scope();
2268 statement(ts: "");
2269 }
2270
2271 if (requires_scalar_reflect)
2272 {
2273 // FP16/FP64? No templates in HLSL.
2274 statement(ts: "float spvReflect(float i, float n)");
2275 begin_scope();
2276 statement(ts: "return i - 2.0 * dot(n, i) * n;");
2277 end_scope();
2278 statement(ts: "");
2279 }
2280
2281 if (requires_scalar_refract)
2282 {
2283 // FP16/FP64? No templates in HLSL.
2284 statement(ts: "float spvRefract(float i, float n, float eta)");
2285 begin_scope();
2286 statement(ts: "float NoI = n * i;");
2287 statement(ts: "float NoI2 = NoI * NoI;");
2288 statement(ts: "float k = 1.0 - eta * eta * (1.0 - NoI2);");
2289 statement(ts: "if (k < 0.0)");
2290 begin_scope();
2291 statement(ts: "return 0.0;");
2292 end_scope();
2293 statement(ts: "else");
2294 begin_scope();
2295 statement(ts: "return eta * i - (eta * NoI + sqrt(k)) * n;");
2296 end_scope();
2297 end_scope();
2298 statement(ts: "");
2299 }
2300
2301 if (requires_scalar_faceforward)
2302 {
2303 // FP16/FP64? No templates in HLSL.
2304 statement(ts: "float spvFaceForward(float n, float i, float nref)");
2305 begin_scope();
2306 statement(ts: "return i * nref < 0.0 ? n : -n;");
2307 end_scope();
2308 statement(ts: "");
2309 }
2310
2311 for (TypeID type_id : composite_selection_workaround_types)
2312 {
2313 // Need out variable since HLSL does not support returning arrays.
2314 auto &type = get<SPIRType>(id: type_id);
2315 auto type_str = type_to_glsl(type);
2316 auto type_arr_str = type_to_array_glsl(type, variable_id: 0);
2317 statement(ts: "void spvSelectComposite(out ", ts&: type_str, ts: " out_value", ts&: type_arr_str, ts: ", bool cond, ",
2318 ts&: type_str, ts: " true_val", ts&: type_arr_str, ts: ", ",
2319 ts&: type_str, ts: " false_val", ts&: type_arr_str, ts: ")");
2320 begin_scope();
2321 statement(ts: "if (cond)");
2322 begin_scope();
2323 statement(ts: "out_value = true_val;");
2324 end_scope();
2325 statement(ts: "else");
2326 begin_scope();
2327 statement(ts: "out_value = false_val;");
2328 end_scope();
2329 end_scope();
2330 statement(ts: "");
2331 }
2332
2333 if (is_mesh_shader && options.vertex.flip_vert_y)
2334 {
2335 statement(ts: "float4 spvFlipVertY(float4 v)");
2336 begin_scope();
2337 statement(ts: "return float4(v.x, -v.y, v.z, v.w);");
2338 end_scope();
2339 statement(ts: "");
2340 statement(ts: "float spvFlipVertY(float v)");
2341 begin_scope();
2342 statement(ts: "return -v;");
2343 end_scope();
2344 statement(ts: "");
2345 }
2346}
2347
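// Emits the spvTextureSize/spvImageSize overloads requested in variant_mask. Each variant is
// encoded as bit (16 * type_index + dim_index); UAV variants additionally carry the unorm/snorm
// qualifier and vector size of the image type. A typical SRV overload looks like:
//   uint2 spvTextureSize(Texture2D<float4> Tex, uint Level, out uint Param)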
2348void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char *vecsize_qualifier, bool uav,
2349 const char *type_qualifier)
2350{
2351 if (variant_mask == 0)
2352 return;
2353
2354 static const char *types[QueryTypeCount] = { "float", "int", "uint" };
2355 static const char *dims[QueryDimCount] = { "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray",
2356 "Texture3D", "Buffer", "TextureCube", "TextureCubeArray",
2357 "Texture2DMS", "Texture2DMSArray" };
2358
2359 static const bool has_lod[QueryDimCount] = { true, true, true, true, true, false, true, true, false, false };
2360
2361 static const char *ret_types[QueryDimCount] = {
2362 "uint", "uint2", "uint2", "uint3", "uint3", "uint", "uint2", "uint3", "uint2", "uint3",
2363 };
2364
2365 static const uint32_t return_arguments[QueryDimCount] = {
2366 1, 2, 2, 3, 3, 1, 2, 3, 2, 3,
2367 };
2368
2369 for (uint32_t index = 0; index < QueryDimCount; index++)
2370 {
2371 for (uint32_t type_index = 0; type_index < QueryTypeCount; type_index++)
2372 {
2373 uint32_t bit = 16 * type_index + index;
2374 uint64_t mask = 1ull << bit;
2375
2376 if ((variant_mask & mask) == 0)
2377 continue;
2378
2379 statement(ts&: ret_types[index], ts: " spv", ts: (uav ? "Image" : "Texture"), ts: "Size(", ts: (uav ? "RW" : ""),
2380 ts&: dims[index], ts: "<", ts&: type_qualifier, ts&: types[type_index], ts&: vecsize_qualifier, ts: "> Tex, ",
2381 ts: (uav ? "" : "uint Level, "), ts: "out uint Param)");
2382 begin_scope();
2383 statement(ts&: ret_types[index], ts: " ret;");
2384 switch (return_arguments[index])
2385 {
2386 case 1:
2387 if (has_lod[index] && !uav)
2388 statement(ts: "Tex.GetDimensions(Level, ret.x, Param);");
2389 else
2390 {
2391 statement(ts: "Tex.GetDimensions(ret.x);");
2392 statement(ts: "Param = 0u;");
2393 }
2394 break;
2395 case 2:
2396 if (has_lod[index] && !uav)
2397 statement(ts: "Tex.GetDimensions(Level, ret.x, ret.y, Param);");
2398 else if (!uav)
2399 statement(ts: "Tex.GetDimensions(ret.x, ret.y, Param);");
2400 else
2401 {
2402 statement(ts: "Tex.GetDimensions(ret.x, ret.y);");
2403 statement(ts: "Param = 0u;");
2404 }
2405 break;
2406 case 3:
2407 if (has_lod[index] && !uav)
2408 statement(ts: "Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);");
2409 else if (!uav)
2410 statement(ts: "Tex.GetDimensions(ret.x, ret.y, ret.z, Param);");
2411 else
2412 {
2413 statement(ts: "Tex.GetDimensions(ret.x, ret.y, ret.z);");
2414 statement(ts: "Param = 0u;");
2415 }
2416 break;
2417 }
2418
2419 statement(ts: "return ret;");
2420 end_scope();
2421 statement(ts: "");
2422 }
2423 }
2424}
2425
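// Mesh shader outputs become entry-point arguments in HLSL, so locate (or synthesize) the
// per-vertex and per-primitive output blocks and then propagate their IDs as arguments through
// every function in the call graph that writes mesh outputs.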
2426void CompilerHLSL::analyze_meshlet_writes()
2427{
2428 uint32_t id_per_vertex = 0;
2429 uint32_t id_per_primitive = 0;
2430 bool need_per_primitive = false;
2431 bool need_per_vertex = false;
2432
2433 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
2434 auto &type = this->get<SPIRType>(id: var.basetype);
2435 bool block = has_decoration(id: type.self, decoration: DecorationBlock);
2436 if (var.storage == StorageClassOutput && block && is_builtin_variable(var))
2437 {
2438 auto flags = get_buffer_block_flags(id: var.self);
2439 if (flags.get(bit: DecorationPerPrimitiveEXT))
2440 id_per_primitive = var.self;
2441 else
2442 id_per_vertex = var.self;
2443 }
2444 else if (var.storage == StorageClassOutput)
2445 {
2446 Bitset flags;
2447 if (block)
2448 flags = get_buffer_block_flags(id: var.self);
2449 else
2450 flags = get_decoration_bitset(id: var.self);
2451
2452 if (flags.get(bit: DecorationPerPrimitiveEXT))
2453 need_per_primitive = true;
2454 else
2455 need_per_vertex = true;
2456 }
2457 });
2458
2459 // If we have per-primitive outputs, and no per-primitive builtins,
2460 	// an empty version of gl_MeshPerPrimitiveEXT will be emitted.
2461 // If we don't use block IO for vertex output, we'll also need to synthesize the PerVertex block.
2462
2463 const auto generate_block = [&](const char *block_name, const char *instance_name, bool per_primitive) -> uint32_t {
2464 auto &execution = get_entry_point();
2465
2466 uint32_t op_type = ir.increase_bound_by(count: 4);
2467 uint32_t op_arr = op_type + 1;
2468 uint32_t op_ptr = op_type + 2;
2469 uint32_t op_var = op_type + 3;
2470
2471 auto &type = set<SPIRType>(id: op_type, args: OpTypeStruct);
2472 type.basetype = SPIRType::Struct;
2473 set_name(id: op_type, name: block_name);
2474 set_decoration(id: op_type, decoration: DecorationBlock);
2475 if (per_primitive)
2476 set_decoration(id: op_type, decoration: DecorationPerPrimitiveEXT);
2477
2478 auto &arr = set<SPIRType>(id: op_arr, args&: type);
2479 arr.parent_type = type.self;
2480 arr.array.push_back(t: per_primitive ? execution.output_primitives : execution.output_vertices);
2481 arr.array_size_literal.push_back(t: true);
2482
2483 auto &ptr = set<SPIRType>(id: op_ptr, args&: arr);
2484 ptr.parent_type = arr.self;
2485 ptr.pointer = true;
2486 ptr.pointer_depth++;
2487 ptr.storage = StorageClassOutput;
2488 set_decoration(id: op_ptr, decoration: DecorationBlock);
2489 set_name(id: op_ptr, name: block_name);
2490
2491 auto &var = set<SPIRVariable>(id: op_var, args&: op_ptr, args: StorageClassOutput);
2492 if (per_primitive)
2493 set_decoration(id: op_var, decoration: DecorationPerPrimitiveEXT);
2494 set_name(id: op_var, name: instance_name);
2495 execution.interface_variables.push_back(t: var.self);
2496
2497 return op_var;
2498 };
2499
2500 if (id_per_vertex == 0 && need_per_vertex)
2501 id_per_vertex = generate_block("gl_MeshPerVertexEXT", "gl_MeshVerticesEXT", false);
2502 if (id_per_primitive == 0 && need_per_primitive)
2503 id_per_primitive = generate_block("gl_MeshPerPrimitiveEXT", "gl_MeshPrimitivesEXT", true);
2504
2505 unordered_set<uint32_t> processed_func_ids;
2506 analyze_meshlet_writes(func_id: ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids);
2507}
2508
2509void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive,
2510 std::unordered_set<uint32_t> &processed_func_ids)
2511{
2512 // Avoid processing a function more than once
2513 if (processed_func_ids.find(x: func_id) != processed_func_ids.end())
2514 return;
2515 processed_func_ids.insert(x: func_id);
2516
2517 auto &func = get<SPIRFunction>(id: func_id);
2518 // Recursively establish global args added to functions on which we depend.
2519 for (auto& block : func.blocks)
2520 {
2521 auto &b = get<SPIRBlock>(id: block);
2522 for (auto &i : b.ops)
2523 {
2524 auto ops = stream(instr: i);
2525 auto op = static_cast<Op>(i.op);
2526
2527 switch (op)
2528 {
2529 case OpFunctionCall:
2530 {
2531 // Then recurse into the function itself to extract globals used internally in the function
2532 uint32_t inner_func_id = ops[2];
2533 analyze_meshlet_writes(func_id: inner_func_id, id_per_vertex, id_per_primitive, processed_func_ids);
2534 auto &inner_func = get<SPIRFunction>(id: inner_func_id);
2535 for (auto &iarg : inner_func.arguments)
2536 {
2537 if (!iarg.alias_global_variable)
2538 continue;
2539
2540 bool already_declared = false;
2541 for (auto &arg : func.arguments)
2542 {
2543 if (arg.id == iarg.id)
2544 {
2545 already_declared = true;
2546 break;
2547 }
2548 }
2549
2550 if (!already_declared)
2551 {
2552 // basetype is effectively ignored here since we declare the argument
2553 // with explicit types. Just pass down a valid type.
2554 func.arguments.push_back(t: { .type: expression_type_id(id: iarg.id), .id: iarg.id,
2555 .read_count: iarg.read_count, .write_count: iarg.write_count, .alias_global_variable: true });
2556 }
2557 }
2558 break;
2559 }
2560
2561 case OpStore:
2562 case OpLoad:
2563 case OpInBoundsAccessChain:
2564 case OpAccessChain:
2565 case OpPtrAccessChain:
2566 case OpInBoundsPtrAccessChain:
2567 case OpArrayLength:
2568 {
2569 auto *var = maybe_get<SPIRVariable>(id: ops[op == OpStore ? 0 : 2]);
2570 if (var && (var->storage == StorageClassOutput || var->storage == StorageClassTaskPayloadWorkgroupEXT))
2571 {
2572 bool already_declared = false;
2573 auto builtin_type = BuiltIn(get_decoration(id: var->self, decoration: DecorationBuiltIn));
2574
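				// Primitive index builtins and the task payload keep their own variable;
				// every other output access is redirected to the shared per-vertex or per-primitive block.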
2575 uint32_t var_id = var->self;
2576 if (var->storage != StorageClassTaskPayloadWorkgroupEXT &&
2577 builtin_type != BuiltInPrimitivePointIndicesEXT &&
2578 builtin_type != BuiltInPrimitiveLineIndicesEXT &&
2579 builtin_type != BuiltInPrimitiveTriangleIndicesEXT)
2580 {
2581 var_id = is_per_primitive_variable(var: *var) ? id_per_primitive : id_per_vertex;
2582 }
2583
2584 for (auto &arg : func.arguments)
2585 {
2586 if (arg.id == var_id)
2587 {
2588 already_declared = true;
2589 break;
2590 }
2591 }
2592
2593 if (!already_declared)
2594 {
2595 // basetype is effectively ignored here since we declare the argument
2596 // with explicit types. Just pass down a valid type.
2597 uint32_t type_id = expression_type_id(id: var_id);
2598 if (var->storage == StorageClassTaskPayloadWorkgroupEXT)
2599 func.arguments.push_back(t: { .type: type_id, .id: var_id, .read_count: 1u, .write_count: 0u, .alias_global_variable: true });
2600 else
2601 func.arguments.push_back(t: { .type: type_id, .id: var_id, .read_count: 1u, .write_count: 1u, .alias_global_variable: true });
2602 }
2603 }
2604 break;
2605 }
2606
2607 default:
2608 break;
2609 }
2610 }
2611 }
2612}
2613
2614string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index)
2615{
2616 auto &flags = get_member_decoration_bitset(id: type.self, index);
2617
2618 // HLSL can emit row_major or column_major decoration in any struct.
2619 // Do not try to merge combined decorations for children like in GLSL.
2620
2621 // Flip the convention. HLSL is a bit odd in that the memory layout is column major ... but the language API is "row-major".
2622 // The way to deal with this is to multiply everything in inverse order, and reverse the memory layout.
2623 if (flags.get(bit: DecorationColMajor))
2624 return "row_major ";
2625 else if (flags.get(bit: DecorationRowMajor))
2626 return "column_major ";
2627
2628 return "";
2629}
2630
2631void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
2632 const string &qualifier, uint32_t base_offset)
2633{
2634 auto &membertype = get<SPIRType>(id: member_type_id);
2635
2636 Bitset memberflags;
2637 auto &memb = ir.meta[type.self].members;
2638 if (index < memb.size())
2639 memberflags = memb[index].decoration_flags;
2640
2641 string packing_offset;
2642 bool is_push_constant = type.storage == StorageClassPushConstant;
2643
2644 if ((has_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset) || is_push_constant) &&
2645 has_member_decoration(id: type.self, index, decoration: DecorationOffset))
2646 {
2647 uint32_t offset = memb[index].offset - base_offset;
2648 if (offset & 3)
2649 SPIRV_CROSS_THROW("Cannot pack on tighter bounds than 4 bytes in HLSL.");
2650
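		// packoffset addresses 16-byte registers with a component swizzle;
		// e.g. a byte offset of 20 becomes ": packoffset(c1.y)".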
2651 static const char *packing_swizzle[] = { "", ".y", ".z", ".w" };
2652 packing_offset = join(ts: " : packoffset(c", ts: offset / 16, ts&: packing_swizzle[(offset & 15) >> 2], ts: ")");
2653 }
2654
2655 statement(ts: layout_for_member(type, index), ts: qualifier,
2656 ts: variable_decl(type: membertype, name: to_member_name(type, index)), ts&: packing_offset, ts: ";");
2657}
2658
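// Emits a RayQuery method call; ops[3] is a constant selecting between the committed-hit and
// candidate-hit variant of the method name.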
2659 void CompilerHLSL::emit_rayquery_function(const char *committed, const char *candidate, const uint32_t *ops)
2660 {
2661 	flush_variable_declaration(id: ops[0]);
2662 	uint32_t is_committed = evaluate_constant_u32(id: ops[3]);
2663 	emit_op(result_type: ops[0], result_id: ops[1], rhs: join(ts: to_expression(id: ops[2]), ts&: is_committed ? committed : candidate), forward_rhs: false);
2664}
2665
2666void CompilerHLSL::emit_mesh_tasks(SPIRBlock &block)
2667{
2668 if (block.mesh.payload != 0)
2669 {
2670 statement(ts: "DispatchMesh(", ts: to_unpacked_expression(id: block.mesh.groups[0]), ts: ", ", ts: to_unpacked_expression(id: block.mesh.groups[1]), ts: ", ",
2671 ts: to_unpacked_expression(id: block.mesh.groups[2]), ts: ", ", ts: to_unpacked_expression(id: block.mesh.payload), ts: ");");
2672 }
2673 else
2674 {
2675 SPIRV_CROSS_THROW("Amplification shader in HLSL must have payload");
2676 }
2677}
2678
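// SSBOs (and BufferBlock UBOs) are emitted as (RW/RasterizerOrdered)ByteAddressBuffer, or as
// StructuredBuffer variants when the user marked the type as structured. Plain UBOs become a
// cbuffer with packoffset-qualified members, or ConstantBuffer<T> for UBO arrays on SM 5.1+.
// For example, the emitted declarations look roughly like (register assignment depends on the
// resource bindings):
//   RWByteAddressBuffer ssbo : register(u0);
//   cbuffer ubo : register(b0) { ... };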
2679void CompilerHLSL::emit_buffer_block(const SPIRVariable &var)
2680{
2681 auto &type = get<SPIRType>(id: var.basetype);
2682
2683 bool is_uav = var.storage == StorageClassStorageBuffer || has_decoration(id: type.self, decoration: DecorationBufferBlock);
2684
2685 if (flattened_buffer_blocks.count(x: var.self))
2686 {
2687 emit_buffer_block_flattened(type: var);
2688 }
2689 else if (is_uav)
2690 {
2691 Bitset flags = ir.get_buffer_block_flags(var);
2692 bool is_readonly = flags.get(bit: DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(id: var.self);
2693 bool is_coherent = flags.get(bit: DecorationCoherent) && !is_readonly;
2694 bool is_interlocked = interlocked_resources.count(x: var.self) > 0;
2695
2696 auto to_structuredbuffer_subtype_name = [this](const SPIRType &parent_type) -> std::string
2697 {
2698 if (parent_type.basetype == SPIRType::Struct && parent_type.member_types.size() == 1)
2699 {
2700 				// Use the type of the first struct member, as a StructuredBuffer will have only one '._m0' field in SPIR-V
2701 const auto &member0_type = this->get<SPIRType>(id: parent_type.member_types.front());
2702 return this->type_to_glsl(type: member0_type);
2703 }
2704 else
2705 {
2706 // Otherwise, this StructuredBuffer only has a basic subtype, e.g. StructuredBuffer<int>
2707 return this->type_to_glsl(type: parent_type);
2708 }
2709 };
2710
2711 std::string type_name;
2712 if (is_user_type_structured(id: var.self))
2713 type_name = join(ts: is_readonly ? "" : is_interlocked ? "RasterizerOrdered" : "RW", ts: "StructuredBuffer<", ts: to_structuredbuffer_subtype_name(type), ts: ">");
2714 else
2715 type_name = is_readonly ? "ByteAddressBuffer" : is_interlocked ? "RasterizerOrderedByteAddressBuffer" : "RWByteAddressBuffer";
2716
2717 add_resource_name(id: var.self);
2718 statement(ts: is_coherent ? "globallycoherent " : "", ts&: type_name, ts: " ", ts: to_name(id: var.self), ts: type_to_array_glsl(type, variable_id: var.self),
2719 ts: to_resource_binding(var), ts: ";");
2720 }
2721 else
2722 {
2723 if (type.array.empty())
2724 {
2725 // Flatten the top-level struct so we can use packoffset,
2726 // this restriction is similar to GLSL where layout(offset) is not possible on sub-structs.
2727 flattened_structs[var.self] = false;
2728
2729 // Prefer the block name if possible.
2730 auto buffer_name = to_name(id: type.self, allow_alias: false);
2731 if (ir.meta[type.self].decoration.alias.empty() ||
2732 resource_names.find(x: buffer_name) != end(cont&: resource_names) ||
2733 block_names.find(x: buffer_name) != end(cont&: block_names))
2734 {
2735 buffer_name = get_block_fallback_name(id: var.self);
2736 }
2737
2738 add_variable(variables_primary&: block_names, variables_secondary: resource_names, name&: buffer_name);
2739
2740 // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2741 // This cannot conflict with anything else, so we're safe now.
2742 if (buffer_name.empty())
2743 buffer_name = join(ts: "_", ts&: get<SPIRType>(id: var.basetype).self, ts: "_", ts: var.self);
2744
2745 uint32_t failed_index = 0;
2746 if (buffer_is_packing_standard(type, packing: BufferPackingHLSLCbufferPackOffset, failed_index: &failed_index))
2747 set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset);
2748 else
2749 {
2750 SPIRV_CROSS_THROW(join("cbuffer ID ", var.self, " (name: ", buffer_name, "), member index ",
2751 failed_index, " (name: ", to_member_name(type, failed_index),
2752 ") cannot be expressed with either HLSL packing layout or packoffset."));
2753 }
2754
2755 block_names.insert(x: buffer_name);
2756
2757 // Save for post-reflection later.
2758 declared_block_names[var.self] = buffer_name;
2759
2760 type.member_name_cache.clear();
2761 // var.self can be used as a backup name for the block name,
2762 // so we need to make sure we don't disturb the name here on a recompile.
2763 // It will need to be reset if we have to recompile.
2764 preserve_alias_on_reset(id: var.self);
2765 add_resource_name(id: var.self);
2766 statement(ts: "cbuffer ", ts&: buffer_name, ts: to_resource_binding(var));
2767 begin_scope();
2768
2769 uint32_t i = 0;
2770 for (auto &member : type.member_types)
2771 {
2772 add_member_name(type, name: i);
2773 auto backup_name = get_member_name(id: type.self, index: i);
2774 auto member_name = to_member_name(type, index: i);
2775 member_name = join(ts: to_name(id: var.self), ts: "_", ts&: member_name);
2776 ParsedIR::sanitize_underscores(str&: member_name);
2777 set_member_name(id: type.self, index: i, name: member_name);
2778 emit_struct_member(type, member_type_id: member, index: i, qualifier: "");
2779 set_member_name(id: type.self, index: i, name: backup_name);
2780 i++;
2781 }
2782
2783 end_scope_decl();
2784 statement(ts: "");
2785 }
2786 else
2787 {
2788 if (hlsl_options.shader_model < 51)
2789 SPIRV_CROSS_THROW(
2790 "Need ConstantBuffer<T> to use arrays of UBOs, but this is only supported in SM 5.1.");
2791
2792 add_resource_name(id: type.self);
2793 add_resource_name(id: var.self);
2794
2795 			// ConstantBuffer<T> does not support packoffset, so it is unusable unless everything aligns as we expect.
2796 uint32_t failed_index = 0;
2797 if (!buffer_is_packing_standard(type, packing: BufferPackingHLSLCbuffer, failed_index: &failed_index))
2798 {
2799 SPIRV_CROSS_THROW(join("HLSL ConstantBuffer<T> ID ", var.self, " (name: ", to_name(type.self),
2800 "), member index ", failed_index, " (name: ", to_member_name(type, failed_index),
2801 ") cannot be expressed with normal HLSL packing rules."));
2802 }
2803
2804 emit_struct(type&: get<SPIRType>(id: type.self));
2805 statement(ts: "ConstantBuffer<", ts: to_name(id: type.self), ts: "> ", ts: to_name(id: var.self), ts: type_to_array_glsl(type, variable_id: var.self),
2806 ts: to_resource_binding(var), ts: ";");
2807 }
2808 }
2809}
2810
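// Push constants are emitted either as a regular cbuffer (when no root constant ranges were
// specified) or as one cbuffer per root constant range, containing only the members whose
// offsets fall inside that range.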
2811void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var)
2812{
2813 if (flattened_buffer_blocks.count(x: var.self))
2814 {
2815 emit_buffer_block_flattened(type: var);
2816 }
2817 else if (root_constants_layout.empty())
2818 {
2819 emit_buffer_block(var);
2820 }
2821 else
2822 {
2823 for (const auto &layout : root_constants_layout)
2824 {
2825 auto &type = get<SPIRType>(id: var.basetype);
2826
2827 uint32_t failed_index = 0;
2828 if (buffer_is_packing_standard(type, packing: BufferPackingHLSLCbufferPackOffset, failed_index: &failed_index, start_offset: layout.start,
2829 end_offset: layout.end))
2830 set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset);
2831 else
2832 {
2833 SPIRV_CROSS_THROW(join("Root constant cbuffer ID ", var.self, " (name: ", to_name(type.self), ")",
2834 ", member index ", failed_index, " (name: ", to_member_name(type, failed_index),
2835 ") cannot be expressed with either HLSL packing layout or packoffset."));
2836 }
2837
2838 flattened_structs[var.self] = false;
2839 type.member_name_cache.clear();
2840 add_resource_name(id: var.self);
2841 auto &memb = ir.meta[type.self].members;
2842
2843 statement(ts: "cbuffer SPIRV_CROSS_RootConstant_", ts: to_name(id: var.self),
2844 ts: to_resource_register(flag: HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT, space: 'b', binding: layout.binding, set: layout.space));
2845 begin_scope();
2846
2847 			// Index of the next field in the generated root-constant cbuffer.
2848 auto constant_index = 0u;
2849
2850 			// Iterate over all members of the push constant block and check which of the fields
2851 			// fit into the given root constant layout.
2852 for (auto i = 0u; i < memb.size(); i++)
2853 {
2854 const auto offset = memb[i].offset;
2855 if (layout.start <= offset && offset < layout.end)
2856 {
2857 const auto &member = type.member_types[i];
2858
2859 add_member_name(type, name: constant_index);
2860 auto backup_name = get_member_name(id: type.self, index: i);
2861 auto member_name = to_member_name(type, index: i);
2862 member_name = join(ts: to_name(id: var.self), ts: "_", ts&: member_name);
2863 ParsedIR::sanitize_underscores(str&: member_name);
2864 set_member_name(id: type.self, index: constant_index, name: member_name);
2865 emit_struct_member(type, member_type_id: member, index: i, qualifier: "", base_offset: layout.start);
2866 set_member_name(id: type.self, index: constant_index, name: backup_name);
2867
2868 constant_index++;
2869 }
2870 }
2871
2872 end_scope_decl();
2873 }
2874 }
2875}
2876
2877string CompilerHLSL::to_sampler_expression(uint32_t id)
2878{
2879 auto expr = join(ts: "_", ts: to_non_uniform_aware_expression(id));
2880 auto index = expr.find_first_of(c: '[');
2881 if (index == string::npos)
2882 {
2883 return expr + "_sampler";
2884 }
2885 else
2886 {
2887 // We have an expression like _ident[array], so we cannot tack on _sampler, insert it inside the string instead.
2888 return expr.insert(pos: index, s: "_sampler");
2889 }
2890}
2891
2892void CompilerHLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
2893{
2894 if (hlsl_options.shader_model >= 40 && combined_image_samplers.empty())
2895 {
2896 set<SPIRCombinedImageSampler>(id: result_id, args&: result_type, args&: image_id, args&: samp_id);
2897 }
2898 else
2899 {
2900 // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
2901 emit_op(result_type, result_id, rhs: to_combined_image_sampler(image_id, samp_id), forward_rhs: true, suppress_usage_tracking: true);
2902 }
2903}
2904
2905string CompilerHLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id)
2906{
2907 string arg_str = CompilerGLSL::to_func_call_arg(arg, id);
2908
2909 if (hlsl_options.shader_model <= 30)
2910 return arg_str;
2911
2912 // Manufacture automatic sampler arg if the arg is a SampledImage texture and we're in modern HLSL.
2913 auto &type = expression_type(id);
2914
2915 // We don't have to consider combined image samplers here via OpSampledImage because
2916 // those variables cannot be passed as arguments to functions.
2917 // Only global SampledImage variables may be used as arguments.
2918 if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
2919 arg_str += ", " + to_sampler_expression(id);
2920
2921 return arg_str;
2922}
2923
2924string CompilerHLSL::get_inner_entry_point_name() const
2925{
2926 auto &execution = get_entry_point();
2927
2928 if (hlsl_options.use_entry_point_name)
2929 {
2930 auto name = join(ts: execution.name, ts: "_inner");
2931 ParsedIR::sanitize_underscores(str&: name);
2932 return name;
2933 }
2934
2935 if (execution.model == ExecutionModelVertex)
2936 return "vert_main";
2937 else if (execution.model == ExecutionModelFragment)
2938 return "frag_main";
2939 else if (execution.model == ExecutionModelGLCompute)
2940 return "comp_main";
2941 else if (execution.model == ExecutionModelMeshEXT)
2942 return "mesh_main";
2943 else if (execution.model == ExecutionModelTaskEXT)
2944 return "task_main";
2945 else
2946 SPIRV_CROSS_THROW("Unsupported execution model.");
2947}
2948
2949void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
2950{
2951 if (func.self != ir.default_entry_point)
2952 add_function_overload(func);
2953
2954 // Avoid shadow declarations.
2955 local_variable_names = resource_names;
2956
2957 string decl;
2958
2959 auto &type = get<SPIRType>(id: func.return_type);
2960 if (type.array.empty())
2961 {
2962 decl += flags_to_qualifiers_glsl(type, flags: return_flags);
2963 decl += type_to_glsl(type);
2964 decl += " ";
2965 }
2966 else
2967 {
2968 // We cannot return arrays in HLSL, so "return" through an out variable.
2969 decl = "void ";
2970 }
2971
2972 if (func.self == ir.default_entry_point)
2973 {
2974 decl += get_inner_entry_point_name();
2975 processing_entry_point = true;
2976 }
2977 else
2978 decl += to_name(id: func.self);
2979
2980 decl += "(";
2981 SmallVector<string> arglist;
2982
2983 if (!type.array.empty())
2984 {
2985 // Fake array returns by writing to an out array instead.
2986 string out_argument;
2987 out_argument += "out ";
2988 out_argument += type_to_glsl(type);
2989 out_argument += " ";
2990 out_argument += "spvReturnValue";
2991 out_argument += type_to_array_glsl(type, variable_id: 0);
2992 arglist.push_back(t: std::move(out_argument));
2993 }
2994
2995 for (auto &arg : func.arguments)
2996 {
2997 // Do not pass in separate images or samplers if we're remapping
2998 // to combined image samplers.
2999 if (skip_argument(id: arg.id))
3000 continue;
3001
3002 // Might change the variable name if it already exists in this function.
3003 		// SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
3004 		// to use the same name for different variables.
3005 		// Since we want to keep the output debuggable and somewhat sane, use fallback names for variables which are duplicates.
3006 add_local_variable_name(id: arg.id);
3007
3008 arglist.push_back(t: argument_decl(arg));
3009
3010 // Flatten a combined sampler to two separate arguments in modern HLSL.
3011 auto &arg_type = get<SPIRType>(id: arg.type);
3012 if (hlsl_options.shader_model > 30 && arg_type.basetype == SPIRType::SampledImage &&
3013 arg_type.image.dim != DimBuffer)
3014 {
3015 // Manufacture automatic sampler arg for SampledImage texture
3016 arglist.push_back(t: join(ts: is_depth_image(type: arg_type, id: arg.id) ? "SamplerComparisonState " : "SamplerState ",
3017 ts: to_sampler_expression(id: arg.id), ts: type_to_array_glsl(type: arg_type, variable_id: arg.id)));
3018 }
3019
3020 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
3021 auto *var = maybe_get<SPIRVariable>(id: arg.id);
3022 if (var)
3023 var->parameter = &arg;
3024 }
3025
3026 for (auto &arg : func.shadow_arguments)
3027 {
3028 // Might change the variable name if it already exists in this function.
3029 		// SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
3030 		// to use the same name for different variables.
3031 		// Since we want to keep the output debuggable and somewhat sane, use fallback names for variables which are duplicates.
3032 add_local_variable_name(id: arg.id);
3033
3034 arglist.push_back(t: argument_decl(arg));
3035
3036 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
3037 auto *var = maybe_get<SPIRVariable>(id: arg.id);
3038 if (var)
3039 var->parameter = &arg;
3040 }
3041
3042 decl += merge(list: arglist);
3043 decl += ")";
3044 statement(ts&: decl);
3045}
3046
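// Emits the outer HLSL entry point: stage attributes such as [numthreads], [outputtopology]
// and [earlydepthstencil], copying of builtins and user inputs from SPIRV_Cross_Input into
// the shader globals, the call into the inner *_main function, and the write-back of outputs.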
3047void CompilerHLSL::emit_hlsl_entry_point()
3048{
3049 SmallVector<string> arguments;
3050
3051 if (require_input)
3052 arguments.push_back(t: "SPIRV_Cross_Input stage_input");
3053
3054 auto &execution = get_entry_point();
3055
3056 switch (execution.model)
3057 {
3058 case ExecutionModelTaskEXT:
3059 case ExecutionModelMeshEXT:
3060 case ExecutionModelGLCompute:
3061 {
3062 if (execution.model == ExecutionModelMeshEXT)
3063 {
3064 if (execution.flags.get(bit: ExecutionModeOutputTrianglesEXT))
3065 statement(ts: "[outputtopology(\"triangle\")]");
3066 else if (execution.flags.get(bit: ExecutionModeOutputLinesEXT))
3067 statement(ts: "[outputtopology(\"line\")]");
3068 else if (execution.flags.get(bit: ExecutionModeOutputPoints))
3069 SPIRV_CROSS_THROW("Topology mode \"points\" is not supported in DirectX");
3070
3071 auto &func = get<SPIRFunction>(id: ir.default_entry_point);
3072 for (auto &arg : func.arguments)
3073 {
3074 auto &var = get<SPIRVariable>(id: arg.id);
3075 auto &base_type = get<SPIRType>(id: var.basetype);
3076 bool block = has_decoration(id: base_type.self, decoration: DecorationBlock);
3077 if (var.storage == StorageClassTaskPayloadWorkgroupEXT)
3078 {
3079 arguments.push_back(t: "in payload " + variable_decl(variable: var));
3080 }
3081 else if (block)
3082 {
3083 auto flags = get_buffer_block_flags(id: var.self);
3084 if (flags.get(bit: DecorationPerPrimitiveEXT) || has_decoration(id: arg.id, decoration: DecorationPerPrimitiveEXT))
3085 {
3086 arguments.push_back(t: "out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[" +
3087 std::to_string(val: execution.output_primitives) + "]");
3088 }
3089 else
3090 {
3091 arguments.push_back(t: "out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[" +
3092 std::to_string(val: execution.output_vertices) + "]");
3093 }
3094 }
3095 else
3096 {
3097 if (execution.flags.get(bit: ExecutionModeOutputTrianglesEXT))
3098 {
3099 arguments.push_back(t: "out indices uint3 gl_PrimitiveTriangleIndicesEXT[" +
3100 std::to_string(val: execution.output_primitives) + "]");
3101 }
3102 else
3103 {
3104 arguments.push_back(t: "out indices uint2 gl_PrimitiveLineIndicesEXT[" +
3105 std::to_string(val: execution.output_primitives) + "]");
3106 }
3107 }
3108 }
3109 }
3110 SpecializationConstant wg_x, wg_y, wg_z;
3111 get_work_group_size_specialization_constants(x&: wg_x, y&: wg_y, z&: wg_z);
3112
3113 uint32_t x = execution.workgroup_size.x;
3114 uint32_t y = execution.workgroup_size.y;
3115 uint32_t z = execution.workgroup_size.z;
3116
3117 if (!execution.workgroup_size.constant && execution.flags.get(bit: ExecutionModeLocalSizeId))
3118 {
3119 if (execution.workgroup_size.id_x)
3120 x = get<SPIRConstant>(id: execution.workgroup_size.id_x).scalar();
3121 if (execution.workgroup_size.id_y)
3122 y = get<SPIRConstant>(id: execution.workgroup_size.id_y).scalar();
3123 if (execution.workgroup_size.id_z)
3124 z = get<SPIRConstant>(id: execution.workgroup_size.id_z).scalar();
3125 }
3126
3127 auto x_expr = wg_x.id ? get<SPIRConstant>(id: wg_x.id).specialization_constant_macro_name : to_string(val: x);
3128 auto y_expr = wg_y.id ? get<SPIRConstant>(id: wg_y.id).specialization_constant_macro_name : to_string(val: y);
3129 auto z_expr = wg_z.id ? get<SPIRConstant>(id: wg_z.id).specialization_constant_macro_name : to_string(val: z);
3130
3131 statement(ts: "[numthreads(", ts&: x_expr, ts: ", ", ts&: y_expr, ts: ", ", ts&: z_expr, ts: ")]");
3132 break;
3133 }
3134 case ExecutionModelFragment:
3135 if (execution.flags.get(bit: ExecutionModeEarlyFragmentTests))
3136 statement(ts: "[earlydepthstencil]");
3137 break;
3138 default:
3139 break;
3140 }
3141
3142 const char *entry_point_name;
3143 if (hlsl_options.use_entry_point_name)
3144 entry_point_name = get_entry_point().name.c_str();
3145 else
3146 entry_point_name = "main";
3147
3148 statement(ts: require_output ? "SPIRV_Cross_Output " : "void ", ts&: entry_point_name, ts: "(", ts: merge(list: arguments), ts: ")");
3149 begin_scope();
3150 bool legacy = hlsl_options.shader_model <= 30;
3151
3152 // Copy builtins from entry point arguments to globals.
3153 active_input_builtins.for_each_bit(op: [&](uint32_t i) {
3154 auto builtin = builtin_to_glsl(builtin: static_cast<BuiltIn>(i), storage: StorageClassInput);
3155 switch (static_cast<BuiltIn>(i))
3156 {
3157 case BuiltInFragCoord:
3158 // VPOS in D3D9 is sampled at integer locations, apply half-pixel offset to be consistent.
3159 // TODO: Do we need an option here? Any reason why a D3D9 shader would be used
3160 // on a D3D10+ system with a different rasterization config?
3161 if (legacy)
3162 statement(ts&: builtin, ts: " = stage_input.", ts&: builtin, ts: " + float4(0.5f, 0.5f, 0.0f, 0.0f);");
3163 else
3164 {
3165 statement(ts&: builtin, ts: " = stage_input.", ts&: builtin, ts: ";");
3166 // ZW are undefined in D3D9, only do this fixup here.
3167 statement(ts&: builtin, ts: ".w = 1.0 / ", ts&: builtin, ts: ".w;");
3168 }
3169 break;
3170
3171 case BuiltInVertexId:
3172 case BuiltInVertexIndex:
3173 case BuiltInInstanceIndex:
3174 // D3D semantics are uint, but shader wants int.
3175 if (hlsl_options.support_nonzero_base_vertex_base_instance || hlsl_options.shader_model >= 68)
3176 {
3177 if (hlsl_options.shader_model >= 68)
3178 {
3179 if (static_cast<BuiltIn>(i) == BuiltInInstanceIndex)
3180 statement(ts&: builtin, ts: " = int(stage_input.", ts&: builtin, ts: " + stage_input.gl_BaseInstanceARB);");
3181 else
3182 statement(ts&: builtin, ts: " = int(stage_input.", ts&: builtin, ts: " + stage_input.gl_BaseVertexARB);");
3183 }
3184 else
3185 {
3186 if (static_cast<BuiltIn>(i) == BuiltInInstanceIndex)
3187 statement(ts&: builtin, ts: " = int(stage_input.", ts&: builtin, ts: ") + SPIRV_Cross_BaseInstance;");
3188 else
3189 statement(ts&: builtin, ts: " = int(stage_input.", ts&: builtin, ts: ") + SPIRV_Cross_BaseVertex;");
3190 }
3191 }
3192 else
3193 statement(ts&: builtin, ts: " = int(stage_input.", ts&: builtin, ts: ");");
3194 break;
3195
3196 case BuiltInBaseVertex:
3197 if (hlsl_options.shader_model >= 68)
3198 statement(ts&: builtin, ts: " = stage_input.gl_BaseVertexARB;");
3199 else
3200 statement(ts&: builtin, ts: " = SPIRV_Cross_BaseVertex;");
3201 break;
3202
3203 case BuiltInBaseInstance:
3204 if (hlsl_options.shader_model >= 68)
3205 statement(ts&: builtin, ts: " = stage_input.gl_BaseInstanceARB;");
3206 else
3207 statement(ts&: builtin, ts: " = SPIRV_Cross_BaseInstance;");
3208 break;
3209
3210 case BuiltInInstanceId:
3211 // D3D semantics are uint, but shader wants int.
3212 statement(ts&: builtin, ts: " = int(stage_input.", ts&: builtin, ts: ");");
3213 break;
3214
3215 case BuiltInSampleMask:
3216 statement(ts&: builtin, ts: "[0] = stage_input.", ts&: builtin, ts: ";");
3217 break;
3218
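		// These builtins have no stage_input member to copy from; they are sourced elsewhere
		// (e.g. a user-provided cbuffer for NumWorkgroups, or wave intrinsics for the subgroup builtins).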
3219 case BuiltInNumWorkgroups:
3220 case BuiltInPointCoord:
3221 case BuiltInSubgroupSize:
3222 case BuiltInSubgroupLocalInvocationId:
3223 case BuiltInHelperInvocation:
3224 break;
3225
3226 case BuiltInSubgroupEqMask:
3227 // Emulate these ...
3228 // No 64-bit in HLSL, so have to do it in 32-bit and unroll.
3229 statement(ts: "gl_SubgroupEqMask = 1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96));");
3230 statement(ts: "if (WaveGetLaneIndex() >= 32) gl_SubgroupEqMask.x = 0;");
3231 statement(ts: "if (WaveGetLaneIndex() >= 64 || WaveGetLaneIndex() < 32) gl_SubgroupEqMask.y = 0;");
3232 statement(ts: "if (WaveGetLaneIndex() >= 96 || WaveGetLaneIndex() < 64) gl_SubgroupEqMask.z = 0;");
3233 statement(ts: "if (WaveGetLaneIndex() < 96) gl_SubgroupEqMask.w = 0;");
3234 break;
3235
3236 case BuiltInSubgroupGeMask:
3237 // Emulate these ...
3238 // No 64-bit in HLSL, so have to do it in 32-bit and unroll.
3239 statement(ts: "gl_SubgroupGeMask = ~((1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u);");
3240 statement(ts: "if (WaveGetLaneIndex() >= 32) gl_SubgroupGeMask.x = 0u;");
3241 statement(ts: "if (WaveGetLaneIndex() >= 64) gl_SubgroupGeMask.y = 0u;");
3242 statement(ts: "if (WaveGetLaneIndex() >= 96) gl_SubgroupGeMask.z = 0u;");
3243 statement(ts: "if (WaveGetLaneIndex() < 32) gl_SubgroupGeMask.y = ~0u;");
3244 statement(ts: "if (WaveGetLaneIndex() < 64) gl_SubgroupGeMask.z = ~0u;");
3245 statement(ts: "if (WaveGetLaneIndex() < 96) gl_SubgroupGeMask.w = ~0u;");
3246 break;
3247
3248 case BuiltInSubgroupGtMask:
3249 // Emulate these ...
3250 // No 64-bit in HLSL, so have to do it in 32-bit and unroll.
3251 statement(ts: "uint gt_lane_index = WaveGetLaneIndex() + 1;");
3252 statement(ts: "gl_SubgroupGtMask = ~((1u << (gt_lane_index - uint4(0, 32, 64, 96))) - 1u);");
3253 statement(ts: "if (gt_lane_index >= 32) gl_SubgroupGtMask.x = 0u;");
3254 statement(ts: "if (gt_lane_index >= 64) gl_SubgroupGtMask.y = 0u;");
3255 statement(ts: "if (gt_lane_index >= 96) gl_SubgroupGtMask.z = 0u;");
3256 statement(ts: "if (gt_lane_index >= 128) gl_SubgroupGtMask.w = 0u;");
3257 statement(ts: "if (gt_lane_index < 32) gl_SubgroupGtMask.y = ~0u;");
3258 statement(ts: "if (gt_lane_index < 64) gl_SubgroupGtMask.z = ~0u;");
3259 statement(ts: "if (gt_lane_index < 96) gl_SubgroupGtMask.w = ~0u;");
3260 break;
3261
3262 case BuiltInSubgroupLeMask:
3263 // Emulate these ...
3264 // No 64-bit in HLSL, so have to do it in 32-bit and unroll.
3265 statement(ts: "uint le_lane_index = WaveGetLaneIndex() + 1;");
3266 statement(ts: "gl_SubgroupLeMask = (1u << (le_lane_index - uint4(0, 32, 64, 96))) - 1u;");
3267 statement(ts: "if (le_lane_index >= 32) gl_SubgroupLeMask.x = ~0u;");
3268 statement(ts: "if (le_lane_index >= 64) gl_SubgroupLeMask.y = ~0u;");
3269 statement(ts: "if (le_lane_index >= 96) gl_SubgroupLeMask.z = ~0u;");
3270 statement(ts: "if (le_lane_index >= 128) gl_SubgroupLeMask.w = ~0u;");
3271 statement(ts: "if (le_lane_index < 32) gl_SubgroupLeMask.y = 0u;");
3272 statement(ts: "if (le_lane_index < 64) gl_SubgroupLeMask.z = 0u;");
3273 statement(ts: "if (le_lane_index < 96) gl_SubgroupLeMask.w = 0u;");
3274 break;
3275
3276 case BuiltInSubgroupLtMask:
3277 // Emulate these ...
3278 // No 64-bit in HLSL, so have to do it in 32-bit and unroll.
3279 statement(ts: "gl_SubgroupLtMask = (1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u;");
3280 statement(ts: "if (WaveGetLaneIndex() >= 32) gl_SubgroupLtMask.x = ~0u;");
3281 statement(ts: "if (WaveGetLaneIndex() >= 64) gl_SubgroupLtMask.y = ~0u;");
3282 statement(ts: "if (WaveGetLaneIndex() >= 96) gl_SubgroupLtMask.z = ~0u;");
3283 statement(ts: "if (WaveGetLaneIndex() < 32) gl_SubgroupLtMask.y = 0u;");
3284 statement(ts: "if (WaveGetLaneIndex() < 64) gl_SubgroupLtMask.z = 0u;");
3285 statement(ts: "if (WaveGetLaneIndex() < 96) gl_SubgroupLtMask.w = 0u;");
3286 break;
3287
3288 case BuiltInClipDistance:
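			// Clip distances arrive packed as float4 members (gl_ClipDistance0, gl_ClipDistance1, ...),
			// so unpack them into the scalar gl_ClipDistance[] array. Cull distances below use the same packing.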
3289 for (uint32_t clip = 0; clip < clip_distance_count; clip++)
3290 statement(ts: "gl_ClipDistance[", ts&: clip, ts: "] = stage_input.gl_ClipDistance", ts: clip / 4, ts: ".", ts: "xyzw"[clip & 3],
3291 ts: ";");
3292 break;
3293
3294 case BuiltInCullDistance:
3295 for (uint32_t cull = 0; cull < cull_distance_count; cull++)
3296 statement(ts: "gl_CullDistance[", ts&: cull, ts: "] = stage_input.gl_CullDistance", ts: cull / 4, ts: ".", ts: "xyzw"[cull & 3],
3297 ts: ";");
3298 break;
3299
3300 default:
3301 statement(ts&: builtin, ts: " = stage_input.", ts&: builtin, ts: ";");
3302 break;
3303 }
3304 });
3305
3306 // Copy from stage input struct to globals.
3307 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3308 auto &type = this->get<SPIRType>(id: var.basetype);
3309 bool block = has_decoration(id: type.self, decoration: DecorationBlock);
3310
3311 if (var.storage != StorageClassInput)
3312 return;
3313
3314 bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex;
3315
3316 if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) &&
3317 interface_variable_exists_in_entry_point(id: var.self))
3318 {
3319 if (block)
3320 {
3321 auto type_name = to_name(id: type.self);
3322 auto var_name = to_name(id: var.self);
3323 bool is_per_vertex = has_decoration(id: var.self, decoration: DecorationPerVertexKHR);
3324 uint32_t array_size = is_per_vertex ? to_array_size_literal(type) : 0;
3325
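				// For pervertexEXT (barycentric) inputs, every member has to be fetched per vertex
				// through GetAttributeAtVertex() rather than copied directly.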
3326 for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++)
3327 {
3328 auto mbr_name = to_member_name(type, index: mbr_idx);
3329 auto flat_name = join(ts&: type_name, ts: "_", ts&: mbr_name);
3330
3331 if (is_per_vertex)
3332 {
3333 for (uint32_t i = 0; i < array_size; i++)
3334 statement(ts&: var_name, ts: "[", ts&: i, ts: "].", ts&: mbr_name, ts: " = GetAttributeAtVertex(stage_input.", ts&: flat_name, ts: ", ", ts&: i, ts: ");");
3335 }
3336 else
3337 {
3338 statement(ts&: var_name, ts: ".", ts&: mbr_name, ts: " = stage_input.", ts&: flat_name, ts: ";");
3339 }
3340 }
3341 }
3342 else
3343 {
3344 auto name = to_name(id: var.self);
3345 auto &mtype = this->get<SPIRType>(id: var.basetype);
3346 if (need_matrix_unroll && mtype.columns > 1)
3347 {
3348 // Unroll matrices.
3349 for (uint32_t col = 0; col < mtype.columns; col++)
3350 statement(ts&: name, ts: "[", ts&: col, ts: "] = stage_input.", ts&: name, ts: "_", ts&: col, ts: ";");
3351 }
3352 else if (has_decoration(id: var.self, decoration: DecorationPerVertexKHR))
3353 {
3354 uint32_t array_size = to_array_size_literal(type);
3355 for (uint32_t i = 0; i < array_size; i++)
3356 statement(ts&: name, ts: "[", ts&: i, ts: "]", ts: " = GetAttributeAtVertex(stage_input.", ts&: name, ts: ", ", ts&: i, ts: ");");
3357 }
3358 else
3359 {
3360 statement(ts&: name, ts: " = stage_input.", ts&: name, ts: ";");
3361 }
3362 }
3363 }
3364 });
3365
3366 // Run the shader.
3367 if (execution.model == ExecutionModelVertex ||
3368 execution.model == ExecutionModelFragment ||
3369 execution.model == ExecutionModelGLCompute ||
3370 execution.model == ExecutionModelMeshEXT ||
3371 execution.model == ExecutionModelTaskEXT)
3372 {
3373 // For mesh shaders, we receive special arguments that we must pass down as function arguments.
3374 // HLSL does not support proper reference types for passing these IO blocks,
		// but DXC post-inlining seems to magically fix it up anyway *shrug*.
3376 SmallVector<string> arglist;
3377 auto &func = get<SPIRFunction>(id: ir.default_entry_point);
		// The arguments are marked 'out'; avoid detecting reads on them, which would make us emit them as inout.
3379 for (auto &arg : func.arguments)
3380 arglist.push_back(t: to_expression(id: arg.id, register_expression_read: false));
3381 statement(ts: get_inner_entry_point_name(), ts: "(", ts: merge(list: arglist), ts: ");");
3382 }
3383 else
3384 SPIRV_CROSS_THROW("Unsupported shader stage.");
3385
3386 // Copy stage outputs.
3387 if (require_output)
3388 {
3389 statement(ts: "SPIRV_Cross_Output stage_output;");
3390
3391 // Copy builtins from globals to return struct.
3392 active_output_builtins.for_each_bit(op: [&](uint32_t i) {
3393 // PointSize doesn't exist in HLSL SM 4+.
3394 if (i == BuiltInPointSize && !legacy)
3395 return;
3396
3397 switch (static_cast<BuiltIn>(i))
3398 {
3399 case BuiltInClipDistance:
3400 for (uint32_t clip = 0; clip < clip_distance_count; clip++)
3401 statement(ts: "stage_output.gl_ClipDistance", ts: clip / 4, ts: ".", ts: "xyzw"[clip & 3], ts: " = gl_ClipDistance[",
3402 ts&: clip, ts: "];");
3403 break;
3404
3405 case BuiltInCullDistance:
3406 for (uint32_t cull = 0; cull < cull_distance_count; cull++)
3407 statement(ts: "stage_output.gl_CullDistance", ts: cull / 4, ts: ".", ts: "xyzw"[cull & 3], ts: " = gl_CullDistance[",
3408 ts&: cull, ts: "];");
3409 break;
3410
3411 case BuiltInSampleMask:
3412 statement(ts: "stage_output.gl_SampleMask = gl_SampleMask[0];");
3413 break;
3414
3415 default:
3416 {
3417 auto builtin_expr = builtin_to_glsl(builtin: static_cast<BuiltIn>(i), storage: StorageClassOutput);
3418 statement(ts: "stage_output.", ts&: builtin_expr, ts: " = ", ts&: builtin_expr, ts: ";");
3419 break;
3420 }
3421 }
3422 });
3423
3424 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3425 auto &type = this->get<SPIRType>(id: var.basetype);
3426 bool block = has_decoration(id: type.self, decoration: DecorationBlock);
3427
3428 if (var.storage != StorageClassOutput)
3429 return;
3430
3431 if (!var.remapped_variable && type.pointer &&
3432 !is_builtin_variable(var) &&
3433 interface_variable_exists_in_entry_point(id: var.self))
3434 {
3435 if (block)
3436 {
				// I/O blocks need to be flattened into individual stage_output members.
3438 auto type_name = to_name(id: type.self);
3439 auto var_name = to_name(id: var.self);
3440 for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++)
3441 {
3442 auto mbr_name = to_member_name(type, index: mbr_idx);
3443 auto flat_name = join(ts&: type_name, ts: "_", ts&: mbr_name);
3444 statement(ts: "stage_output.", ts&: flat_name, ts: " = ", ts&: var_name, ts: ".", ts&: mbr_name, ts: ";");
3445 }
3446 }
3447 else
3448 {
3449 auto name = to_name(id: var.self);
3450
3451 if (legacy && execution.model == ExecutionModelFragment)
3452 {
3453 string output_filler;
3454 for (uint32_t size = type.vecsize; size < 4; ++size)
3455 output_filler += ", 0.0";
3456
3457 statement(ts: "stage_output.", ts&: name, ts: " = float4(", ts&: name, ts&: output_filler, ts: ");");
3458 }
3459 else
3460 {
3461 statement(ts: "stage_output.", ts&: name, ts: " = ", ts&: name, ts: ";");
3462 }
3463 }
3464 }
3465 });
3466
3467 statement(ts: "return stage_output;");
3468 }
3469
3470 end_scope();
3471}
3472
3473void CompilerHLSL::emit_fixup()
3474{
3475 if (is_vertex_like_shader() && active_output_builtins.get(bit: BuiltInPosition))
3476 {
3477 // Do various mangling on the gl_Position.
3478 if (hlsl_options.shader_model <= 30)
3479 {
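			// D3D9 rasterization places pixel centers at integer coordinates, so compensate with the
			// gl_HalfPixel uniform to match D3D10+ conventions.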
3480 statement(ts: "gl_Position.x = gl_Position.x - gl_HalfPixel.x * "
3481 "gl_Position.w;");
3482 statement(ts: "gl_Position.y = gl_Position.y + gl_HalfPixel.y * "
3483 "gl_Position.w;");
3484 }
3485
3486 if (options.vertex.flip_vert_y)
3487 statement(ts: "gl_Position.y = -gl_Position.y;");
3488 if (options.vertex.fixup_clipspace)
3489 statement(ts: "gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5;");
3490 }
3491}
3492
3493void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse)
3494{
3495 if (sparse)
3496 SPIRV_CROSS_THROW("Sparse feedback not yet supported in HLSL.");
3497
3498 auto *ops = stream(instr: i);
3499 auto op = static_cast<Op>(i.op);
3500 uint32_t length = i.length;
3501
3502 SmallVector<uint32_t> inherited_expressions;
3503
3504 uint32_t result_type = ops[0];
3505 uint32_t id = ops[1];
3506 VariableID img = ops[2];
3507 uint32_t coord = ops[3];
3508 uint32_t dref = 0;
3509 uint32_t comp = 0;
3510 bool gather = false;
3511 bool proj = false;
3512 const uint32_t *opt = nullptr;
3513 auto *combined_image = maybe_get<SPIRCombinedImageSampler>(id: img);
3514
3515 if (combined_image && has_decoration(id: img, decoration: DecorationNonUniform))
3516 {
3517 set_decoration(id: combined_image->image, decoration: DecorationNonUniform);
3518 set_decoration(id: combined_image->sampler, decoration: DecorationNonUniform);
3519 }
3520
3521 auto img_expr = to_non_uniform_aware_expression(id: combined_image ? combined_image->image : img);
3522
3523 inherited_expressions.push_back(t: coord);
3524
3525 switch (op)
3526 {
3527 case OpImageSampleDrefImplicitLod:
3528 case OpImageSampleDrefExplicitLod:
3529 dref = ops[4];
3530 opt = &ops[5];
3531 length -= 5;
3532 break;
3533
3534 case OpImageSampleProjDrefImplicitLod:
3535 case OpImageSampleProjDrefExplicitLod:
3536 dref = ops[4];
3537 proj = true;
3538 opt = &ops[5];
3539 length -= 5;
3540 break;
3541
3542 case OpImageDrefGather:
3543 dref = ops[4];
3544 opt = &ops[5];
3545 gather = true;
3546 length -= 5;
3547 break;
3548
3549 case OpImageGather:
3550 comp = ops[4];
3551 opt = &ops[5];
3552 gather = true;
3553 length -= 5;
3554 break;
3555
3556 case OpImageSampleProjImplicitLod:
3557 case OpImageSampleProjExplicitLod:
3558 opt = &ops[4];
3559 length -= 4;
3560 proj = true;
3561 break;
3562
3563 case OpImageQueryLod:
3564 opt = &ops[4];
3565 length -= 4;
3566 break;
3567
3568 default:
3569 opt = &ops[4];
3570 length -= 4;
3571 break;
3572 }
3573
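	// Work out how many coordinate components the HLSL texture op consumes for this dimensionality;
	// an extra component is appended below for arrayed images.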
3574 auto &imgtype = expression_type(id: img);
3575 uint32_t coord_components = 0;
3576 switch (imgtype.image.dim)
3577 {
3578 case spv::Dim1D:
3579 coord_components = 1;
3580 break;
3581 case spv::Dim2D:
3582 coord_components = 2;
3583 break;
3584 case spv::Dim3D:
3585 coord_components = 3;
3586 break;
3587 case spv::DimCube:
3588 coord_components = 3;
3589 break;
3590 case spv::DimBuffer:
3591 coord_components = 1;
3592 break;
3593 default:
3594 coord_components = 2;
3595 break;
3596 }
3597
3598 if (dref)
3599 inherited_expressions.push_back(t: dref);
3600
3601 if (imgtype.image.arrayed)
3602 coord_components++;
3603
3604 uint32_t bias = 0;
3605 uint32_t lod = 0;
3606 uint32_t grad_x = 0;
3607 uint32_t grad_y = 0;
3608 uint32_t coffset = 0;
3609 uint32_t offset = 0;
3610 uint32_t coffsets = 0;
3611 uint32_t sample = 0;
3612 uint32_t minlod = 0;
3613 uint32_t flags = 0;
3614
3615 if (length)
3616 {
3617 flags = opt[0];
3618 opt++;
3619 length--;
3620 }
3621
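	// Consume optional image operands in the order they appear in the instruction:
	// each flag that is set in the operand mask pulls the next operand word.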
3622 auto test = [&](uint32_t &v, uint32_t flag) {
3623 if (length && (flags & flag))
3624 {
3625 v = *opt++;
3626 inherited_expressions.push_back(t: v);
3627 length--;
3628 }
3629 };
3630
3631 test(bias, ImageOperandsBiasMask);
3632 test(lod, ImageOperandsLodMask);
3633 test(grad_x, ImageOperandsGradMask);
3634 test(grad_y, ImageOperandsGradMask);
3635 test(coffset, ImageOperandsConstOffsetMask);
3636 test(offset, ImageOperandsOffsetMask);
3637 test(coffsets, ImageOperandsConstOffsetsMask);
3638 test(sample, ImageOperandsSampleMask);
3639 test(minlod, ImageOperandsMinLodMask);
3640
3641 string expr;
3642 string texop;
3643
3644 if (minlod != 0)
3645 SPIRV_CROSS_THROW("MinLod texture operand not supported in HLSL.");
3646
3647 if (op == OpImageFetch)
3648 {
3649 if (hlsl_options.shader_model < 40)
3650 {
3651 SPIRV_CROSS_THROW("texelFetch is not supported in HLSL shader model 2/3.");
3652 }
3653 texop += img_expr;
3654 texop += ".Load";
3655 }
3656 else if (op == OpImageQueryLod)
3657 {
3658 texop += img_expr;
3659 texop += ".CalculateLevelOfDetail";
3660 }
3661 else
3662 {
3663 auto &imgformat = get<SPIRType>(id: imgtype.image.type);
3664 if (hlsl_options.shader_model < 67 && imgformat.basetype != SPIRType::Float)
3665 {
3666 SPIRV_CROSS_THROW("Sampling non-float textures is not supported in HLSL SM < 6.7.");
3667 }
3668
3669 if (hlsl_options.shader_model >= 40)
3670 {
3671 texop += img_expr;
3672
3673 if (is_depth_image(type: imgtype, id: img))
3674 {
3675 if (gather)
3676 {
3677 texop += ".GatherCmp";
3678 }
3679 else if (lod || grad_x || grad_y)
3680 {
3681 // Assume we want a fixed level, and the only thing we can get in HLSL is SampleCmpLevelZero.
3682 texop += ".SampleCmpLevelZero";
3683 }
3684 else
3685 texop += ".SampleCmp";
3686 }
3687 else if (gather)
3688 {
3689 uint32_t comp_num = evaluate_constant_u32(id: comp);
3690 if (hlsl_options.shader_model >= 50)
3691 {
3692 switch (comp_num)
3693 {
3694 case 0:
3695 texop += ".GatherRed";
3696 break;
3697 case 1:
3698 texop += ".GatherGreen";
3699 break;
3700 case 2:
3701 texop += ".GatherBlue";
3702 break;
3703 case 3:
3704 texop += ".GatherAlpha";
3705 break;
3706 default:
3707 SPIRV_CROSS_THROW("Invalid component.");
3708 }
3709 }
3710 else
3711 {
3712 if (comp_num == 0)
3713 texop += ".Gather";
3714 else
3715 SPIRV_CROSS_THROW("HLSL shader model 4 can only gather from the red component.");
3716 }
3717 }
3718 else if (bias)
3719 texop += ".SampleBias";
3720 else if (grad_x || grad_y)
3721 texop += ".SampleGrad";
3722 else if (lod)
3723 texop += ".SampleLevel";
3724 else
3725 texop += ".Sample";
3726 }
3727 else
3728 {
3729 switch (imgtype.image.dim)
3730 {
3731 case Dim1D:
3732 texop += "tex1D";
3733 break;
3734 case Dim2D:
3735 texop += "tex2D";
3736 break;
3737 case Dim3D:
3738 texop += "tex3D";
3739 break;
3740 case DimCube:
3741 texop += "texCUBE";
3742 break;
3743 case DimRect:
3744 case DimBuffer:
3745 case DimSubpassData:
3746 SPIRV_CROSS_THROW("Buffer texture support is not yet implemented for HLSL"); // TODO
3747 default:
3748 SPIRV_CROSS_THROW("Invalid dimension.");
3749 }
3750
3751 if (gather)
3752 SPIRV_CROSS_THROW("textureGather is not supported in HLSL shader model 2/3.");
3753 if (offset || coffset)
3754 SPIRV_CROSS_THROW("textureOffset is not supported in HLSL shader model 2/3.");
3755
3756 if (grad_x || grad_y)
3757 texop += "grad";
3758 else if (lod)
3759 texop += "lod";
3760 else if (bias)
3761 texop += "bias";
3762 else if (proj || dref)
3763 texop += "proj";
3764 }
3765 }
3766
3767 expr += texop;
3768 expr += "(";
3769 if (hlsl_options.shader_model < 40)
3770 {
3771 if (combined_image)
3772 SPIRV_CROSS_THROW("Separate images/samplers are not supported in HLSL shader model 2/3.");
3773 expr += to_expression(id: img);
3774 }
3775 else if (op != OpImageFetch)
3776 {
3777 string sampler_expr;
3778 if (combined_image)
3779 sampler_expr = to_non_uniform_aware_expression(id: combined_image->sampler);
3780 else
3781 sampler_expr = to_sampler_expression(id: img);
3782 expr += sampler_expr;
3783 }
3784
3785 auto swizzle = [](uint32_t comps, uint32_t in_comps) -> const char * {
3786 if (comps == in_comps)
3787 return "";
3788
3789 switch (comps)
3790 {
3791 case 1:
3792 return ".x";
3793 case 2:
3794 return ".xy";
3795 case 3:
3796 return ".xyz";
3797 default:
3798 return "";
3799 }
3800 };
3801
3802 bool forward = should_forward(id: coord);
3803
3804 // The IR can give us more components than we need, so chop them off as needed.
3805 string coord_expr;
3806 auto &coord_type = expression_type(id: coord);
3807 if (coord_components != coord_type.vecsize)
3808 coord_expr = to_enclosed_expression(id: coord) + swizzle(coord_components, expression_type(id: coord).vecsize);
3809 else
3810 coord_expr = to_expression(id: coord);
3811
3812 if (proj && hlsl_options.shader_model >= 40) // Legacy HLSL has "proj" operations which do this for us.
3813 coord_expr = coord_expr + " / " + to_extract_component_expression(id: coord, index: coord_components);
3814
3815 if (hlsl_options.shader_model < 40)
3816 {
3817 if (dref)
3818 {
3819 if (imgtype.image.dim != spv::Dim1D && imgtype.image.dim != spv::Dim2D)
3820 {
3821 SPIRV_CROSS_THROW(
3822 "Depth comparison is only supported for 1D and 2D textures in HLSL shader model 2/3.");
3823 }
3824
3825 if (grad_x || grad_y)
3826 SPIRV_CROSS_THROW("Depth comparison is not supported for grad sampling in HLSL shader model 2/3.");
3827
3828 for (uint32_t size = coord_components; size < 2; ++size)
3829 coord_expr += ", 0.0";
3830
3831 forward = forward && should_forward(id: dref);
3832 coord_expr += ", " + to_expression(id: dref);
3833 }
3834 else if (lod || bias || proj)
3835 {
3836 for (uint32_t size = coord_components; size < 3; ++size)
3837 coord_expr += ", 0.0";
3838 }
3839
3840 if (lod)
3841 {
3842 coord_expr = "float4(" + coord_expr + ", " + to_expression(id: lod) + ")";
3843 }
3844 else if (bias)
3845 {
3846 coord_expr = "float4(" + coord_expr + ", " + to_expression(id: bias) + ")";
3847 }
3848 else if (proj)
3849 {
3850 coord_expr = "float4(" + coord_expr + ", " + to_extract_component_expression(id: coord, index: coord_components) + ")";
3851 }
3852 else if (dref)
3853 {
3854 // A "normal" sample gets fed into tex2Dproj as well, because the
3855 // regular tex2D accepts only two coordinates.
3856 coord_expr = "float4(" + coord_expr + ", 1.0)";
3857 }
3858
3859 if (!!lod + !!bias + !!proj > 1)
3860 SPIRV_CROSS_THROW("Legacy HLSL can only use one of lod/bias/proj modifiers.");
3861 }
3862
3863 if (op == OpImageFetch)
3864 {
3865 if (imgtype.image.dim != DimBuffer && !imgtype.image.ms)
3866 coord_expr =
3867 join(ts: "int", ts: coord_components + 1, ts: "(", ts&: coord_expr, ts: ", ", ts: lod ? to_expression(id: lod) : string("0"), ts: ")");
3868 }
3869 else
3870 expr += ", ";
3871 expr += coord_expr;
3872
3873 if (dref && hlsl_options.shader_model >= 40)
3874 {
3875 forward = forward && should_forward(id: dref);
3876 expr += ", ";
3877
3878 if (proj)
3879 expr += to_enclosed_expression(id: dref) + " / " + to_extract_component_expression(id: coord, index: coord_components);
3880 else
3881 expr += to_expression(id: dref);
3882 }
3883
3884 if (!dref && (grad_x || grad_y))
3885 {
3886 forward = forward && should_forward(id: grad_x);
3887 forward = forward && should_forward(id: grad_y);
3888 expr += ", ";
3889 expr += to_expression(id: grad_x);
3890 expr += ", ";
3891 expr += to_expression(id: grad_y);
3892 }
3893
3894 if (!dref && lod && hlsl_options.shader_model >= 40 && op != OpImageFetch)
3895 {
3896 forward = forward && should_forward(id: lod);
3897 expr += ", ";
3898 expr += to_expression(id: lod);
3899 }
3900
3901 if (!dref && bias && hlsl_options.shader_model >= 40)
3902 {
3903 forward = forward && should_forward(id: bias);
3904 expr += ", ";
3905 expr += to_expression(id: bias);
3906 }
3907
3908 if (coffset)
3909 {
3910 forward = forward && should_forward(id: coffset);
3911 expr += ", ";
3912 expr += to_expression(id: coffset);
3913 }
3914 else if (offset)
3915 {
3916 forward = forward && should_forward(id: offset);
3917 expr += ", ";
3918 expr += to_expression(id: offset);
3919 }
3920
3921 if (sample)
3922 {
3923 expr += ", ";
3924 expr += to_expression(id: sample);
3925 }
3926
3927 expr += ")";
3928
3929 if (dref && hlsl_options.shader_model < 40)
3930 expr += ".x";
3931
3932 if (op == OpImageQueryLod)
3933 {
		// This is rather awkward.
		// textureQueryLod returns two values: the "accessed level"
		// as well as the actual LOD lambda.
		// As far as I can tell, there is no way to get the .x component
		// as the GLSL spec defines it, since it depends on the sampler itself.
		// Just assume X == Y, so we need to splat the result to a float2.
3940 statement(ts: "float _", ts&: id, ts: "_tmp = ", ts&: expr, ts: ";");
3941 statement(ts: "float2 _", ts&: id, ts: " = _", ts&: id, ts: "_tmp.xx;");
3942 set<SPIRExpression>(id, args: join(ts: "_", ts&: id), args&: result_type, args: true);
3943 }
3944 else
3945 {
3946 emit_op(result_type, result_id: id, rhs: expr, forward_rhs: forward, suppress_usage_tracking: false);
3947 }
3948
3949 for (auto &inherit : inherited_expressions)
3950 inherit_expression_dependencies(dst: id, source: inherit);
3951
3952 switch (op)
3953 {
3954 case OpImageSampleDrefImplicitLod:
3955 case OpImageSampleImplicitLod:
3956 case OpImageSampleProjImplicitLod:
3957 case OpImageSampleProjDrefImplicitLod:
3958 register_control_dependent_expression(expr: id);
3959 break;
3960
3961 default:
3962 break;
3963 }
3964}
3965
3966string CompilerHLSL::to_resource_binding(const SPIRVariable &var)
3967{
3968 const auto &type = get<SPIRType>(id: var.basetype);
3969
3970 // We can remap push constant blocks, even if they don't have any binding decoration.
3971 if (type.storage != StorageClassPushConstant && !has_decoration(id: var.self, decoration: DecorationBinding))
3972 return "";
3973
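	// Pick the HLSL register class: t for SRVs, u for UAVs, b for constant buffers, s for samplers.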
3974 char space = '\0';
3975
3976 HLSLBindingFlagBits resource_flags = HLSL_BINDING_AUTO_NONE_BIT;
3977
3978 switch (type.basetype)
3979 {
3980 case SPIRType::SampledImage:
3981 space = 't'; // SRV
3982 resource_flags = HLSL_BINDING_AUTO_SRV_BIT;
3983 break;
3984
3985 case SPIRType::Image:
3986 if (type.image.sampled == 2 && type.image.dim != DimSubpassData)
3987 {
3988 if (has_decoration(id: var.self, decoration: DecorationNonWritable) && hlsl_options.nonwritable_uav_texture_as_srv)
3989 {
3990 space = 't'; // SRV
3991 resource_flags = HLSL_BINDING_AUTO_SRV_BIT;
3992 }
3993 else
3994 {
3995 space = 'u'; // UAV
3996 resource_flags = HLSL_BINDING_AUTO_UAV_BIT;
3997 }
3998 }
3999 else
4000 {
4001 space = 't'; // SRV
4002 resource_flags = HLSL_BINDING_AUTO_SRV_BIT;
4003 }
4004 break;
4005
4006 case SPIRType::Sampler:
4007 space = 's';
4008 resource_flags = HLSL_BINDING_AUTO_SAMPLER_BIT;
4009 break;
4010
4011 case SPIRType::AccelerationStructure:
4012 space = 't'; // SRV
4013 resource_flags = HLSL_BINDING_AUTO_SRV_BIT;
4014 break;
4015
4016 case SPIRType::Struct:
4017 {
4018 auto storage = type.storage;
4019 if (storage == StorageClassUniform)
4020 {
4021 if (has_decoration(id: type.self, decoration: DecorationBufferBlock))
4022 {
4023 Bitset flags = ir.get_buffer_block_flags(var);
4024 bool is_readonly = flags.get(bit: DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(id: var.self);
				space = is_readonly ? 't' : 'u'; // SRV if read-only, UAV otherwise.
4026 resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT;
4027 }
4028 else if (has_decoration(id: type.self, decoration: DecorationBlock))
4029 {
4030 space = 'b'; // Constant buffers
4031 resource_flags = HLSL_BINDING_AUTO_CBV_BIT;
4032 }
4033 }
4034 else if (storage == StorageClassPushConstant)
4035 {
4036 space = 'b'; // Constant buffers
4037 resource_flags = HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT;
4038 }
4039 else if (storage == StorageClassStorageBuffer)
4040 {
4041 // UAV or SRV depending on readonly flag.
4042 Bitset flags = ir.get_buffer_block_flags(var);
4043 bool is_readonly = flags.get(bit: DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(id: var.self);
4044 space = is_readonly ? 't' : 'u';
4045 resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT;
4046 }
4047
4048 break;
4049 }
4050 default:
4051 break;
4052 }
4053
4054 if (!space)
4055 return "";
4056
4057 uint32_t desc_set =
4058 resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantDescriptorSet : 0u;
4059 uint32_t binding = resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantBinding : 0u;
4060
4061 if (has_decoration(id: var.self, decoration: DecorationBinding))
4062 binding = get_decoration(id: var.self, decoration: DecorationBinding);
4063 if (has_decoration(id: var.self, decoration: DecorationDescriptorSet))
4064 desc_set = get_decoration(id: var.self, decoration: DecorationDescriptorSet);
4065
4066 return to_resource_register(flag: resource_flags, space, binding, set: desc_set);
4067}
4068
4069string CompilerHLSL::to_resource_binding_sampler(const SPIRVariable &var)
4070{
4071 // For combined image samplers.
4072 if (!has_decoration(id: var.self, decoration: DecorationBinding))
4073 return "";
4074
4075 return to_resource_register(flag: HLSL_BINDING_AUTO_SAMPLER_BIT, space: 's', binding: get_decoration(id: var.self, decoration: DecorationBinding),
4076 set: get_decoration(id: var.self, decoration: DecorationDescriptorSet));
4077}
4078
4079void CompilerHLSL::remap_hlsl_resource_binding(HLSLBindingFlagBits type, uint32_t &desc_set, uint32_t &binding)
4080{
4081 auto itr = resource_bindings.find(x: { .model: get_execution_model(), .desc_set: desc_set, .binding: binding });
4082 if (itr != end(cont&: resource_bindings))
4083 {
4084 auto &remap = itr->second;
4085 remap.second = true;
4086
4087 switch (type)
4088 {
4089 case HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT:
4090 case HLSL_BINDING_AUTO_CBV_BIT:
4091 desc_set = remap.first.cbv.register_space;
4092 binding = remap.first.cbv.register_binding;
4093 break;
4094
4095 case HLSL_BINDING_AUTO_SRV_BIT:
4096 desc_set = remap.first.srv.register_space;
4097 binding = remap.first.srv.register_binding;
4098 break;
4099
4100 case HLSL_BINDING_AUTO_SAMPLER_BIT:
4101 desc_set = remap.first.sampler.register_space;
4102 binding = remap.first.sampler.register_binding;
4103 break;
4104
4105 case HLSL_BINDING_AUTO_UAV_BIT:
4106 desc_set = remap.first.uav.register_space;
4107 binding = remap.first.uav.register_binding;
4108 break;
4109
4110 default:
4111 break;
4112 }
4113 }
4114}
4115
4116string CompilerHLSL::to_resource_register(HLSLBindingFlagBits flag, char space, uint32_t binding, uint32_t space_set)
4117{
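	// Only emit an explicit register() binding when auto-assignment has not been requested
	// for this resource class via the resource binding flags.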
4118 if ((flag & resource_binding_flags) == 0)
4119 {
4120 remap_hlsl_resource_binding(type: flag, desc_set&: space_set, binding);
4121
		// The push constant block did not have a binding, and there was no remap for it,
		// so declare it without a register binding.
4124 if (flag == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT && space_set == ResourceBindingPushConstantDescriptorSet)
4125 return "";
4126
4127 if (hlsl_options.shader_model >= 51)
4128 return join(ts: " : register(", ts&: space, ts&: binding, ts: ", space", ts&: space_set, ts: ")");
4129 else
4130 return join(ts: " : register(", ts&: space, ts&: binding, ts: ")");
4131 }
4132 else
4133 return "";
4134}
4135
4136void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var)
4137{
4138 auto &type = get<SPIRType>(id: var.basetype);
4139 switch (type.basetype)
4140 {
4141 case SPIRType::SampledImage:
4142 case SPIRType::Image:
4143 {
4144 bool is_coherent = false;
4145 if (type.basetype == SPIRType::Image && type.image.sampled == 2)
4146 is_coherent = has_decoration(id: var.self, decoration: DecorationCoherent);
4147
4148 statement(ts: is_coherent ? "globallycoherent " : "", ts: image_type_hlsl_modern(type, id: var.self), ts: " ",
4149 ts: to_name(id: var.self), ts: type_to_array_glsl(type, variable_id: var.self), ts: to_resource_binding(var), ts: ";");
4150
4151 if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
4152 {
4153 // For combined image samplers, also emit a combined image sampler.
4154 if (is_depth_image(type, id: var.self))
4155 statement(ts: "SamplerComparisonState ", ts: to_sampler_expression(id: var.self), ts: type_to_array_glsl(type, variable_id: var.self),
4156 ts: to_resource_binding_sampler(var), ts: ";");
4157 else
4158 statement(ts: "SamplerState ", ts: to_sampler_expression(id: var.self), ts: type_to_array_glsl(type, variable_id: var.self),
4159 ts: to_resource_binding_sampler(var), ts: ";");
4160 }
4161 break;
4162 }
4163
4164 case SPIRType::Sampler:
4165 if (comparison_ids.count(x: var.self))
4166 statement(ts: "SamplerComparisonState ", ts: to_name(id: var.self), ts: type_to_array_glsl(type, variable_id: var.self), ts: to_resource_binding(var),
4167 ts: ";");
4168 else
4169 statement(ts: "SamplerState ", ts: to_name(id: var.self), ts: type_to_array_glsl(type, variable_id: var.self), ts: to_resource_binding(var), ts: ";");
4170 break;
4171
4172 default:
4173 statement(ts: variable_decl(variable: var), ts: to_resource_binding(var), ts: ";");
4174 break;
4175 }
4176}
4177
4178void CompilerHLSL::emit_legacy_uniform(const SPIRVariable &var)
4179{
4180 auto &type = get<SPIRType>(id: var.basetype);
4181 switch (type.basetype)
4182 {
4183 case SPIRType::Sampler:
4184 case SPIRType::Image:
4185 SPIRV_CROSS_THROW("Separate image and samplers not supported in legacy HLSL.");
4186
4187 default:
4188 statement(ts: variable_decl(variable: var), ts: ";");
4189 break;
4190 }
4191}
4192
4193void CompilerHLSL::emit_uniform(const SPIRVariable &var)
4194{
4195 add_resource_name(id: var.self);
4196 if (hlsl_options.shader_model >= 40)
4197 emit_modern_uniform(var);
4198 else
4199 emit_legacy_uniform(var);
4200}
4201
4202bool CompilerHLSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t)
4203{
4204 return false;
4205}
4206
4207string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
4208{
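	// Returns the HLSL expression used to reinterpret in_type as out_type: a plain type cast for
	// same-width integer conversions, asuint/asint/asfloat/asdouble-style intrinsics for
	// float/int reinterpretation, or an empty string when no special op is required.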
4209 if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
4210 return type_to_glsl(type: out_type);
4211 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Int64)
4212 return type_to_glsl(type: out_type);
4213 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
4214 return "asuint";
4215 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt)
4216 return type_to_glsl(type: out_type);
4217 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::UInt64)
4218 return type_to_glsl(type: out_type);
4219 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
4220 return "asint";
4221 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
4222 return "asfloat";
4223 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
4224 return "asfloat";
4225 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
4226 SPIRV_CROSS_THROW("Double to Int64 is not supported in HLSL.");
4227 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
4228 SPIRV_CROSS_THROW("Double to UInt64 is not supported in HLSL.");
4229 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
4230 return "asdouble";
4231 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
4232 return "asdouble";
4233 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
4234 {
4235 if (!requires_explicit_fp16_packing)
4236 {
4237 requires_explicit_fp16_packing = true;
4238 force_recompile();
4239 }
4240 return "spvUnpackFloat2x16";
4241 }
4242 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
4243 {
4244 if (!requires_explicit_fp16_packing)
4245 {
4246 requires_explicit_fp16_packing = true;
4247 force_recompile();
4248 }
4249 return "spvPackFloat2x16";
4250 }
4251 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
4252 {
4253 if (hlsl_options.shader_model < 40)
4254 SPIRV_CROSS_THROW("Half to UShort requires Shader Model 4.");
4255 return "(" + type_to_glsl(type: out_type) + ")f32tof16";
4256 }
4257 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
4258 {
4259 if (hlsl_options.shader_model < 40)
4260 SPIRV_CROSS_THROW("UShort to Half requires Shader Model 4.");
4261 return "(" + type_to_glsl(type: out_type) + ")f16tof32";
4262 }
4263 else
4264 return "";
4265}
4266
4267void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t count)
4268{
4269 auto op = static_cast<GLSLstd450>(eop);
4270
4271 // If we need to do implicit bitcasts, make sure we do it with the correct type.
4272 uint32_t integer_width = get_integer_width_for_glsl_instruction(op, arguments: args, length: count);
4273 auto int_type = to_signed_basetype(width: integer_width);
4274 auto uint_type = to_unsigned_basetype(width: integer_width);
4275
4276 op = get_remapped_glsl_op(std450_op: op);
4277
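	// Most GLSL.std.450 ops map straight through to CompilerGLSL; the cases below handle HLSL renames
	// (rsqrt, frac, lerp, mad, ...) and ops that require emitting helper functions on demand.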
4278 switch (op)
4279 {
4280 case GLSLstd450InverseSqrt:
4281 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "rsqrt");
4282 break;
4283
4284 case GLSLstd450Fract:
4285 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "frac");
4286 break;
4287
4288 case GLSLstd450RoundEven:
4289 if (hlsl_options.shader_model < 40)
4290 SPIRV_CROSS_THROW("roundEven is not supported in HLSL shader model 2/3.");
4291 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "round");
4292 break;
4293
4294 case GLSLstd450Trunc:
4295 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "trunc");
4296 break;
4297
4298 case GLSLstd450Acosh:
4299 case GLSLstd450Asinh:
4300 case GLSLstd450Atanh:
4301 // These are not supported in HLSL, always emulate them.
4302 emit_emulated_ahyper_op(result_type, result_id: id, op0: args[0], op);
4303 break;
4304
4305 case GLSLstd450FMix:
4306 case GLSLstd450IMix:
4307 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "lerp");
4308 break;
4309
4310 case GLSLstd450Atan2:
4311 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "atan2");
4312 break;
4313
4314 case GLSLstd450Fma:
4315 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "mad");
4316 break;
4317
4318 case GLSLstd450InterpolateAtCentroid:
4319 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "EvaluateAttributeAtCentroid");
4320 break;
4321 case GLSLstd450InterpolateAtSample:
4322 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "EvaluateAttributeAtSample");
4323 break;
4324 case GLSLstd450InterpolateAtOffset:
4325 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "EvaluateAttributeSnapped");
4326 break;
4327
4328 case GLSLstd450PackHalf2x16:
4329 if (!requires_fp16_packing)
4330 {
4331 requires_fp16_packing = true;
4332 force_recompile();
4333 }
4334 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "spvPackHalf2x16");
4335 break;
4336
4337 case GLSLstd450UnpackHalf2x16:
4338 if (!requires_fp16_packing)
4339 {
4340 requires_fp16_packing = true;
4341 force_recompile();
4342 }
4343 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "spvUnpackHalf2x16");
4344 break;
4345
4346 case GLSLstd450PackSnorm4x8:
4347 if (!requires_snorm8_packing)
4348 {
4349 requires_snorm8_packing = true;
4350 force_recompile();
4351 }
4352 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "spvPackSnorm4x8");
4353 break;
4354
4355 case GLSLstd450UnpackSnorm4x8:
4356 if (!requires_snorm8_packing)
4357 {
4358 requires_snorm8_packing = true;
4359 force_recompile();
4360 }
4361 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "spvUnpackSnorm4x8");
4362 break;
4363
4364 case GLSLstd450PackUnorm4x8:
4365 if (!requires_unorm8_packing)
4366 {
4367 requires_unorm8_packing = true;
4368 force_recompile();
4369 }
4370 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "spvPackUnorm4x8");
4371 break;
4372
4373 case GLSLstd450UnpackUnorm4x8:
4374 if (!requires_unorm8_packing)
4375 {
4376 requires_unorm8_packing = true;
4377 force_recompile();
4378 }
4379 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "spvUnpackUnorm4x8");
4380 break;
4381
4382 case GLSLstd450PackSnorm2x16:
4383 if (!requires_snorm16_packing)
4384 {
4385 requires_snorm16_packing = true;
4386 force_recompile();
4387 }
4388 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "spvPackSnorm2x16");
4389 break;
4390
4391 case GLSLstd450UnpackSnorm2x16:
4392 if (!requires_snorm16_packing)
4393 {
4394 requires_snorm16_packing = true;
4395 force_recompile();
4396 }
4397 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "spvUnpackSnorm2x16");
4398 break;
4399
4400 case GLSLstd450PackUnorm2x16:
4401 if (!requires_unorm16_packing)
4402 {
4403 requires_unorm16_packing = true;
4404 force_recompile();
4405 }
4406 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "spvPackUnorm2x16");
4407 break;
4408
4409 case GLSLstd450UnpackUnorm2x16:
4410 if (!requires_unorm16_packing)
4411 {
4412 requires_unorm16_packing = true;
4413 force_recompile();
4414 }
4415 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "spvUnpackUnorm2x16");
4416 break;
4417
4418 case GLSLstd450PackDouble2x32:
4419 case GLSLstd450UnpackDouble2x32:
4420 SPIRV_CROSS_THROW("packDouble2x32/unpackDouble2x32 not supported in HLSL.");
4421
4422 case GLSLstd450FindILsb:
4423 {
4424 auto basetype = expression_type(id: args[0]).basetype;
4425 emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "firstbitlow", input_type: basetype, expected_result_type: basetype);
4426 break;
4427 }
4428
4429 case GLSLstd450FindSMsb:
4430 emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "firstbithigh", input_type: int_type, expected_result_type: int_type);
4431 break;
4432
4433 case GLSLstd450FindUMsb:
4434 emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "firstbithigh", input_type: uint_type, expected_result_type: uint_type);
4435 break;
4436
4437 case GLSLstd450MatrixInverse:
4438 {
4439 auto &type = get<SPIRType>(id: result_type);
4440 if (type.vecsize == 2 && type.columns == 2)
4441 {
4442 if (!requires_inverse_2x2)
4443 {
4444 requires_inverse_2x2 = true;
4445 force_recompile();
4446 }
4447 }
4448 else if (type.vecsize == 3 && type.columns == 3)
4449 {
4450 if (!requires_inverse_3x3)
4451 {
4452 requires_inverse_3x3 = true;
4453 force_recompile();
4454 }
4455 }
4456 else if (type.vecsize == 4 && type.columns == 4)
4457 {
4458 if (!requires_inverse_4x4)
4459 {
4460 requires_inverse_4x4 = true;
4461 force_recompile();
4462 }
4463 }
4464 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "spvInverse");
4465 break;
4466 }
4467
4468 case GLSLstd450Normalize:
4469 // HLSL does not support scalar versions here.
4470 if (expression_type(id: args[0]).vecsize == 1)
4471 {
4472 // Returns -1 or 1 for valid input, sign() does the job.
4473 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "sign");
4474 }
4475 else
4476 CompilerGLSL::emit_glsl_op(result_type, result_id: id, op: eop, args, count);
4477 break;
4478
4479 case GLSLstd450Reflect:
4480 if (get<SPIRType>(id: result_type).vecsize == 1)
4481 {
4482 if (!requires_scalar_reflect)
4483 {
4484 requires_scalar_reflect = true;
4485 force_recompile();
4486 }
4487 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "spvReflect");
4488 }
4489 else
4490 CompilerGLSL::emit_glsl_op(result_type, result_id: id, op: eop, args, count);
4491 break;
4492
4493 case GLSLstd450Refract:
4494 if (get<SPIRType>(id: result_type).vecsize == 1)
4495 {
4496 if (!requires_scalar_refract)
4497 {
4498 requires_scalar_refract = true;
4499 force_recompile();
4500 }
4501 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "spvRefract");
4502 }
4503 else
4504 CompilerGLSL::emit_glsl_op(result_type, result_id: id, op: eop, args, count);
4505 break;
4506
4507 case GLSLstd450FaceForward:
4508 if (get<SPIRType>(id: result_type).vecsize == 1)
4509 {
4510 if (!requires_scalar_faceforward)
4511 {
4512 requires_scalar_faceforward = true;
4513 force_recompile();
4514 }
4515 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "spvFaceForward");
4516 }
4517 else
4518 CompilerGLSL::emit_glsl_op(result_type, result_id: id, op: eop, args, count);
4519 break;
4520
4521 case GLSLstd450NMin:
4522 CompilerGLSL::emit_glsl_op(result_type, result_id: id, op: GLSLstd450FMin, args, count);
4523 break;
4524
4525 case GLSLstd450NMax:
4526 CompilerGLSL::emit_glsl_op(result_type, result_id: id, op: GLSLstd450FMax, args, count);
4527 break;
4528
4529 case GLSLstd450NClamp:
4530 CompilerGLSL::emit_glsl_op(result_type, result_id: id, op: GLSLstd450FClamp, args, count);
4531 break;
4532
4533 default:
4534 CompilerGLSL::emit_glsl_op(result_type, result_id: id, op: eop, args, count);
4535 break;
4536 }
4537}
4538
4539void CompilerHLSL::read_access_chain_array(const string &lhs, const SPIRAccessChain &chain)
4540{
4541 auto &type = get<SPIRType>(id: chain.basetype);
4542
	// Use a uniquely generated identifier here, since a plain loop counter could shadow or collide with an identifier in the access chain input or enclosing loops.
4544 auto ident = get_unique_identifier();
4545
4546 statement(ts: "[unroll]");
4547 statement(ts: "for (int ", ts&: ident, ts: " = 0; ", ts&: ident, ts: " < ", ts: to_array_size(type, index: uint32_t(type.array.size() - 1)), ts: "; ",
4548 ts&: ident, ts: "++)");
4549 begin_scope();
4550 auto subchain = chain;
4551 subchain.dynamic_index = join(ts&: ident, ts: " * ", ts: chain.array_stride, ts: " + ", ts: chain.dynamic_index);
4552 subchain.basetype = type.parent_type;
4553 if (!get<SPIRType>(id: subchain.basetype).array.empty())
4554 subchain.array_stride = get_decoration(id: subchain.basetype, decoration: DecorationArrayStride);
4555 read_access_chain(expr: nullptr, lhs: join(ts: lhs, ts: "[", ts&: ident, ts: "]"), chain: subchain);
4556 end_scope();
4557}
4558
4559void CompilerHLSL::read_access_chain_struct(const string &lhs, const SPIRAccessChain &chain)
4560{
4561 auto &type = get<SPIRType>(id: chain.basetype);
4562 auto subchain = chain;
4563 uint32_t member_count = uint32_t(type.member_types.size());
4564
4565 for (uint32_t i = 0; i < member_count; i++)
4566 {
4567 uint32_t offset = type_struct_member_offset(type, index: i);
4568 subchain.static_index = chain.static_index + offset;
4569 subchain.basetype = type.member_types[i];
4570
4571 subchain.matrix_stride = 0;
4572 subchain.array_stride = 0;
4573 subchain.row_major_matrix = false;
4574
4575 auto &member_type = get<SPIRType>(id: subchain.basetype);
4576 if (member_type.columns > 1)
4577 {
4578 subchain.matrix_stride = type_struct_member_matrix_stride(type, index: i);
4579 subchain.row_major_matrix = has_member_decoration(id: type.self, index: i, decoration: DecorationRowMajor);
4580 }
4581
4582 if (!member_type.array.empty())
4583 subchain.array_stride = type_struct_member_array_stride(type, index: i);
4584
4585 read_access_chain(expr: nullptr, lhs: join(ts: lhs, ts: ".", ts: to_member_name(type, index: i)), chain: subchain);
4586 }
4587}
4588
4589void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIRAccessChain &chain)
4590{
4591 auto &type = get<SPIRType>(id: chain.basetype);
4592
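	// ByteAddressBuffer loads return uint data, so build a matching unsigned "transfer" type here;
	// for pre-SM 6.2 (non-templated) loads the result is bitcast back to the real type at the end.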
4593 SPIRType target_type { is_scalar(type) ? OpTypeInt : type.op };
4594 target_type.basetype = SPIRType::UInt;
4595 target_type.vecsize = type.vecsize;
4596 target_type.columns = type.columns;
4597
4598 if (!type.array.empty())
4599 {
4600 read_access_chain_array(lhs, chain);
4601 return;
4602 }
4603 else if (type.basetype == SPIRType::Struct)
4604 {
4605 read_access_chain_struct(lhs, chain);
4606 return;
4607 }
4608 else if (type.width != 32 && !hlsl_options.enable_16bit_types)
4609 SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported, unless SM 6.2 and "
4610 "native 16-bit types are enabled.");
4611
4612 string base = chain.base;
4613 if (has_decoration(id: chain.self, decoration: DecorationNonUniform))
4614 convert_non_uniform_expression(expr&: base, ptr_id: chain.self);
4615
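	// SM 6.2+ supports templated Load<T>() on byte address buffers, which avoids the uint round-trip
	// and is required for 16-bit types.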
4616 bool templated_load = hlsl_options.shader_model >= 62;
4617 string load_expr;
4618
4619 string template_expr;
4620 if (templated_load)
4621 template_expr = join(ts: "<", ts: type_to_glsl(type), ts: ">");
4622
4623 // Load a vector or scalar.
4624 if (type.columns == 1 && !chain.row_major_matrix)
4625 {
4626 const char *load_op = nullptr;
4627 switch (type.vecsize)
4628 {
4629 case 1:
4630 load_op = "Load";
4631 break;
4632 case 2:
4633 load_op = "Load2";
4634 break;
4635 case 3:
4636 load_op = "Load3";
4637 break;
4638 case 4:
4639 load_op = "Load4";
4640 break;
4641 default:
4642 SPIRV_CROSS_THROW("Unknown vector size.");
4643 }
4644
4645 if (templated_load)
4646 load_op = "Load";
4647
4648 load_expr = join(ts&: base, ts: ".", ts&: load_op, ts&: template_expr, ts: "(", ts: chain.dynamic_index, ts: chain.static_index, ts: ")");
4649 }
4650 else if (type.columns == 1)
4651 {
4652 // Strided load since we are loading a column from a row-major matrix.
4653 if (templated_load)
4654 {
4655 auto scalar_type = type;
4656 scalar_type.vecsize = 1;
4657 scalar_type.columns = 1;
4658 template_expr = join(ts: "<", ts: type_to_glsl(type: scalar_type), ts: ">");
4659 if (type.vecsize > 1)
4660 load_expr += type_to_glsl(type) + "(";
4661 }
4662 else if (type.vecsize > 1)
4663 {
4664 load_expr = type_to_glsl(type: target_type);
4665 load_expr += "(";
4666 }
4667
4668 for (uint32_t r = 0; r < type.vecsize; r++)
4669 {
4670 load_expr += join(ts&: base, ts: ".Load", ts&: template_expr, ts: "(", ts: chain.dynamic_index,
4671 ts: chain.static_index + r * chain.matrix_stride, ts: ")");
4672 if (r + 1 < type.vecsize)
4673 load_expr += ", ";
4674 }
4675
4676 if (type.vecsize > 1)
4677 load_expr += ")";
4678 }
4679 else if (!chain.row_major_matrix)
4680 {
4681 // Load a matrix, column-major, the easy case.
4682 const char *load_op = nullptr;
4683 switch (type.vecsize)
4684 {
4685 case 1:
4686 load_op = "Load";
4687 break;
4688 case 2:
4689 load_op = "Load2";
4690 break;
4691 case 3:
4692 load_op = "Load3";
4693 break;
4694 case 4:
4695 load_op = "Load4";
4696 break;
4697 default:
4698 SPIRV_CROSS_THROW("Unknown vector size.");
4699 }
4700
4701 if (templated_load)
4702 {
4703 auto vector_type = type;
4704 vector_type.columns = 1;
4705 template_expr = join(ts: "<", ts: type_to_glsl(type: vector_type), ts: ">");
4706 load_expr = type_to_glsl(type);
4707 load_op = "Load";
4708 }
4709 else
4710 {
4711 // Note, this loading style in HLSL is *actually* row-major, but we always treat matrices as transposed in this backend,
4712 // so row-major is technically column-major ...
4713 load_expr = type_to_glsl(type: target_type);
4714 }
4715 load_expr += "(";
4716
4717 for (uint32_t c = 0; c < type.columns; c++)
4718 {
4719 load_expr += join(ts&: base, ts: ".", ts&: load_op, ts&: template_expr, ts: "(", ts: chain.dynamic_index,
4720 ts: chain.static_index + c * chain.matrix_stride, ts: ")");
4721 if (c + 1 < type.columns)
4722 load_expr += ", ";
4723 }
4724 load_expr += ")";
4725 }
4726 else
4727 {
4728 // Pick out elements one by one ... Hopefully compilers are smart enough to recognize this pattern
4729 // considering HLSL is "row-major decl", but "column-major" memory layout (basically implicit transpose model, ugh) ...
4730
4731 if (templated_load)
4732 {
4733 load_expr = type_to_glsl(type);
4734 auto scalar_type = type;
4735 scalar_type.vecsize = 1;
4736 scalar_type.columns = 1;
4737 template_expr = join(ts: "<", ts: type_to_glsl(type: scalar_type), ts: ">");
4738 }
4739 else
4740 load_expr = type_to_glsl(type: target_type);
4741
4742 load_expr += "(";
4743
4744 for (uint32_t c = 0; c < type.columns; c++)
4745 {
4746 for (uint32_t r = 0; r < type.vecsize; r++)
4747 {
4748 load_expr += join(ts&: base, ts: ".Load", ts&: template_expr, ts: "(", ts: chain.dynamic_index,
4749 ts: chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ts: ")");
4750
4751 if ((r + 1 < type.vecsize) || (c + 1 < type.columns))
4752 load_expr += ", ";
4753 }
4754 }
4755 load_expr += ")";
4756 }
4757
4758 if (!templated_load)
4759 {
4760 auto bitcast_op = bitcast_glsl_op(out_type: type, in_type: target_type);
4761 if (!bitcast_op.empty())
4762 load_expr = join(ts&: bitcast_op, ts: "(", ts&: load_expr, ts: ")");
4763 }
4764
4765 if (lhs.empty())
4766 {
4767 assert(expr);
4768 *expr = std::move(load_expr);
4769 }
4770 else
4771 statement(ts: lhs, ts: " = ", ts&: load_expr, ts: ";");
4772}
4773
4774void CompilerHLSL::emit_load(const Instruction &instruction)
4775{
4776 auto ops = stream(instr: instruction);
4777
4778 uint32_t result_type = ops[0];
4779 uint32_t id = ops[1];
4780 uint32_t ptr = ops[2];
4781
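	// A SPIRAccessChain here means we are loading from a (RW)ByteAddressBuffer, so the load has to be
	// decomposed into explicit Load calls instead of a plain dereference expression.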
4782 auto *chain = maybe_get<SPIRAccessChain>(id: ptr);
4783 if (chain)
4784 {
4785 auto &type = get<SPIRType>(id: result_type);
4786 bool composite_load = !type.array.empty() || type.basetype == SPIRType::Struct;
4787
4788 if (composite_load)
4789 {
4790 // We cannot make this work in one single expression as we might have nested structures and arrays,
4791 // so unroll the load to an uninitialized temporary.
4792 emit_uninitialized_temporary_expression(type: result_type, id);
4793 read_access_chain(expr: nullptr, lhs: to_expression(id), chain: *chain);
4794 track_expression_read(id: chain->self);
4795 }
4796 else
4797 {
4798 string load_expr;
4799 read_access_chain(expr: &load_expr, lhs: "", chain: *chain);
4800
4801 bool forward = should_forward(id: ptr) && forced_temporaries.find(x: id) == end(cont&: forced_temporaries);
4802
			// If we are forwarding this load, don't register the read of the access chain here;
			// defer that to when we actually use the expression,
			// via the add_implied_read_expression mechanism.
4806 if (!forward)
4807 track_expression_read(id: chain->self);
4808
4809 // Do not forward complex load sequences like matrices, structs and arrays.
4810 if (type.columns > 1)
4811 forward = false;
4812
4813 auto &e = emit_op(result_type, result_id: id, rhs: load_expr, forward_rhs: forward, suppress_usage_tracking: true);
4814 e.need_transpose = false;
4815 register_read(expr: id, chain: ptr, forwarded: forward);
4816 inherit_expression_dependencies(dst: id, source: ptr);
4817 if (forward)
4818 add_implied_read_expression(e, source: chain->self);
4819 }
4820 }
4821 else
4822 {
4823 // Very special case where we cannot rely on IO lowering.
4824 // Mesh shader clip/cull arrays ... Cursed.
4825 auto &res_type = get<SPIRType>(id: result_type);
4826 if (get_execution_model() == ExecutionModelMeshEXT &&
4827 has_decoration(id: ptr, decoration: DecorationBuiltIn) &&
4828 (get_decoration(id: ptr, decoration: DecorationBuiltIn) == BuiltInClipDistance ||
4829 get_decoration(id: ptr, decoration: DecorationBuiltIn) == BuiltInCullDistance) &&
4830 is_array(type: res_type) && !is_array(type: get<SPIRType>(id: res_type.parent_type)))
4831 {
4832 track_expression_read(id: ptr);
4833 string load_expr = "{ ";
4834 uint32_t num_elements = to_array_size_literal(type: res_type);
4835 for (uint32_t i = 0; i < num_elements; i++)
4836 {
4837 load_expr += join(ts: to_expression(id: ptr), ts: "[", ts&: i, ts: "]");
4838 if (i + 1 < num_elements)
4839 load_expr += ", ";
4840 }
4841 load_expr += " }";
4842 emit_op(result_type, result_id: id, rhs: load_expr, forward_rhs: false);
4843 register_read(expr: id, chain: ptr, forwarded: false);
4844 inherit_expression_dependencies(dst: id, source: ptr);
4845 }
4846 else
4847 {
4848 CompilerGLSL::emit_instruction(instr: instruction);
4849 }
4850 }
4851}
4852
4853void CompilerHLSL::write_access_chain_array(const SPIRAccessChain &chain, uint32_t value,
4854 const SmallVector<uint32_t> &composite_chain)
4855{
4856 auto *ptype = &get<SPIRType>(id: chain.basetype);
4857 while (ptype->pointer)
4858 {
4859 ptype = &get<SPIRType>(id: ptype->basetype);
4860 }
4861 auto &type = *ptype;
4862
	// Use a uniquely generated identifier here, since a plain loop counter could shadow or collide with an identifier in the access chain input or enclosing loops.
4864 auto ident = get_unique_identifier();
4865
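	// Synthesize an int-typed SPIRExpression for the loop counter so it can be referenced from the
	// composite chain below (through the MSB-forced literal ID).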
4866 uint32_t id = ir.increase_bound_by(count: 2);
4867 uint32_t int_type_id = id + 1;
4868 SPIRType int_type { OpTypeInt };
4869 int_type.basetype = SPIRType::Int;
4870 int_type.width = 32;
4871 set<SPIRType>(id: int_type_id, args&: int_type);
4872 set<SPIRExpression>(id, args&: ident, args&: int_type_id, args: true);
4873 set_name(id, name: ident);
4874 suppressed_usage_tracking.insert(x: id);
4875
4876 statement(ts: "[unroll]");
4877 statement(ts: "for (int ", ts&: ident, ts: " = 0; ", ts&: ident, ts: " < ", ts: to_array_size(type, index: uint32_t(type.array.size() - 1)), ts: "; ",
4878 ts&: ident, ts: "++)");
4879 begin_scope();
4880 auto subchain = chain;
4881 subchain.dynamic_index = join(ts&: ident, ts: " * ", ts: chain.array_stride, ts: " + ", ts: chain.dynamic_index);
4882 subchain.basetype = type.parent_type;
4883
4884 // Forcefully allow us to use an ID here by setting MSB.
4885 auto subcomposite_chain = composite_chain;
4886 subcomposite_chain.push_back(t: 0x80000000u | id);
4887
4888 if (!get<SPIRType>(id: subchain.basetype).array.empty())
4889 subchain.array_stride = get_decoration(id: subchain.basetype, decoration: DecorationArrayStride);
4890
4891 write_access_chain(chain: subchain, value, composite_chain: subcomposite_chain);
4892 end_scope();
4893}
4894
4895void CompilerHLSL::write_access_chain_struct(const SPIRAccessChain &chain, uint32_t value,
4896 const SmallVector<uint32_t> &composite_chain)
4897{
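	// Structs are written member by member: each member's byte offset, matrix stride, array stride and
	// row-major flag are pulled from the struct layout before recursing into write_access_chain().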
4898 auto &type = get<SPIRType>(id: chain.basetype);
4899 uint32_t member_count = uint32_t(type.member_types.size());
4900 auto subchain = chain;
4901
4902 auto subcomposite_chain = composite_chain;
4903 subcomposite_chain.push_back(t: 0);
4904
4905 for (uint32_t i = 0; i < member_count; i++)
4906 {
4907 uint32_t offset = type_struct_member_offset(type, index: i);
4908 subchain.static_index = chain.static_index + offset;
4909 subchain.basetype = type.member_types[i];
4910
4911 subchain.matrix_stride = 0;
4912 subchain.array_stride = 0;
4913 subchain.row_major_matrix = false;
4914
4915 auto &member_type = get<SPIRType>(id: subchain.basetype);
4916 if (member_type.columns > 1)
4917 {
4918 subchain.matrix_stride = type_struct_member_matrix_stride(type, index: i);
4919 subchain.row_major_matrix = has_member_decoration(id: type.self, index: i, decoration: DecorationRowMajor);
4920 }
4921
4922 if (!member_type.array.empty())
4923 subchain.array_stride = type_struct_member_array_stride(type, index: i);
4924
4925 subcomposite_chain.back() = i;
4926 write_access_chain(chain: subchain, value, composite_chain: subcomposite_chain);
4927 }
4928}
4929
4930string CompilerHLSL::write_access_chain_value(uint32_t value, const SmallVector<uint32_t> &composite_chain,
4931 bool enclose)
4932{
4933 string ret;
4934 if (composite_chain.empty())
4935 ret = to_expression(id: value);
4936 else
4937 {
4938 AccessChainMeta meta;
4939 ret = access_chain_internal(base: value, indices: composite_chain.data(), count: uint32_t(composite_chain.size()),
4940 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_LITERAL_MSB_FORCE_ID, meta: &meta);
4941 }
4942
4943 if (enclose)
4944 ret = enclose_expression(expr: ret);
4945 return ret;
4946}
4947
4948void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t value,
4949 const SmallVector<uint32_t> &composite_chain)
4950{
4951 auto &type = get<SPIRType>(id: chain.basetype);
4952
4953 // Make sure we trigger a read of the constituents in the access chain.
4954 track_expression_read(id: chain.self);
4955
4956 SPIRType target_type { is_scalar(type) ? OpTypeInt : type.op };
4957 target_type.basetype = SPIRType::UInt;
4958 target_type.vecsize = type.vecsize;
4959 target_type.columns = type.columns;
4960
4961 if (!type.array.empty())
4962 {
4963 write_access_chain_array(chain, value, composite_chain);
4964 register_write(chain: chain.self);
4965 return;
4966 }
4967 else if (type.basetype == SPIRType::Struct)
4968 {
4969 write_access_chain_struct(chain, value, composite_chain);
4970 register_write(chain: chain.self);
4971 return;
4972 }
4973 else if (type.width != 32 && !hlsl_options.enable_16bit_types)
4974 SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported, unless SM 6.2 and "
4975 "native 16-bit types are enabled.");
4976
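	// SM 6.2 RWByteAddressBuffers support templated Store<T>() which takes typed data directly;
	// older shader models must go through Store/Store2/Store3/Store4 with uint bit patterns,
	// hence the bitcasts below when templated_store is false.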
4977 bool templated_store = hlsl_options.shader_model >= 62;
4978
4979 auto base = chain.base;
4980 if (has_decoration(id: chain.self, decoration: DecorationNonUniform))
4981 convert_non_uniform_expression(expr&: base, ptr_id: chain.self);
4982
4983 string template_expr;
4984 if (templated_store)
4985 template_expr = join(ts: "<", ts: type_to_glsl(type), ts: ">");
4986
4987 if (type.columns == 1 && !chain.row_major_matrix)
4988 {
4989 const char *store_op = nullptr;
4990 switch (type.vecsize)
4991 {
4992 case 1:
4993 store_op = "Store";
4994 break;
4995 case 2:
4996 store_op = "Store2";
4997 break;
4998 case 3:
4999 store_op = "Store3";
5000 break;
5001 case 4:
5002 store_op = "Store4";
5003 break;
5004 default:
5005 SPIRV_CROSS_THROW("Unknown vector size.");
5006 }
5007
5008 auto store_expr = write_access_chain_value(value, composite_chain, enclose: false);
5009
5010 if (!templated_store)
5011 {
5012 auto bitcast_op = bitcast_glsl_op(out_type: target_type, in_type: type);
5013 if (!bitcast_op.empty())
5014 store_expr = join(ts&: bitcast_op, ts: "(", ts&: store_expr, ts: ")");
5015 }
5016 else
5017 store_op = "Store";
5018 statement(ts&: base, ts: ".", ts&: store_op, ts&: template_expr, ts: "(", ts: chain.dynamic_index, ts: chain.static_index, ts: ", ",
5019 ts&: store_expr, ts: ");");
5020 }
5021 else if (type.columns == 1)
5022 {
5023 if (templated_store)
5024 {
5025 auto scalar_type = type;
5026 scalar_type.vecsize = 1;
5027 scalar_type.columns = 1;
5028 template_expr = join(ts: "<", ts: type_to_glsl(type: scalar_type), ts: ">");
5029 }
5030
5031 // Strided store.
5032 for (uint32_t r = 0; r < type.vecsize; r++)
5033 {
5034 auto store_expr = write_access_chain_value(value, composite_chain, enclose: true);
5035 if (type.vecsize > 1)
5036 {
5037 store_expr += ".";
5038 store_expr += index_to_swizzle(index: r);
5039 }
5040 remove_duplicate_swizzle(op&: store_expr);
5041
5042 if (!templated_store)
5043 {
5044 auto bitcast_op = bitcast_glsl_op(out_type: target_type, in_type: type);
5045 if (!bitcast_op.empty())
5046 store_expr = join(ts&: bitcast_op, ts: "(", ts&: store_expr, ts: ")");
5047 }
5048
5049 statement(ts&: base, ts: ".Store", ts&: template_expr, ts: "(", ts: chain.dynamic_index,
5050 ts: chain.static_index + chain.matrix_stride * r, ts: ", ", ts&: store_expr, ts: ");");
5051 }
5052 }
5053 else if (!chain.row_major_matrix)
5054 {
5055 const char *store_op = nullptr;
5056 switch (type.vecsize)
5057 {
5058 case 1:
5059 store_op = "Store";
5060 break;
5061 case 2:
5062 store_op = "Store2";
5063 break;
5064 case 3:
5065 store_op = "Store3";
5066 break;
5067 case 4:
5068 store_op = "Store4";
5069 break;
5070 default:
5071 SPIRV_CROSS_THROW("Unknown vector size.");
5072 }
5073
5074 if (templated_store)
5075 {
5076 store_op = "Store";
5077 auto vector_type = type;
5078 vector_type.columns = 1;
5079 template_expr = join(ts: "<", ts: type_to_glsl(type: vector_type), ts: ">");
5080 }
5081
5082 for (uint32_t c = 0; c < type.columns; c++)
5083 {
5084 auto store_expr = join(ts: write_access_chain_value(value, composite_chain, enclose: true), ts: "[", ts&: c, ts: "]");
5085
5086 if (!templated_store)
5087 {
5088 auto bitcast_op = bitcast_glsl_op(out_type: target_type, in_type: type);
5089 if (!bitcast_op.empty())
5090 store_expr = join(ts&: bitcast_op, ts: "(", ts&: store_expr, ts: ")");
5091 }
5092
5093 statement(ts&: base, ts: ".", ts&: store_op, ts&: template_expr, ts: "(", ts: chain.dynamic_index,
5094 ts: chain.static_index + c * chain.matrix_stride, ts: ", ", ts&: store_expr, ts: ");");
5095 }
5096 }
5097 else
5098 {
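		// Row-major matrix in buffer memory: no single Store* call matches the layout,
		// so write one scalar at a time at static_index + c * (width / 8) + r * matrix_stride.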
5099 if (templated_store)
5100 {
5101 auto scalar_type = type;
5102 scalar_type.vecsize = 1;
5103 scalar_type.columns = 1;
5104 template_expr = join(ts: "<", ts: type_to_glsl(type: scalar_type), ts: ">");
5105 }
5106
5107 for (uint32_t r = 0; r < type.vecsize; r++)
5108 {
5109 for (uint32_t c = 0; c < type.columns; c++)
5110 {
5111 auto store_expr =
5112 join(ts: write_access_chain_value(value, composite_chain, enclose: true), ts: "[", ts&: c, ts: "].", ts: index_to_swizzle(index: r));
5113 remove_duplicate_swizzle(op&: store_expr);
5114 auto bitcast_op = bitcast_glsl_op(out_type: target_type, in_type: type);
5115 if (!bitcast_op.empty())
5116 store_expr = join(ts&: bitcast_op, ts: "(", ts&: store_expr, ts: ")");
5117 statement(ts&: base, ts: ".Store", ts&: template_expr, ts: "(", ts: chain.dynamic_index,
5118 ts: chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ts: ", ", ts&: store_expr, ts: ");");
5119 }
5120 }
5121 }
5122
5123 register_write(chain: chain.self);
5124}
5125
5126void CompilerHLSL::emit_store(const Instruction &instruction)
5127{
5128 auto ops = stream(instr: instruction);
5129 if (options.vertex.flip_vert_y)
5130 {
5131 auto *expr = maybe_get<SPIRExpression>(id: ops[0]);
5132 if (expr != nullptr && expr->access_meshlet_position_y)
5133 {
5134 auto lhs = to_dereferenced_expression(id: ops[0]);
5135 auto rhs = to_unpacked_expression(id: ops[1]);
5136 statement(ts&: lhs, ts: " = spvFlipVertY(", ts&: rhs, ts: ");");
5137 register_write(chain: ops[0]);
5138 return;
5139 }
5140 }
5141
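	// Stores through a SPIRAccessChain target an RWByteAddressBuffer and are lowered to Store* calls.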
5142 auto *chain = maybe_get<SPIRAccessChain>(id: ops[0]);
5143 if (chain)
5144 write_access_chain(chain: *chain, value: ops[1], composite_chain: {});
5145 else
5146 CompilerGLSL::emit_instruction(instr: instruction);
5147}
5148
5149void CompilerHLSL::emit_access_chain(const Instruction &instruction)
5150{
5151 auto ops = stream(instr: instruction);
5152 uint32_t length = instruction.length;
5153
5154 bool need_byte_access_chain = false;
5155 auto &type = expression_type(id: ops[2]);
5156 const auto *chain = maybe_get<SPIRAccessChain>(id: ops[2]);
5157
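	// A SPIRAccessChain models a byte offset into a (RW)ByteAddressBuffer as a dynamic expression plus a
	// static constant, rather than a plain indexing expression.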
5158 if (chain)
5159 {
5160 // Keep tacking on an existing access chain.
5161 need_byte_access_chain = true;
5162 }
5163 else if (type.storage == StorageClassStorageBuffer || has_decoration(id: type.self, decoration: DecorationBufferBlock))
5164 {
5165 // If we are starting to poke into an SSBO, we are dealing with ByteAddressBuffers, and we need
5166 // to emit SPIRAccessChain rather than a plain SPIRExpression.
5167 uint32_t chain_arguments = length - 3;
5168 if (chain_arguments > type.array.size())
5169 need_byte_access_chain = true;
5170 }
5171
5172 if (need_byte_access_chain)
5173 {
5174 // If we have a chain variable, we are already inside the SSBO, and any array type will refer to arrays within a block,
5175 // and not array of SSBO.
5176 uint32_t to_plain_buffer_length = chain ? 0u : static_cast<uint32_t>(type.array.size());
5177
5178 auto *backing_variable = maybe_get_backing_variable(chain: ops[2]);
5179
5180 if (backing_variable != nullptr && is_user_type_structured(id: backing_variable->self))
5181 {
5182 CompilerGLSL::emit_instruction(instr: instruction);
5183 return;
5184 }
5185
5186 string base;
5187 if (to_plain_buffer_length != 0)
5188 base = access_chain(base: ops[2], indices: &ops[3], count: to_plain_buffer_length, target_type: get<SPIRType>(id: ops[0]));
5189 else if (chain)
5190 base = chain->base;
5191 else
5192 base = to_expression(id: ops[2]);
5193
5194 // Start traversing type hierarchy at the proper non-pointer types.
5195 auto *basetype = &get_pointee_type(type);
5196
5197 // Traverse the type hierarchy down to the actual buffer types.
5198 for (uint32_t i = 0; i < to_plain_buffer_length; i++)
5199 {
5200 assert(basetype->parent_type);
5201 basetype = &get<SPIRType>(id: basetype->parent_type);
5202 }
5203
5204 uint32_t matrix_stride = 0;
5205 uint32_t array_stride = 0;
5206 bool row_major_matrix = false;
5207
5208 // Inherit matrix information.
5209 if (chain)
5210 {
5211 matrix_stride = chain->matrix_stride;
5212 row_major_matrix = chain->row_major_matrix;
5213 array_stride = chain->array_stride;
5214 }
5215
5216 auto offsets = flattened_access_chain_offset(basetype: *basetype, indices: &ops[3 + to_plain_buffer_length],
5217 count: length - 3 - to_plain_buffer_length, offset: 0, word_stride: 1, need_transpose: &row_major_matrix,
5218 matrix_stride: &matrix_stride, array_stride: &array_stride);
5219
5220 auto &e = set<SPIRAccessChain>(id: ops[1], args: ops[0], args: type.storage, args&: base, args&: offsets.first, args&: offsets.second);
5221 e.row_major_matrix = row_major_matrix;
5222 e.matrix_stride = matrix_stride;
5223 e.array_stride = array_stride;
5224 e.immutable = should_forward(id: ops[2]);
5225 e.loaded_from = backing_variable ? backing_variable->self : ID(0);
5226
5227 if (chain)
5228 {
5229 e.dynamic_index += chain->dynamic_index;
5230 e.static_index += chain->static_index;
5231 }
5232
5233 for (uint32_t i = 2; i < length; i++)
5234 {
5235 inherit_expression_dependencies(dst: ops[1], source: ops[i]);
5236 add_implied_read_expression(e, source: ops[i]);
5237 }
5238 }
5239 else
5240 {
5241 CompilerGLSL::emit_instruction(instr: instruction);
5242 }
5243}
5244
5245void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
5246{
5247 const char *atomic_op = nullptr;
5248
5249 string value_expr;
5250 if (op != OpAtomicIDecrement && op != OpAtomicIIncrement && op != OpAtomicLoad && op != OpAtomicStore)
5251 value_expr = to_expression(id: ops[op == OpAtomicCompareExchange ? 6 : 5]);
5252
5253 bool is_atomic_store = false;
5254
5255 switch (op)
5256 {
5257 case OpAtomicIIncrement:
5258 atomic_op = "InterlockedAdd";
5259 value_expr = "1";
5260 break;
5261
5262 case OpAtomicIDecrement:
5263 atomic_op = "InterlockedAdd";
5264 value_expr = "-1";
5265 break;
5266
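	// HLSL has no dedicated atomic load; emulate it as InterlockedAdd with 0, which returns the old value.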
5267 case OpAtomicLoad:
5268 atomic_op = "InterlockedAdd";
5269 value_expr = "0";
5270 break;
5271
5272 case OpAtomicISub:
5273 atomic_op = "InterlockedAdd";
5274 value_expr = join(ts: "-", ts: enclose_expression(expr: value_expr));
5275 break;
5276
5277 case OpAtomicSMin:
5278 case OpAtomicUMin:
5279 atomic_op = "InterlockedMin";
5280 break;
5281
5282 case OpAtomicSMax:
5283 case OpAtomicUMax:
5284 atomic_op = "InterlockedMax";
5285 break;
5286
5287 case OpAtomicAnd:
5288 atomic_op = "InterlockedAnd";
5289 break;
5290
5291 case OpAtomicOr:
5292 atomic_op = "InterlockedOr";
5293 break;
5294
5295 case OpAtomicXor:
5296 atomic_op = "InterlockedXor";
5297 break;
5298
5299 case OpAtomicIAdd:
5300 atomic_op = "InterlockedAdd";
5301 break;
5302
5303 case OpAtomicExchange:
5304 atomic_op = "InterlockedExchange";
5305 break;
5306
5307 case OpAtomicStore:
5308 atomic_op = "InterlockedExchange";
5309 is_atomic_store = true;
5310 break;
5311
5312 case OpAtomicCompareExchange:
5313 if (length < 8)
5314 SPIRV_CROSS_THROW("Not enough data for opcode.");
5315 atomic_op = "InterlockedCompareExchange";
5316 value_expr = join(ts: to_expression(id: ops[7]), ts: ", ", ts&: value_expr);
5317 break;
5318
5319 default:
5320 SPIRV_CROSS_THROW("Unknown atomic opcode.");
5321 }
5322
5323 if (is_atomic_store)
5324 {
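		// Atomic stores are emulated with InterlockedExchange; the previous value goes into a dummy
		// temporary that is never read.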
5325 auto &data_type = expression_type(id: ops[0]);
5326 auto *chain = maybe_get<SPIRAccessChain>(id: ops[0]);
5327
5328 auto &tmp_id = extra_sub_expressions[ops[0]];
5329 if (!tmp_id)
5330 {
5331 tmp_id = ir.increase_bound_by(count: 1);
5332 emit_uninitialized_temporary_expression(type: get_pointee_type(type: data_type).self, id: tmp_id);
5333 }
5334
5335 if (data_type.storage == StorageClassImage || !chain)
5336 {
5337 statement(ts&: atomic_op, ts: "(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ",
5338 ts: to_expression(id: ops[3]), ts: ", ", ts: to_expression(id: tmp_id), ts: ");");
5339 }
5340 else
5341 {
5342 string base = chain->base;
5343 if (has_decoration(id: chain->self, decoration: DecorationNonUniform))
5344 convert_non_uniform_expression(expr&: base, ptr_id: chain->self);
5345 // RWByteAddressBuffer is always uint in its underlying type.
5346 statement(ts&: base, ts: ".", ts&: atomic_op, ts: "(", ts&: chain->dynamic_index, ts&: chain->static_index, ts: ", ",
5347 ts: to_expression(id: ops[3]), ts: ", ", ts: to_expression(id: tmp_id), ts: ");");
5348 }
5349 }
5350 else
5351 {
5352 uint32_t result_type = ops[0];
5353 uint32_t id = ops[1];
5354 forced_temporaries.insert(x: ops[1]);
5355
5356 auto &type = get<SPIRType>(id: result_type);
5357 statement(ts: variable_decl(type, name: to_name(id)), ts: ";");
5358
5359 auto &data_type = expression_type(id: ops[2]);
5360 auto *chain = maybe_get<SPIRAccessChain>(id: ops[2]);
5361 SPIRType::BaseType expr_type;
5362 if (data_type.storage == StorageClassImage || !chain)
5363 {
5364 statement(ts&: atomic_op, ts: "(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: value_expr, ts: ", ", ts: to_name(id), ts: ");");
5365 expr_type = data_type.basetype;
5366 }
5367 else
5368 {
5369 // RWByteAddressBuffer is always uint in its underlying type.
5370 string base = chain->base;
5371 if (has_decoration(id: chain->self, decoration: DecorationNonUniform))
5372 convert_non_uniform_expression(expr&: base, ptr_id: chain->self);
5373 expr_type = SPIRType::UInt;
5374 statement(ts&: base, ts: ".", ts&: atomic_op, ts: "(", ts&: chain->dynamic_index, ts&: chain->static_index, ts: ", ", ts&: value_expr,
5375 ts: ", ", ts: to_name(id), ts: ");");
5376 }
5377
5378 auto expr = bitcast_expression(target_type: type, expr_type, expr: to_name(id));
5379 set<SPIRExpression>(id, args&: expr, args&: result_type, args: true);
5380 }
5381 flush_all_atomic_capable_variables();
5382}
5383
5384void CompilerHLSL::emit_subgroup_op(const Instruction &i)
5385{
5386 if (hlsl_options.shader_model < 60)
5387 SPIRV_CROSS_THROW("Wave ops requires SM 6.0 or higher.");
5388
5389 const uint32_t *ops = stream(instr: i);
5390 auto op = static_cast<Op>(i.op);
5391
5392 uint32_t result_type = ops[0];
5393 uint32_t id = ops[1];
5394
5395 auto scope = static_cast<Scope>(evaluate_constant_u32(id: ops[2]));
5396 if (scope != ScopeSubgroup)
5397 SPIRV_CROSS_THROW("Only subgroup scope is supported.");
5398
5399 const auto make_inclusive_Sum = [&](const string &expr) -> string {
5400 return join(ts: expr, ts: " + ", ts: to_expression(id: ops[4]));
5401 };
5402
5403 const auto make_inclusive_Product = [&](const string &expr) -> string {
5404 return join(ts: expr, ts: " * ", ts: to_expression(id: ops[4]));
5405 };
5406
5407 // If we need to do implicit bitcasts, make sure we do it with the correct type.
5408 uint32_t integer_width = get_integer_width_for_instruction(instr: i);
5409 auto int_type = to_signed_basetype(width: integer_width);
5410 auto uint_type = to_unsigned_basetype(width: integer_width);
5411
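// These expansions are never reached since the corresponding group ops below pass supports_scan = false;
// they only exist so the token pasting in HLSL_GROUP_OP compiles.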
5412#define make_inclusive_BitAnd(expr) ""
5413#define make_inclusive_BitOr(expr) ""
5414#define make_inclusive_BitXor(expr) ""
5415#define make_inclusive_Min(expr) ""
5416#define make_inclusive_Max(expr) ""
5417
5418 switch (op)
5419 {
5420 case OpGroupNonUniformElect:
5421 emit_op(result_type, result_id: id, rhs: "WaveIsFirstLane()", forward_rhs: true);
5422 break;
5423
5424 case OpGroupNonUniformBroadcast:
5425 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "WaveReadLaneAt");
5426 break;
5427
5428 case OpGroupNonUniformBroadcastFirst:
5429 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "WaveReadLaneFirst");
5430 break;
5431
5432 case OpGroupNonUniformBallot:
5433 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "WaveActiveBallot");
5434 break;
5435
5436 case OpGroupNonUniformInverseBallot:
5437 SPIRV_CROSS_THROW("Cannot trivially implement InverseBallot in HLSL.");
5438
5439 case OpGroupNonUniformBallotBitExtract:
5440 SPIRV_CROSS_THROW("Cannot trivially implement BallotBitExtract in HLSL.");
5441
5442 case OpGroupNonUniformBallotFindLSB:
5443 SPIRV_CROSS_THROW("Cannot trivially implement BallotFindLSB in HLSL.");
5444
5445 case OpGroupNonUniformBallotFindMSB:
5446 SPIRV_CROSS_THROW("Cannot trivially implement BallotFindMSB in HLSL.");
5447
5448 case OpGroupNonUniformBallotBitCount:
5449 {
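		// WaveActiveBallot yields a uint4 lane mask. A Reduce bit count sums countbits() over all four
		// components; Inclusive/Exclusive scans additionally mask with gl_SubgroupLeMask / gl_SubgroupLtMask.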
5450 auto operation = static_cast<GroupOperation>(ops[3]);
5451 bool forward = should_forward(id: ops[4]);
5452 if (operation == GroupOperationReduce)
5453 {
5454 auto left = join(ts: "countbits(", ts: to_enclosed_expression(id: ops[4]), ts: ".x) + countbits(",
5455 ts: to_enclosed_expression(id: ops[4]), ts: ".y)");
5456 auto right = join(ts: "countbits(", ts: to_enclosed_expression(id: ops[4]), ts: ".z) + countbits(",
5457 ts: to_enclosed_expression(id: ops[4]), ts: ".w)");
5458 emit_op(result_type, result_id: id, rhs: join(ts&: left, ts: " + ", ts&: right), forward_rhs: forward);
5459 inherit_expression_dependencies(dst: id, source: ops[4]);
5460 }
5461 else if (operation == GroupOperationInclusiveScan)
5462 {
5463 auto left = join(ts: "countbits(", ts: to_enclosed_expression(id: ops[4]), ts: ".x & gl_SubgroupLeMask.x) + countbits(",
5464 ts: to_enclosed_expression(id: ops[4]), ts: ".y & gl_SubgroupLeMask.y)");
5465 auto right = join(ts: "countbits(", ts: to_enclosed_expression(id: ops[4]), ts: ".z & gl_SubgroupLeMask.z) + countbits(",
5466 ts: to_enclosed_expression(id: ops[4]), ts: ".w & gl_SubgroupLeMask.w)");
5467 emit_op(result_type, result_id: id, rhs: join(ts&: left, ts: " + ", ts&: right), forward_rhs: forward);
5468 if (!active_input_builtins.get(bit: BuiltInSubgroupLeMask))
5469 {
5470 active_input_builtins.set(BuiltInSubgroupLeMask);
5471 force_recompile_guarantee_forward_progress();
5472 }
5473 }
5474 else if (operation == GroupOperationExclusiveScan)
5475 {
5476 auto left = join(ts: "countbits(", ts: to_enclosed_expression(id: ops[4]), ts: ".x & gl_SubgroupLtMask.x) + countbits(",
5477 ts: to_enclosed_expression(id: ops[4]), ts: ".y & gl_SubgroupLtMask.y)");
5478 auto right = join(ts: "countbits(", ts: to_enclosed_expression(id: ops[4]), ts: ".z & gl_SubgroupLtMask.z) + countbits(",
5479 ts: to_enclosed_expression(id: ops[4]), ts: ".w & gl_SubgroupLtMask.w)");
5480 emit_op(result_type, result_id: id, rhs: join(ts&: left, ts: " + ", ts&: right), forward_rhs: forward);
5481 if (!active_input_builtins.get(bit: BuiltInSubgroupLtMask))
5482 {
5483 active_input_builtins.set(BuiltInSubgroupLtMask);
5484 force_recompile_guarantee_forward_progress();
5485 }
5486 }
5487 else
5488 SPIRV_CROSS_THROW("Invalid BitCount operation.");
5489 break;
5490 }
5491
5492 case OpGroupNonUniformShuffle:
5493 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "WaveReadLaneAt");
5494 break;
5495 case OpGroupNonUniformShuffleXor:
5496 {
5497 bool forward = should_forward(id: ops[3]);
5498 emit_op(result_type: ops[0], result_id: ops[1],
5499 rhs: join(ts: "WaveReadLaneAt(", ts: to_unpacked_expression(id: ops[3]), ts: ", ",
5500 ts: "WaveGetLaneIndex() ^ ", ts: to_enclosed_expression(id: ops[4]), ts: ")"), forward_rhs: forward);
5501 inherit_expression_dependencies(dst: ops[1], source: ops[3]);
5502 break;
5503 }
5504 case OpGroupNonUniformShuffleUp:
5505 {
5506 bool forward = should_forward(id: ops[3]);
5507 emit_op(result_type: ops[0], result_id: ops[1],
5508 rhs: join(ts: "WaveReadLaneAt(", ts: to_unpacked_expression(id: ops[3]), ts: ", ",
5509 ts: "WaveGetLaneIndex() - ", ts: to_enclosed_expression(id: ops[4]), ts: ")"), forward_rhs: forward);
5510 inherit_expression_dependencies(dst: ops[1], source: ops[3]);
5511 break;
5512 }
5513 case OpGroupNonUniformShuffleDown:
5514 {
5515 bool forward = should_forward(id: ops[3]);
5516 emit_op(result_type: ops[0], result_id: ops[1],
5517 rhs: join(ts: "WaveReadLaneAt(", ts: to_unpacked_expression(id: ops[3]), ts: ", ",
5518 ts: "WaveGetLaneIndex() + ", ts: to_enclosed_expression(id: ops[4]), ts: ")"), forward_rhs: forward);
5519 inherit_expression_dependencies(dst: ops[1], source: ops[3]);
5520 break;
5521 }
5522
5523 case OpGroupNonUniformAll:
5524 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "WaveActiveAllTrue");
5525 break;
5526
5527 case OpGroupNonUniformAny:
5528 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "WaveActiveAnyTrue");
5529 break;
5530
5531 case OpGroupNonUniformAllEqual:
5532 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "WaveActiveAllEqual");
5533 break;
5534
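	// HLSL only exposes exclusive prefix ops (WavePrefixSum / WavePrefixProduct), so inclusive scans are
	// built by folding the invocation's own value back in via the make_inclusive_* helpers above.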
5535 // clang-format off
5536#define HLSL_GROUP_OP(op, hlsl_op, supports_scan) \
5537case OpGroupNonUniform##op: \
5538 { \
5539 auto operation = static_cast<GroupOperation>(ops[3]); \
5540 if (operation == GroupOperationReduce) \
5541 emit_unary_func_op(result_type, id, ops[4], "WaveActive" #hlsl_op); \
5542 else if (operation == GroupOperationInclusiveScan && supports_scan) \
5543 { \
5544 bool forward = should_forward(ops[4]); \
5545 emit_op(result_type, id, make_inclusive_##hlsl_op (join("WavePrefix" #hlsl_op, "(", to_expression(ops[4]), ")")), forward); \
5546 inherit_expression_dependencies(id, ops[4]); \
5547 } \
5548 else if (operation == GroupOperationExclusiveScan && supports_scan) \
5549 emit_unary_func_op(result_type, id, ops[4], "WavePrefix" #hlsl_op); \
5550 else if (operation == GroupOperationClusteredReduce) \
5551 SPIRV_CROSS_THROW("Cannot trivially implement ClusteredReduce in HLSL."); \
5552 else \
5553 SPIRV_CROSS_THROW("Invalid group operation."); \
5554 break; \
5555 }
5556
5557#define HLSL_GROUP_OP_CAST(op, hlsl_op, type) \
5558case OpGroupNonUniform##op: \
5559 { \
5560 auto operation = static_cast<GroupOperation>(ops[3]); \
5561 if (operation == GroupOperationReduce) \
5562 emit_unary_func_op_cast(result_type, id, ops[4], "WaveActive" #hlsl_op, type, type); \
5563 else \
5564 SPIRV_CROSS_THROW("Invalid group operation."); \
5565 break; \
5566 }
5567
5568 HLSL_GROUP_OP(FAdd, Sum, true)
5569 HLSL_GROUP_OP(FMul, Product, true)
5570 HLSL_GROUP_OP(FMin, Min, false)
5571 HLSL_GROUP_OP(FMax, Max, false)
5572 HLSL_GROUP_OP(IAdd, Sum, true)
5573 HLSL_GROUP_OP(IMul, Product, true)
5574 HLSL_GROUP_OP_CAST(SMin, Min, int_type)
5575 HLSL_GROUP_OP_CAST(SMax, Max, int_type)
5576 HLSL_GROUP_OP_CAST(UMin, Min, uint_type)
5577 HLSL_GROUP_OP_CAST(UMax, Max, uint_type)
5578 HLSL_GROUP_OP(BitwiseAnd, BitAnd, false)
5579 HLSL_GROUP_OP(BitwiseOr, BitOr, false)
5580 HLSL_GROUP_OP(BitwiseXor, BitXor, false)
5581 HLSL_GROUP_OP_CAST(LogicalAnd, BitAnd, uint_type)
5582 HLSL_GROUP_OP_CAST(LogicalOr, BitOr, uint_type)
5583 HLSL_GROUP_OP_CAST(LogicalXor, BitXor, uint_type)
5584
5585#undef HLSL_GROUP_OP
5586#undef HLSL_GROUP_OP_CAST
5587 // clang-format on
5588
5589 case OpGroupNonUniformQuadSwap:
5590 {
5591 uint32_t direction = evaluate_constant_u32(id: ops[4]);
5592 if (direction == 0)
5593 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "QuadReadAcrossX");
5594 else if (direction == 1)
5595 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "QuadReadAcrossY");
5596 else if (direction == 2)
5597 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "QuadReadAcrossDiagonal");
5598 else
5599 SPIRV_CROSS_THROW("Invalid quad swap direction.");
5600 break;
5601 }
5602
5603 case OpGroupNonUniformQuadBroadcast:
5604 {
5605 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "QuadReadLaneAt");
5606 break;
5607 }
5608
5609 default:
5610 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
5611 }
5612
5613 register_control_dependent_expression(expr: id);
5614}
5615
5616void CompilerHLSL::emit_instruction(const Instruction &instruction)
5617{
5618 auto ops = stream(instr: instruction);
5619 auto opcode = static_cast<Op>(instruction.op);
5620
5621#define HLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
5622#define HLSL_BOP_CAST(op, type) \
5623 emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode), false)
5624#define HLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
5625#define HLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
5626#define HLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
5627#define HLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
5628#define HLSL_BFOP_CAST(op, type) \
5629 emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
5631#define HLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
5632
5633 // If we need to do implicit bitcasts, make sure we do it with the correct type.
5634 uint32_t integer_width = get_integer_width_for_instruction(instr: instruction);
5635 auto int_type = to_signed_basetype(width: integer_width);
5636 auto uint_type = to_unsigned_basetype(width: integer_width);
5637
5638 opcode = get_remapped_spirv_op(op: opcode);
5639
5640 switch (opcode)
5641 {
5642 case OpAccessChain:
5643 case OpInBoundsAccessChain:
5644 {
5645 emit_access_chain(instruction);
5646 break;
5647 }
5648 case OpBitcast:
5649 {
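		// Plain bitcasts map directly, but 64-bit <-> uint2 conversions go through emitted
		// spvPackUint2x32 / spvUnpackUint2x32 helpers, which require a recompile pass to pull in.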
5650 auto bitcast_type = get_bitcast_type(result_type: ops[0], op0: ops[2]);
5651 if (bitcast_type == CompilerHLSL::TypeNormal)
5652 CompilerGLSL::emit_instruction(instr: instruction);
5653 else
5654 {
5655 if (!requires_uint2_packing)
5656 {
5657 requires_uint2_packing = true;
5658 force_recompile();
5659 }
5660
5661 if (bitcast_type == CompilerHLSL::TypePackUint2x32)
5662 emit_unary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op: "spvPackUint2x32");
5663 else
5664 emit_unary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op: "spvUnpackUint2x32");
5665 }
5666
5667 break;
5668 }
5669
5670 case OpSelect:
5671 {
5672 auto &value_type = expression_type(id: ops[3]);
5673 if (value_type.basetype == SPIRType::Struct || is_array(type: value_type))
5674 {
5675 // HLSL does not support ternary expressions on composites.
5676 // Cannot use branches, since we might be in a continue block
5677 // where explicit control flow is prohibited.
5678 // Emit a helper function where we can use control flow.
5679 TypeID value_type_id = expression_type_id(id: ops[3]);
5680 auto itr = std::find(first: composite_selection_workaround_types.begin(),
5681 last: composite_selection_workaround_types.end(),
5682 val: value_type_id);
5683 if (itr == composite_selection_workaround_types.end())
5684 {
5685 composite_selection_workaround_types.push_back(x: value_type_id);
5686 force_recompile();
5687 }
5688 emit_uninitialized_temporary_expression(type: ops[0], id: ops[1]);
5689 statement(ts: "spvSelectComposite(",
5690 ts: to_expression(id: ops[1]), ts: ", ", ts: to_expression(id: ops[2]), ts: ", ",
5691 ts: to_expression(id: ops[3]), ts: ", ", ts: to_expression(id: ops[4]), ts: ");");
5692 }
5693 else
5694 CompilerGLSL::emit_instruction(instr: instruction);
5695 break;
5696 }
5697
5698 case OpStore:
5699 {
5700 emit_store(instruction);
5701 break;
5702 }
5703
5704 case OpLoad:
5705 {
5706 emit_load(instruction);
5707 break;
5708 }
5709
5710 case OpMatrixTimesVector:
5711 {
5712 // Matrices are kept in a transposed state all the time, flip multiplication order always.
5713 emit_binary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[3], op1: ops[2], op: "mul");
5714 break;
5715 }
5716
5717 case OpVectorTimesMatrix:
5718 {
5719 // Matrices are kept in a transposed state all the time, flip multiplication order always.
5720 emit_binary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[3], op1: ops[2], op: "mul");
5721 break;
5722 }
5723
5724 case OpMatrixTimesMatrix:
5725 {
5726 // Matrices are kept in a transposed state all the time, flip multiplication order always.
5727 emit_binary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[3], op1: ops[2], op: "mul");
5728 break;
5729 }
5730
5731 case OpOuterProduct:
5732 {
5733 uint32_t result_type = ops[0];
5734 uint32_t id = ops[1];
5735 uint32_t a = ops[2];
5736 uint32_t b = ops[3];
5737
5738 auto &type = get<SPIRType>(id: result_type);
5739 string expr = type_to_glsl_constructor(type);
5740 expr += "(";
5741 for (uint32_t col = 0; col < type.columns; col++)
5742 {
5743 expr += to_enclosed_expression(id: a);
5744 expr += " * ";
5745 expr += to_extract_component_expression(id: b, index: col);
5746 if (col + 1 < type.columns)
5747 expr += ", ";
5748 }
5749 expr += ")";
5750 emit_op(result_type, result_id: id, rhs: expr, forward_rhs: should_forward(id: a) && should_forward(id: b));
5751 inherit_expression_dependencies(dst: id, source: a);
5752 inherit_expression_dependencies(dst: id, source: b);
5753 break;
5754 }
5755
5756 case OpFMod:
5757 {
5758 if (!requires_op_fmod)
5759 {
5760 requires_op_fmod = true;
5761 force_recompile();
5762 }
5763 CompilerGLSL::emit_instruction(instr: instruction);
5764 break;
5765 }
5766
5767 case OpFRem:
5768 emit_binary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op: "fmod");
5769 break;
5770
5771 case OpImage:
5772 {
5773 uint32_t result_type = ops[0];
5774 uint32_t id = ops[1];
5775 auto *combined = maybe_get<SPIRCombinedImageSampler>(id: ops[2]);
5776
5777 if (combined)
5778 {
5779 auto &e = emit_op(result_type, result_id: id, rhs: to_expression(id: combined->image), forward_rhs: true, suppress_usage_tracking: true);
5780 auto *var = maybe_get_backing_variable(chain: combined->image);
5781 if (var)
5782 e.loaded_from = var->self;
5783 }
5784 else
5785 {
5786 auto &e = emit_op(result_type, result_id: id, rhs: to_expression(id: ops[2]), forward_rhs: true, suppress_usage_tracking: true);
5787 auto *var = maybe_get_backing_variable(chain: ops[2]);
5788 if (var)
5789 e.loaded_from = var->self;
5790 }
5791 break;
5792 }
5793
5794 case OpDPdx:
5795 HLSL_UFOP(ddx);
5796 register_control_dependent_expression(expr: ops[1]);
5797 break;
5798
5799 case OpDPdy:
5800 HLSL_UFOP(ddy);
5801 register_control_dependent_expression(expr: ops[1]);
5802 break;
5803
5804 case OpDPdxFine:
5805 HLSL_UFOP(ddx_fine);
5806 register_control_dependent_expression(expr: ops[1]);
5807 break;
5808
5809 case OpDPdyFine:
5810 HLSL_UFOP(ddy_fine);
5811 register_control_dependent_expression(expr: ops[1]);
5812 break;
5813
5814 case OpDPdxCoarse:
5815 HLSL_UFOP(ddx_coarse);
5816 register_control_dependent_expression(expr: ops[1]);
5817 break;
5818
5819 case OpDPdyCoarse:
5820 HLSL_UFOP(ddy_coarse);
5821 register_control_dependent_expression(expr: ops[1]);
5822 break;
5823
5824 case OpFwidth:
5825 case OpFwidthCoarse:
5826 case OpFwidthFine:
5827 HLSL_UFOP(fwidth);
5828 register_control_dependent_expression(expr: ops[1]);
5829 break;
5830
5831 case OpLogicalNot:
5832 {
5833 auto result_type = ops[0];
5834 auto id = ops[1];
5835 auto &type = get<SPIRType>(id: result_type);
5836
5837 if (type.vecsize > 1)
5838 emit_unrolled_unary_op(result_type, result_id: id, operand: ops[2], op: "!");
5839 else
5840 HLSL_UOP(!);
5841 break;
5842 }
5843
5844 case OpIEqual:
5845 {
5846 auto result_type = ops[0];
5847 auto id = ops[1];
5848
5849 if (expression_type(id: ops[2]).vecsize > 1)
5850 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "==", negate: false, expected_type: SPIRType::Unknown);
5851 else
5852 HLSL_BOP_CAST(==, int_type);
5853 break;
5854 }
5855
5856 case OpLogicalEqual:
5857 case OpFOrdEqual:
5858 case OpFUnordEqual:
5859 {
5860 // HLSL != operator is unordered.
5861 // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules.
5862 // isnan() is apparently implemented as x != x as well.
5863 // We cannot implement UnordEqual as !(OrdNotEqual), as HLSL cannot express OrdNotEqual.
5864 // HACK: FUnordEqual will be implemented as FOrdEqual.
5865
5866 auto result_type = ops[0];
5867 auto id = ops[1];
5868
5869 if (expression_type(id: ops[2]).vecsize > 1)
5870 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "==", negate: false, expected_type: SPIRType::Unknown);
5871 else
5872 HLSL_BOP(==);
5873 break;
5874 }
5875
5876 case OpINotEqual:
5877 {
5878 auto result_type = ops[0];
5879 auto id = ops[1];
5880
5881 if (expression_type(id: ops[2]).vecsize > 1)
5882 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "!=", negate: false, expected_type: SPIRType::Unknown);
5883 else
5884 HLSL_BOP_CAST(!=, int_type);
5885 break;
5886 }
5887
5888 case OpLogicalNotEqual:
5889 case OpFOrdNotEqual:
5890 case OpFUnordNotEqual:
5891 {
5892 // HLSL != operator is unordered.
5893 // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules.
5894 // isnan() is apparently implemented as x != x as well.
5895
5896 // FIXME: FOrdNotEqual cannot be implemented in a crisp and simple way here.
5897 // We would need to do something like not(UnordEqual), but that cannot be expressed either.
5898 // Adding a lot of NaN checks would be a breaking change from perspective of performance.
5899 // SPIR-V will generally use isnan() checks when this even matters.
5900 // HACK: FOrdNotEqual will be implemented as FUnordEqual.
5901
5902 auto result_type = ops[0];
5903 auto id = ops[1];
5904
5905 if (expression_type(id: ops[2]).vecsize > 1)
5906 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "!=", negate: false, expected_type: SPIRType::Unknown);
5907 else
5908 HLSL_BOP(!=);
5909 break;
5910 }
5911
5912 case OpUGreaterThan:
5913 case OpSGreaterThan:
5914 {
5915 auto result_type = ops[0];
5916 auto id = ops[1];
5917 auto type = opcode == OpUGreaterThan ? uint_type : int_type;
5918
5919 if (expression_type(id: ops[2]).vecsize > 1)
5920 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: ">", negate: false, expected_type: type);
5921 else
5922 HLSL_BOP_CAST(>, type);
5923 break;
5924 }
5925
5926 case OpFOrdGreaterThan:
5927 {
5928 auto result_type = ops[0];
5929 auto id = ops[1];
5930
5931 if (expression_type(id: ops[2]).vecsize > 1)
5932 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: ">", negate: false, expected_type: SPIRType::Unknown);
5933 else
5934 HLSL_BOP(>);
5935 break;
5936 }
5937
5938 case OpFUnordGreaterThan:
5939 {
5940 auto result_type = ops[0];
5941 auto id = ops[1];
5942
5943 if (expression_type(id: ops[2]).vecsize > 1)
5944 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "<=", negate: true, expected_type: SPIRType::Unknown);
5945 else
5946 CompilerGLSL::emit_instruction(instr: instruction);
5947 break;
5948 }
5949
5950 case OpUGreaterThanEqual:
5951 case OpSGreaterThanEqual:
5952 {
5953 auto result_type = ops[0];
5954 auto id = ops[1];
5955
5956 auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
5957 if (expression_type(id: ops[2]).vecsize > 1)
5958 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: ">=", negate: false, expected_type: type);
5959 else
5960 HLSL_BOP_CAST(>=, type);
5961 break;
5962 }
5963
5964 case OpFOrdGreaterThanEqual:
5965 {
5966 auto result_type = ops[0];
5967 auto id = ops[1];
5968
5969 if (expression_type(id: ops[2]).vecsize > 1)
5970 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: ">=", negate: false, expected_type: SPIRType::Unknown);
5971 else
5972 HLSL_BOP(>=);
5973 break;
5974 }
5975
5976 case OpFUnordGreaterThanEqual:
5977 {
5978 auto result_type = ops[0];
5979 auto id = ops[1];
5980
5981 if (expression_type(id: ops[2]).vecsize > 1)
5982 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "<", negate: true, expected_type: SPIRType::Unknown);
5983 else
5984 CompilerGLSL::emit_instruction(instr: instruction);
5985 break;
5986 }
5987
5988 case OpULessThan:
5989 case OpSLessThan:
5990 {
5991 auto result_type = ops[0];
5992 auto id = ops[1];
5993
5994 auto type = opcode == OpULessThan ? uint_type : int_type;
5995 if (expression_type(id: ops[2]).vecsize > 1)
5996 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "<", negate: false, expected_type: type);
5997 else
5998 HLSL_BOP_CAST(<, type);
5999 break;
6000 }
6001
6002 case OpFOrdLessThan:
6003 {
6004 auto result_type = ops[0];
6005 auto id = ops[1];
6006
6007 if (expression_type(id: ops[2]).vecsize > 1)
6008 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "<", negate: false, expected_type: SPIRType::Unknown);
6009 else
6010 HLSL_BOP(<);
6011 break;
6012 }
6013
6014 case OpFUnordLessThan:
6015 {
6016 auto result_type = ops[0];
6017 auto id = ops[1];
6018
6019 if (expression_type(id: ops[2]).vecsize > 1)
6020 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: ">=", negate: true, expected_type: SPIRType::Unknown);
6021 else
6022 CompilerGLSL::emit_instruction(instr: instruction);
6023 break;
6024 }
6025
6026 case OpULessThanEqual:
6027 case OpSLessThanEqual:
6028 {
6029 auto result_type = ops[0];
6030 auto id = ops[1];
6031
6032 auto type = opcode == OpULessThanEqual ? uint_type : int_type;
6033 if (expression_type(id: ops[2]).vecsize > 1)
6034 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "<=", negate: false, expected_type: type);
6035 else
6036 HLSL_BOP_CAST(<=, type);
6037 break;
6038 }
6039
6040 case OpFOrdLessThanEqual:
6041 {
6042 auto result_type = ops[0];
6043 auto id = ops[1];
6044
6045 if (expression_type(id: ops[2]).vecsize > 1)
6046 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "<=", negate: false, expected_type: SPIRType::Unknown);
6047 else
6048 HLSL_BOP(<=);
6049 break;
6050 }
6051
6052 case OpFUnordLessThanEqual:
6053 {
6054 auto result_type = ops[0];
6055 auto id = ops[1];
6056
6057 if (expression_type(id: ops[2]).vecsize > 1)
6058 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: ">", negate: true, expected_type: SPIRType::Unknown);
6059 else
6060 CompilerGLSL::emit_instruction(instr: instruction);
6061 break;
6062 }
6063
6064 case OpImageQueryLod:
6065 emit_texture_op(i: instruction, sparse: false);
6066 break;
6067
6068 case OpImageQuerySizeLod:
6069 {
6070 auto result_type = ops[0];
6071 auto id = ops[1];
6072
6073 require_texture_query_variant(var_id: ops[2]);
6074 auto dummy_samples_levels = join(ts: get_fallback_name(id), ts: "_dummy_parameter");
6075 statement(ts: "uint ", ts&: dummy_samples_levels, ts: ";");
6076
6077 auto expr = join(ts: "spvTextureSize(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ",
6078 ts: bitcast_expression(target_type: SPIRType::UInt, arg: ops[3]), ts: ", ", ts&: dummy_samples_levels, ts: ")");
6079
6080 auto &restype = get<SPIRType>(id: ops[0]);
6081 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::UInt, expr);
6082 emit_op(result_type, result_id: id, rhs: expr, forward_rhs: true);
6083 break;
6084 }
6085
6086 case OpImageQuerySize:
6087 {
6088 auto result_type = ops[0];
6089 auto id = ops[1];
6090
6091 require_texture_query_variant(var_id: ops[2]);
6092 bool uav = expression_type(id: ops[2]).image.sampled == 2;
6093
6094 if (const auto *var = maybe_get_backing_variable(chain: ops[2]))
6095 if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(id: var->self, decoration: DecorationNonWritable))
6096 uav = false;
6097
6098 auto dummy_samples_levels = join(ts: get_fallback_name(id), ts: "_dummy_parameter");
6099 statement(ts: "uint ", ts&: dummy_samples_levels, ts: ";");
6100
6101 string expr;
6102 if (uav)
6103 expr = join(ts: "spvImageSize(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: dummy_samples_levels, ts: ")");
6104 else
6105 expr = join(ts: "spvTextureSize(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", 0u, ", ts&: dummy_samples_levels, ts: ")");
6106
6107 auto &restype = get<SPIRType>(id: ops[0]);
6108 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::UInt, expr);
6109 emit_op(result_type, result_id: id, rhs: expr, forward_rhs: true);
6110 break;
6111 }
6112
6113 case OpImageQuerySamples:
6114 case OpImageQueryLevels:
6115 {
6116 auto result_type = ops[0];
6117 auto id = ops[1];
6118
6119 require_texture_query_variant(var_id: ops[2]);
6120 bool uav = expression_type(id: ops[2]).image.sampled == 2;
6121 if (opcode == OpImageQueryLevels && uav)
6122 SPIRV_CROSS_THROW("Cannot query levels for UAV images.");
6123
6124 if (const auto *var = maybe_get_backing_variable(chain: ops[2]))
6125 if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(id: var->self, decoration: DecorationNonWritable))
6126 uav = false;
6127
6128 // Keep it simple and do not emit special variants to make this look nicer ...
6129 // This stuff is barely, if ever, used.
6130 forced_temporaries.insert(x: id);
6131 auto &type = get<SPIRType>(id: result_type);
6132 statement(ts: variable_decl(type, name: to_name(id)), ts: ";");
6133
6134 if (uav)
6135 statement(ts: "spvImageSize(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts: to_name(id), ts: ");");
6136 else
6137 statement(ts: "spvTextureSize(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", 0u, ", ts: to_name(id), ts: ");");
6138
6139 auto &restype = get<SPIRType>(id: ops[0]);
6140 auto expr = bitcast_expression(target_type: restype, expr_type: SPIRType::UInt, expr: to_name(id));
6141 set<SPIRExpression>(id, args&: expr, args&: result_type, args: true);
6142 break;
6143 }
6144
6145 case OpImageRead:
6146 {
6147 uint32_t result_type = ops[0];
6148 uint32_t id = ops[1];
6149 auto *var = maybe_get_backing_variable(chain: ops[2]);
6150 auto &type = expression_type(id: ops[2]);
6151 bool subpass_data = type.image.dim == DimSubpassData;
6152 bool pure = false;
6153
6154 string imgexpr;
6155
6156 if (subpass_data)
6157 {
6158 if (hlsl_options.shader_model < 40)
6159 SPIRV_CROSS_THROW("Subpass loads are not supported in HLSL shader model 2/3.");
6160
6161 // Similar to the GLSL texelFetch path, implement subpass loads with Load() at gl_FragCoord.
6162 if (type.image.ms)
6163 {
6164 uint32_t operands = ops[4];
6165 if (operands != ImageOperandsSampleMask || instruction.length != 6)
6166 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used.");
6167 uint32_t sample = ops[5];
6168 imgexpr = join(ts: to_non_uniform_aware_expression(id: ops[2]), ts: ".Load(int2(gl_FragCoord.xy), ", ts: to_expression(id: sample), ts: ")");
6169 }
6170 else
6171 imgexpr = join(ts: to_non_uniform_aware_expression(id: ops[2]), ts: ".Load(int3(int2(gl_FragCoord.xy), 0))");
6172
6173 pure = true;
6174 }
6175 else
6176 {
6177 imgexpr = join(ts: to_non_uniform_aware_expression(id: ops[2]), ts: "[", ts: to_expression(id: ops[3]), ts: "]");
6178 // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4"
6179 // and the format only changes how the data is interpreted, so remap the swizzle to the format's component count.
6180
6181 bool force_srv =
6182 hlsl_options.nonwritable_uav_texture_as_srv && var && has_decoration(id: var->self, decoration: DecorationNonWritable);
6183 pure = force_srv;
6184
6185 if (var && !subpass_data && !force_srv)
6186 imgexpr = remap_swizzle(result_type: get<SPIRType>(id: result_type),
6187 input_components: image_format_to_components(fmt: get<SPIRType>(id: var->basetype).image.format), expr: imgexpr);
6188 }
6189
6190 if (var)
6191 {
6192 bool forward = forced_temporaries.find(x: id) == end(cont&: forced_temporaries);
6193 auto &e = emit_op(result_type, result_id: id, rhs: imgexpr, forward_rhs: forward);
6194
6195 if (!pure)
6196 {
6197 e.loaded_from = var->self;
6198 if (forward)
6199 var->dependees.push_back(t: id);
6200 }
6201 }
6202 else
6203 emit_op(result_type, result_id: id, rhs: imgexpr, forward_rhs: false);
6204
6205 inherit_expression_dependencies(dst: id, source: ops[2]);
6206 if (type.image.ms)
6207 inherit_expression_dependencies(dst: id, source: ops[5]);
6208 break;
6209 }
6210
6211 case OpImageWrite:
6212 {
6213 auto *var = maybe_get_backing_variable(chain: ops[0]);
6214
6215 // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4"
6216 // and the format only changes how the data is interpreted, so narrow the stored value to the format's component count.
6217 auto value_expr = to_expression(id: ops[2]);
6218 if (var)
6219 {
6220 auto &type = get<SPIRType>(id: var->basetype);
6221 auto narrowed_type = get<SPIRType>(id: type.image.type);
6222 narrowed_type.vecsize = image_format_to_components(fmt: type.image.format);
6223 value_expr = remap_swizzle(result_type: narrowed_type, input_components: expression_type(id: ops[2]).vecsize, expr: value_expr);
6224 }
6225
6226 statement(ts: to_non_uniform_aware_expression(id: ops[0]), ts: "[", ts: to_expression(id: ops[1]), ts: "] = ", ts&: value_expr, ts: ";");
6227 if (var && variable_storage_is_aliased(var: *var))
6228 flush_all_aliased_variables();
6229 break;
6230 }
6231
6232 case OpImageTexelPointer:
6233 {
6234 uint32_t result_type = ops[0];
6235 uint32_t id = ops[1];
6236
6237 auto expr = to_expression(id: ops[2]);
6238 expr += join(ts: "[", ts: to_expression(id: ops[3]), ts: "]");
6239 auto &e = set<SPIRExpression>(id, args&: expr, args&: result_type, args: true);
6240
6241 // When using the pointer, we need to know which variable it is actually loaded from.
6242 auto *var = maybe_get_backing_variable(chain: ops[2]);
6243 e.loaded_from = var ? var->self : ID(0);
6244 inherit_expression_dependencies(dst: id, source: ops[3]);
6245 break;
6246 }
6247
6248 case OpAtomicFAddEXT:
6249 case OpAtomicFMinEXT:
6250 case OpAtomicFMaxEXT:
6251 SPIRV_CROSS_THROW("Floating-point atomics are not supported in HLSL.");
6252
6253 case OpAtomicCompareExchange:
6254 case OpAtomicExchange:
6255 case OpAtomicISub:
6256 case OpAtomicSMin:
6257 case OpAtomicUMin:
6258 case OpAtomicSMax:
6259 case OpAtomicUMax:
6260 case OpAtomicAnd:
6261 case OpAtomicOr:
6262 case OpAtomicXor:
6263 case OpAtomicIAdd:
6264 case OpAtomicIIncrement:
6265 case OpAtomicIDecrement:
6266 case OpAtomicLoad:
6267 case OpAtomicStore:
6268 {
6269 emit_atomic(ops, length: instruction.length, op: opcode);
6270 break;
6271 }
6272
6273 case OpControlBarrier:
6274 case OpMemoryBarrier:
6275 {
6276 uint32_t memory;
6277 uint32_t semantics;
6278
6279 if (opcode == OpMemoryBarrier)
6280 {
6281 memory = evaluate_constant_u32(id: ops[0]);
6282 semantics = evaluate_constant_u32(id: ops[1]);
6283 }
6284 else
6285 {
6286 memory = evaluate_constant_u32(id: ops[1]);
6287 semantics = evaluate_constant_u32(id: ops[2]);
6288 }
6289
6290 if (memory == ScopeSubgroup)
6291 {
6292 // No Wave-barriers in HLSL.
6293 break;
6294 }
6295
6296 // We only care about these flags; acquire/release and friends are not relevant here.
6297 semantics = mask_relevant_memory_semantics(semantics);
6298
6299 if (opcode == OpMemoryBarrier)
6300 {
6301 // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier
6302 // does what we need, so we avoid redundant barriers.
6303 const Instruction *next = get_next_instruction_in_block(instr: instruction);
6304 if (next && next->op == OpControlBarrier)
6305 {
6306 auto *next_ops = stream(instr: *next);
6307 uint32_t next_memory = evaluate_constant_u32(id: next_ops[1]);
6308 uint32_t next_semantics = evaluate_constant_u32(id: next_ops[2]);
6309 next_semantics = mask_relevant_memory_semantics(semantics: next_semantics);
6310
6311 // There is no "just execution barrier" in HLSL.
6312 // If the next instruction carries no memory semantics, treat it as if it synchronizes group shared memory.
6313 if (next_semantics == 0)
6314 next_semantics = MemorySemanticsWorkgroupMemoryMask;
6315
6316 bool memory_scope_covered = false;
6317 if (next_memory == memory)
6318 memory_scope_covered = true;
6319 else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
6320 {
6321 // If we only care about workgroup memory, either Device or Workgroup scope is fine,
6322 // scope does not have to match.
6323 if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
6324 (memory == ScopeDevice || memory == ScopeWorkgroup))
6325 {
6326 memory_scope_covered = true;
6327 }
6328 }
6329 else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
6330 {
6331 // The control barrier has device scope, but the memory barrier just has workgroup scope.
6332 memory_scope_covered = true;
6333 }
6334
6335 // If we have the same memory scope, and all memory types are covered, we're good.
6336 if (memory_scope_covered && (semantics & next_semantics) == semantics)
6337 break;
6338 }
6339 }
6340
6341 // We are synchronizing some memory or syncing execution,
6342 // so we cannot forward any loads beyond the memory barrier.
6343 if (semantics || opcode == OpControlBarrier)
6344 {
6345 assert(current_emitting_block);
6346 flush_control_dependent_expressions(block: current_emitting_block->self);
6347 flush_all_active_variables();
6348 }
6349
6350 if (opcode == OpControlBarrier)
6351 {
6352 // We cannot emit a pure execution barrier; when there are no memory semantics, pick the cheapest option.
6353 if (semantics == MemorySemanticsWorkgroupMemoryMask || semantics == 0)
6354 statement(ts: "GroupMemoryBarrierWithGroupSync();");
6355 else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0)
6356 statement(ts: "DeviceMemoryBarrierWithGroupSync();");
6357 else
6358 statement(ts: "AllMemoryBarrierWithGroupSync();");
6359 }
6360 else
6361 {
6362 if (semantics == MemorySemanticsWorkgroupMemoryMask)
6363 statement(ts: "GroupMemoryBarrier();");
6364 else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0)
6365 statement(ts: "DeviceMemoryBarrier();");
6366 else
6367 statement(ts: "AllMemoryBarrier();");
6368 }
6369 break;
6370 }
6371
6372 case OpBitFieldInsert:
6373 {
6374 if (!requires_bitfield_insert)
6375 {
6376 requires_bitfield_insert = true;
6377 force_recompile();
6378 }
6379
6380 auto expr = join(ts: "spvBitfieldInsert(", ts: to_expression(id: ops[2]), ts: ", ", ts: to_expression(id: ops[3]), ts: ", ",
6381 ts: to_expression(id: ops[4]), ts: ", ", ts: to_expression(id: ops[5]), ts: ")");
6382
6383 bool forward =
6384 should_forward(id: ops[2]) && should_forward(id: ops[3]) && should_forward(id: ops[4]) && should_forward(id: ops[5]);
6385
6386 auto &restype = get<SPIRType>(id: ops[0]);
6387 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::UInt, expr);
6388 emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forward_rhs: forward);
6389 break;
6390 }
6391
6392 case OpBitFieldSExtract:
6393 case OpBitFieldUExtract:
6394 {
6395 if (!requires_bitfield_extract)
6396 {
6397 requires_bitfield_extract = true;
6398 force_recompile();
6399 }
6400
6401 if (opcode == OpBitFieldSExtract)
6402 HLSL_TFOP(spvBitfieldSExtract);
6403 else
6404 HLSL_TFOP(spvBitfieldUExtract);
6405 break;
6406 }
6407
6408 case OpBitCount:
6409 {
6410 auto basetype = expression_type(id: ops[2]).basetype;
6411 emit_unary_func_op_cast(result_type: ops[0], result_id: ops[1], op0: ops[2], op: "countbits", input_type: basetype, expected_result_type: basetype);
6412 break;
6413 }
6414
6415 case OpBitReverse:
6416 HLSL_UFOP(reversebits);
6417 break;
6418
6419 case OpArrayLength:
6420 {
6421 auto *var = maybe_get_backing_variable(chain: ops[2]);
6422 if (!var)
6423 SPIRV_CROSS_THROW("Array length must point directly to an SSBO block.");
6424
6425 auto &type = get<SPIRType>(id: var->basetype);
6426 if (!has_decoration(id: type.self, decoration: DecorationBlock) && !has_decoration(id: type.self, decoration: DecorationBufferBlock))
6427 SPIRV_CROSS_THROW("Array length expression must point to a block type.");
6428
6429 // This must be 32-bit uint, so we're good to go.
6430 emit_uninitialized_temporary_expression(type: ops[0], id: ops[1]);
6431 statement(ts: to_non_uniform_aware_expression(id: ops[2]), ts: ".GetDimensions(", ts: to_expression(id: ops[1]), ts: ");");
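		// GetDimensions() returns the total buffer size in bytes; subtract the member's byte offset and
		// divide by the array stride to recover the runtime array's element count.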
6432 uint32_t offset = type_struct_member_offset(type, index: ops[3]);
6433 uint32_t stride = type_struct_member_array_stride(type, index: ops[3]);
6434 statement(ts: to_expression(id: ops[1]), ts: " = (", ts: to_expression(id: ops[1]), ts: " - ", ts&: offset, ts: ") / ", ts&: stride, ts: ";");
6435 break;
6436 }
6437
6438 case OpIsHelperInvocationEXT:
6439 if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
6440 SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher.");
6441 // Helper lane state with demote is volatile by nature.
6442 // Do not forward this.
6443 emit_op(result_type: ops[0], result_id: ops[1], rhs: "IsHelperLane()", forward_rhs: false);
6444 break;
6445
6446 case OpBeginInvocationInterlockEXT:
6447 case OpEndInvocationInterlockEXT:
6448 if (hlsl_options.shader_model < 51)
6449 SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1.");
6450 break; // Nothing to do in the body
6451
6452 case OpRayQueryInitializeKHR:
6453 {
6454 flush_variable_declaration(id: ops[0]);
6455
6456 std::string ray_desc_name = get_unique_identifier();
6457 statement(ts: "RayDesc ", ts&: ray_desc_name, ts: " = {", ts: to_expression(id: ops[4]), ts: ", ", ts: to_expression(id: ops[5]), ts: ", ",
6458 ts: to_expression(id: ops[6]), ts: ", ", ts: to_expression(id: ops[7]), ts: "};");
6459
6460 statement(ts: to_expression(id: ops[0]), ts: ".TraceRayInline(",
6461 ts: to_expression(id: ops[1]), ts: ", ", // acc structure
6462 ts: to_expression(id: ops[2]), ts: ", ", // ray flags
6463 ts: to_expression(id: ops[3]), ts: ", ", // mask
6464 ts&: ray_desc_name, ts: ");"); // ray
6465 break;
6466 }
6467 case OpRayQueryProceedKHR:
6468 {
6469 flush_variable_declaration(id: ops[0]);
6470 emit_op(result_type: ops[0], result_id: ops[1], rhs: join(ts: to_expression(id: ops[2]), ts: ".Proceed()"), forward_rhs: false);
6471 break;
6472 }
6473 case OpRayQueryTerminateKHR:
6474 {
6475 flush_variable_declaration(id: ops[0]);
6476 statement(ts: to_expression(id: ops[0]), ts: ".Abort();");
6477 break;
6478 }
6479 case OpRayQueryGenerateIntersectionKHR:
6480 {
6481 flush_variable_declaration(id: ops[0]);
6482 statement(ts: to_expression(id: ops[0]), ts: ".CommitProceduralPrimitiveHit(", ts: to_expression(id: ops[1]), ts: ");");
6483 break;
6484 }
6485 case OpRayQueryConfirmIntersectionKHR:
6486 {
6487 flush_variable_declaration(id: ops[0]);
6488 statement(ts: to_expression(id: ops[0]), ts: ".CommitNonOpaqueTriangleHit();");
6489 break;
6490 }
6491 case OpRayQueryGetIntersectionTypeKHR:
6492 {
6493 emit_rayquery_function(commited: ".CommittedStatus()", candidate: ".CandidateType()", ops);
6494 break;
6495 }
6496 case OpRayQueryGetIntersectionTKHR:
6497 {
6498 emit_rayquery_function(commited: ".CommittedRayT()", candidate: ".CandidateTriangleRayT()", ops);
6499 break;
6500 }
6501 case OpRayQueryGetIntersectionInstanceCustomIndexKHR:
6502 {
6503 emit_rayquery_function(commited: ".CommittedInstanceID()", candidate: ".CandidateInstanceID()", ops);
6504 break;
6505 }
6506 case OpRayQueryGetIntersectionInstanceIdKHR:
6507 {
6508 emit_rayquery_function(commited: ".CommittedInstanceIndex()", candidate: ".CandidateInstanceIndex()", ops);
6509 break;
6510 }
6511 case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
6512 {
6513 emit_rayquery_function(commited: ".CommittedInstanceContributionToHitGroupIndex()",
6514 candidate: ".CandidateInstanceContributionToHitGroupIndex()", ops);
6515 break;
6516 }
6517 case OpRayQueryGetIntersectionGeometryIndexKHR:
6518 {
6519 emit_rayquery_function(commited: ".CommittedGeometryIndex()",
6520 candidate: ".CandidateGeometryIndex()", ops);
6521 break;
6522 }
6523 case OpRayQueryGetIntersectionPrimitiveIndexKHR:
6524 {
6525 emit_rayquery_function(commited: ".CommittedPrimitiveIndex()", candidate: ".CandidatePrimitiveIndex()", ops);
6526 break;
6527 }
6528 case OpRayQueryGetIntersectionBarycentricsKHR:
6529 {
6530 emit_rayquery_function(commited: ".CommittedTriangleBarycentrics()", candidate: ".CandidateTriangleBarycentrics()", ops);
6531 break;
6532 }
6533 case OpRayQueryGetIntersectionFrontFaceKHR:
6534 {
6535 emit_rayquery_function(commited: ".CommittedTriangleFrontFace()", candidate: ".CandidateTriangleFrontFace()", ops);
6536 break;
6537 }
6538 case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
6539 {
6540 flush_variable_declaration(id: ops[0]);
6541 emit_op(result_type: ops[0], result_id: ops[1], rhs: join(ts: to_expression(id: ops[2]), ts: ".CandidateProceduralPrimitiveNonOpaque()"), forward_rhs: false);
6542 break;
6543 }
6544 case OpRayQueryGetIntersectionObjectRayDirectionKHR:
6545 {
6546 emit_rayquery_function(commited: ".CommittedObjectRayDirection()", candidate: ".CandidateObjectRayDirection()", ops);
6547 break;
6548 }
6549 case OpRayQueryGetIntersectionObjectRayOriginKHR:
6550 {
6551 flush_variable_declaration(id: ops[0]);
6552 emit_rayquery_function(commited: ".CommittedObjectRayOrigin()", candidate: ".CandidateObjectRayOrigin()", ops);
6553 break;
6554 }
6555 case OpRayQueryGetIntersectionObjectToWorldKHR:
6556 {
6557 emit_rayquery_function(commited: ".CommittedObjectToWorld4x3()", candidate: ".CandidateObjectToWorld4x3()", ops);
6558 break;
6559 }
6560 case OpRayQueryGetIntersectionWorldToObjectKHR:
6561 {
6562 emit_rayquery_function(commited: ".CommittedWorldToObject4x3()", candidate: ".CandidateWorldToObject4x3()", ops);
6563 break;
6564 }
6565 case OpRayQueryGetRayFlagsKHR:
6566 {
6567 flush_variable_declaration(id: ops[0]);
6568 emit_op(result_type: ops[0], result_id: ops[1], rhs: join(ts: to_expression(id: ops[2]), ts: ".RayFlags()"), forward_rhs: false);
6569 break;
6570 }
6571 case OpRayQueryGetRayTMinKHR:
6572 {
6573 flush_variable_declaration(id: ops[0]);
6574 emit_op(result_type: ops[0], result_id: ops[1], rhs: join(ts: to_expression(id: ops[2]), ts: ".RayTMin()"), forward_rhs: false);
6575 break;
6576 }
6577 case OpRayQueryGetWorldRayOriginKHR:
6578 {
6579 flush_variable_declaration(id: ops[0]);
6580 emit_op(result_type: ops[0], result_id: ops[1], rhs: join(ts: to_expression(id: ops[2]), ts: ".WorldRayOrigin()"), forward_rhs: false);
6581 break;
6582 }
6583 case OpRayQueryGetWorldRayDirectionKHR:
6584 {
6585 flush_variable_declaration(id: ops[0]);
6586 emit_op(result_type: ops[0], result_id: ops[1], rhs: join(ts: to_expression(id: ops[2]), ts: ".WorldRayDirection()"), forward_rhs: false);
6587 break;
6588 }
6589 case OpSetMeshOutputsEXT:
6590 {
6591 statement(ts: "SetMeshOutputCounts(", ts: to_unpacked_expression(id: ops[0]), ts: ", ", ts: to_unpacked_expression(id: ops[1]), ts: ");");
6592 break;
6593 }
6594 default:
6595 CompilerGLSL::emit_instruction(instr: instruction);
6596 break;
6597 }
6598}
6599
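// Records which texture size query helper variant is needed for the image
// behind var_id. The variant is keyed on dimensionality, multisampling,
// arrayness and sampled type; UAV images are additionally bucketed by
// normalization state (unorm/snorm/none) and component count, since the
// declared element type of the helper differs. Requiring a new variant forces
// another compile pass so the helper can be emitted.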
void CompilerHLSL::require_texture_query_variant(uint32_t var_id)
{
	if (const auto *var = maybe_get_backing_variable(var_id))
		var_id = var->self;

	auto &type = expression_type(var_id);
	bool uav = type.image.sampled == 2;
	if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var_id, DecorationNonWritable))
		uav = false;

	uint32_t bit = 0;
	switch (type.image.dim)
	{
	case Dim1D:
		bit = type.image.arrayed ? Query1DArray : Query1D;
		break;

	case Dim2D:
		if (type.image.ms)
			bit = type.image.arrayed ? Query2DMSArray : Query2DMS;
		else
			bit = type.image.arrayed ? Query2DArray : Query2D;
		break;

	case Dim3D:
		bit = Query3D;
		break;

	case DimCube:
		bit = type.image.arrayed ? QueryCubeArray : QueryCube;
		break;

	case DimBuffer:
		bit = QueryBuffer;
		break;

	default:
		SPIRV_CROSS_THROW("Unsupported query type.");
	}

	switch (get<SPIRType>(type.image.type).basetype)
	{
	case SPIRType::Float:
		bit += QueryTypeFloat;
		break;

	case SPIRType::Int:
		bit += QueryTypeInt;
		break;

	case SPIRType::UInt:
		bit += QueryTypeUInt;
		break;

	default:
		SPIRV_CROSS_THROW("Unsupported query type.");
	}

	auto norm_state = image_format_to_normalized_state(type.image.format);
	auto &variant = uav ? required_texture_size_variants
	                          .uav[uint32_t(norm_state)][image_format_to_components(type.image.format) - 1] :
	                      required_texture_size_variants.srv;

	uint64_t mask = 1ull << bit;
	if ((variant & mask) == 0)
	{
		force_recompile();
		variant |= mask;
	}
}

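// Public interface: the provided layout describes which ranges of the push
// constant block should be emitted as root constant cbuffers; it is consumed
// later when resources are emitted.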
void CompilerHLSL::set_root_constant_layouts(std::vector<RootConstants> layout)
{
	root_constants_layout = std::move(layout);
}

void CompilerHLSL::add_vertex_attribute_remap(const HLSLVertexAttributeRemap &vertex_attributes)
{
	remap_vertex_attributes.push_back(vertex_attributes);
}

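// HLSL has no equivalent of gl_NumWorkGroups, so if the builtin is actually
// used we synthesize a uniform block with a single uint3 "count" member and
// return its variable ID (0 if the builtin is unused). The application is then
// expected to bind a constant buffer holding the dispatch dimensions for it.
//
// Illustrative caller-side sketch (variable names are assumptions, not part of
// this file):
//
//   CompilerHLSL compiler(std::move(spirv_words));
//   VariableID num_workgroups_id = compiler.remap_num_workgroups_builtin();
//   if (num_workgroups_id != 0)
//   {
//       // Optionally place it via set_decoration(num_workgroups_id, ...),
//       // then bind a cbuffer with the uint3 group count at dispatch time.
//   }
//   std::string hlsl = compiler.compile();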
VariableID CompilerHLSL::remap_num_workgroups_builtin()
{
	update_active_builtins();

	if (!active_input_builtins.get(BuiltInNumWorkgroups))
		return 0;

	// Create a new, fake UBO.
	uint32_t offset = ir.increase_bound_by(4);

	uint32_t uint_type_id = offset;
	uint32_t block_type_id = offset + 1;
	uint32_t block_pointer_type_id = offset + 2;
	uint32_t variable_id = offset + 3;

	SPIRType uint_type { OpTypeVector };
	uint_type.basetype = SPIRType::UInt;
	uint_type.width = 32;
	uint_type.vecsize = 3;
	uint_type.columns = 1;
	set<SPIRType>(uint_type_id, uint_type);

	SPIRType block_type { OpTypeStruct };
	block_type.basetype = SPIRType::Struct;
	block_type.member_types.push_back(uint_type_id);
	set<SPIRType>(block_type_id, block_type);
	set_decoration(block_type_id, DecorationBlock);
	set_member_name(block_type_id, 0, "count");
	set_member_decoration(block_type_id, 0, DecorationOffset, 0);

	SPIRType block_pointer_type = block_type;
	block_pointer_type.pointer = true;
	block_pointer_type.storage = StorageClassUniform;
	block_pointer_type.parent_type = block_type_id;
	auto &ptr_type = set<SPIRType>(block_pointer_type_id, block_pointer_type);

	// Preserve self.
	ptr_type.self = block_type_id;

	set<SPIRVariable>(variable_id, block_pointer_type_id, StorageClassUniform);
	ir.meta[variable_id].decoration.alias = "SPIRV_Cross_NumWorkgroups";

	num_workgroups_builtin = variable_id;
	get_entry_point().interface_variables.push_back(num_workgroups_builtin);
	return variable_id;
}

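// Controls which resource classes get register() bindings assigned
// automatically (see HLSLBindingFlagBits in spirv_hlsl.hpp).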
void CompilerHLSL::set_resource_binding_flags(HLSLBindingFlags flags)
{
	resource_binding_flags = flags;
}

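// Rejects SPIR-V capabilities and addressing modes that cannot be expressed
// in the requested shader model before any code is generated.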
void CompilerHLSL::validate_shader_model()
{
	// Check for nonuniform qualifier.
	// Instead of looping over all decorations to find this, just look at capabilities.
	for (auto &cap : ir.declared_capabilities)
	{
		switch (cap)
		{
		case CapabilityShaderNonUniformEXT:
		case CapabilityRuntimeDescriptorArrayEXT:
			if (hlsl_options.shader_model < 51)
				SPIRV_CROSS_THROW(
				    "Shader model 5.1 or higher is required to use bindless resources or NonUniformResourceIndex.");
			break;

		case CapabilityVariablePointers:
		case CapabilityVariablePointersStorageBuffer:
			SPIRV_CROSS_THROW("VariablePointers capability is not supported in HLSL.");

		default:
			break;
		}
	}

	if (ir.addressing_model != AddressingModelLogical)
		SPIRV_CROSS_THROW("Only Logical addressing model can be used with HLSL.");

	if (hlsl_options.enable_16bit_types && hlsl_options.shader_model < 62)
		SPIRV_CROSS_THROW("Need at least shader model 6.2 when enabling native 16-bit type support.");
}

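// Main entry point. The HLSL backend reuses the GLSL code paths, so compile()
// first configures the base class for Vulkan-style GLSL with HLSL-appropriate
// backend switches, runs the shared analysis passes, and then emits the module
// repeatedly until no pass forces a recompile (e.g. because a new helper
// function or texture size query variant became required).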
string CompilerHLSL::compile()
{
	ir.fixup_reserved_names();

	// Do not deal with ES-isms like precision, older extensions and such.
	options.es = false;
	options.version = 450;
	options.vulkan_semantics = true;
	backend.float_literal_suffix = true;
	backend.double_literal_suffix = false;
	backend.long_long_literal_suffix = true;
	backend.uint32_t_literal_suffix = true;
	backend.int16_t_literal_suffix = "";
	backend.uint16_t_literal_suffix = "u";
	backend.basic_int_type = "int";
	backend.basic_uint_type = "uint";
	backend.demote_literal = "discard";
	backend.boolean_mix_function = "";
	backend.swizzle_is_function = false;
	backend.shared_is_implied = true;
	backend.unsized_array_supported = true;
	backend.explicit_struct_type = false;
	backend.use_initializer_list = true;
	backend.use_constructor_splatting = false;
	backend.can_swizzle_scalar = true;
	backend.can_declare_struct_inline = false;
	backend.can_declare_arrays_inline = false;
	backend.can_return_array = false;
	backend.nonuniform_qualifier = "NonUniformResourceIndex";
	backend.support_case_fallthrough = false;
	backend.force_merged_mesh_block = get_execution_model() == ExecutionModelMeshEXT;
	backend.force_gl_in_out_block = backend.force_merged_mesh_block;
	backend.supports_empty_struct = hlsl_options.shader_model <= 30;

	// SM 4.1 does not support precise for some reason.
	backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40;

	fixup_anonymous_struct_names();
	fixup_type_alias();
	reorder_type_alias();
	build_function_control_flow_graphs_and_analyze();
	validate_shader_model();
	update_active_builtins();
	analyze_image_and_sampler_usage();
	analyze_interlocked_resource_usage();
	if (get_execution_model() == ExecutionModelMeshEXT)
		analyze_meshlet_writes();

	// Subpass input needs SV_Position.
	if (need_subpass_input)
		active_input_builtins.set(BuiltInFragCoord);

	// Need to offset by BaseVertex/BaseInstance in SM 6.8+.
	if (hlsl_options.shader_model >= 68)
	{
		if (active_input_builtins.get(BuiltInVertexIndex))
			active_input_builtins.set(BuiltInBaseVertex);
		if (active_input_builtins.get(BuiltInInstanceIndex))
			active_input_builtins.set(BuiltInBaseInstance);
	}

	uint32_t pass_count = 0;
	do
	{
		reset(pass_count);

		// Move constructor for this type is broken on GCC 4.9 ...
		buffer.reset();

		emit_header();
		emit_resources();

		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
		emit_hlsl_entry_point();

		pass_count++;
	} while (is_forcing_recompilation());

	// Entry point in HLSL is always main() for the time being.
	get_entry_point().name = "main";

	return buffer.str();
}

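// Maps SPIR-V selection/loop control hints to the corresponding HLSL
// attributes: [flatten], [branch], [unroll] and [loop].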
void CompilerHLSL::emit_block_hints(const SPIRBlock &block)
{
	switch (block.hint)
	{
	case SPIRBlock::HintFlatten:
		statement("[flatten]");
		break;
	case SPIRBlock::HintDontFlatten:
		statement("[branch]");
		break;
	case SPIRBlock::HintUnroll:
		statement("[unroll]");
		break;
	case SPIRBlock::HintDontUnroll:
		statement("[loop]");
		break;
	default:
		break;
	}
}

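// Returns a fresh identifier of the form "_<N>ident" for compiler-generated
// temporaries, e.g. the RayDesc built for TraceRayInline above.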
string CompilerHLSL::get_unique_identifier()
{
	return join("_", unique_identifier_count++, "ident");
}

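// Registers an explicit (stage, set, binding) -> register/space remap.
// is_hlsl_resource_binding_used() below reports whether a registered remap was
// actually consumed during compilation (the bool stored alongside the binding).
//
// Illustrative sketch of filling in a remap (the chosen values are
// assumptions, not defaults of this API):
//
//   HLSLResourceBinding binding = {};
//   binding.stage = spv::ExecutionModelFragment;
//   binding.desc_set = 0;
//   binding.binding = 1;
//   binding.srv.register_binding = 3; // -> register(t3)
//   binding.srv.register_space = 0;   // -> space0
//   compiler.add_hlsl_resource_binding(binding);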
void CompilerHLSL::add_hlsl_resource_binding(const HLSLResourceBinding &binding)
{
	StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding };
	resource_bindings[tuple] = { binding, false };
}

bool CompilerHLSL::is_hlsl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) const
{
	StageSetBinding tuple = { model, desc_set, binding };
	auto itr = resource_bindings.find(tuple);
	return itr != end(resource_bindings) && itr->second.second;
}

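// HLSL cannot reinterpret between 64-bit and 32-bit scalars with a plain cast,
// so bitcasts between uint64_t and uint2 are dispatched to dedicated
// pack/unpack handling (TypePackUint2x32 / TypeUnpackUint64); everything else
// takes the normal bitcast path.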
CompilerHLSL::BitcastType CompilerHLSL::get_bitcast_type(uint32_t result_type, uint32_t op0)
{
	auto &rslt_type = get<SPIRType>(result_type);
	auto &expr_type = expression_type(op0);

	if (rslt_type.basetype == SPIRType::BaseType::UInt64 && expr_type.basetype == SPIRType::BaseType::UInt &&
	    expr_type.vecsize == 2)
		return BitcastType::TypePackUint2x32;
	else if (rslt_type.basetype == SPIRType::BaseType::UInt && rslt_type.vecsize == 2 &&
	         expr_type.basetype == SPIRType::BaseType::UInt64)
		return BitcastType::TypeUnpackUint64;

	return BitcastType::TypeNormal;
}

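// Storage buffers can be forced to be emitted as UAVs (RWByteAddressBuffer)
// either globally via hlsl_options.force_storage_buffer_as_uav or per
// (descriptor set, binding) pair registered through
// set_hlsl_force_storage_buffer_as_uav().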
bool CompilerHLSL::is_hlsl_force_storage_buffer_as_uav(ID id) const
{
	if (hlsl_options.force_storage_buffer_as_uav)
	{
		return true;
	}

	const uint32_t desc_set = get_decoration(id, spv::DecorationDescriptorSet);
	const uint32_t binding = get_decoration(id, spv::DecorationBinding);

	return (force_uav_buffer_bindings.find({ desc_set, binding }) != force_uav_buffer_bindings.end());
}

void CompilerHLSL::set_hlsl_force_storage_buffer_as_uav(uint32_t desc_set, uint32_t binding)
{
	SetBindingPair pair = { desc_set, binding };
	force_uav_buffer_bindings.insert(pair);
}

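// With preserve_structured_buffers enabled, buffers whose UserTypeGOOGLE
// decoration starts with "structuredbuffer", "rwstructuredbuffer" or
// "rasterizerorderedstructuredbuffer" keep their StructuredBuffer declaration
// instead of being lowered to a ByteAddressBuffer.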
bool CompilerHLSL::is_user_type_structured(uint32_t id) const
{
	if (hlsl_options.preserve_structured_buffers)
	{
		// Compare left hand side of string only as these user types can contain more meta data such as their subtypes,
		// e.g. "structuredbuffer:int"
		const std::string &user_type = get_decoration_string(id, DecorationUserTypeGOOGLE);
		return user_type.compare(0, 16, "structuredbuffer") == 0 ||
		       user_type.compare(0, 18, "rwstructuredbuffer") == 0 ||
		       user_type.compare(0, 33, "rasterizerorderedstructuredbuffer") == 0;
	}
	return false;
}

void CompilerHLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
{
	// Loading a full array of ClipDistance needs special consideration in mesh shaders
	// since we cannot lower them by wrapping the variables in global statics.
	// Fortunately, clip/cull is a proper vector in HLSL so we can lower with simple rvalue casts.
	if (get_execution_model() != ExecutionModelMeshEXT ||
	    !has_decoration(target_id, DecorationBuiltIn) ||
	    !is_array(expr_type))
	{
		CompilerGLSL::cast_to_variable_store(target_id, expr, expr_type);
		return;
	}

	auto builtin = BuiltIn(get_decoration(target_id, DecorationBuiltIn));
	if (builtin != BuiltInClipDistance && builtin != BuiltInCullDistance)
	{
		CompilerGLSL::cast_to_variable_store(target_id, expr, expr_type);
		return;
	}

	// Array of array means one thread is storing clip distance for all vertices. Nonsensical?
	if (is_array(get<SPIRType>(expr_type.parent_type)))
		SPIRV_CROSS_THROW("Attempting to store all mesh vertices in one go. This is not supported.");

	uint32_t num_clip = to_array_size_literal(expr_type);
	if (num_clip > 4)
		SPIRV_CROSS_THROW("Number of clip or cull distances exceeds 4, this will not work with mesh shaders.");

	auto unrolled_expr = join("float", num_clip, "(");
	for (uint32_t i = 0; i < num_clip; i++)
	{
		unrolled_expr += join(expr, "[", i, "]");
		if (i + 1 < num_clip)
			unrolled_expr += ", ";
	}

	unrolled_expr += ")";
	expr = std::move(unrolled_expr);
}
