/*
 * Copyright 2015-2021 Arm Limited
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <limits>
#include <locale.h>
#include <utility>

#ifndef _WIN32
#ifndef __ghs__
#include <langinfo.h>
#endif
#endif
#include <locale.h>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

enum ExtraSubExpressionType
{
	// Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map.
	EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000,
	EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000
};
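// A map key is presumably formed by combining one of these masks with an ordinary ID
// (e.g. id | EXTRA_SUB_EXPRESSION_TYPE_AUX), so a single ID can carry several auxiliary
// expressions without colliding with plain ID keys.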
51
52static bool is_unsigned_opcode(Op op)
53{
54 // Don't have to be exhaustive, only relevant for legacy target checking ...
55 switch (op)
56 {
57 case OpShiftRightLogical:
58 case OpUGreaterThan:
59 case OpUGreaterThanEqual:
60 case OpULessThan:
61 case OpULessThanEqual:
62 case OpUConvert:
63 case OpUDiv:
64 case OpUMod:
65 case OpUMulExtended:
66 case OpConvertUToF:
67 case OpConvertFToU:
68 return true;
69
70 default:
71 return false;
72 }
73}
74
75static bool is_unsigned_glsl_opcode(GLSLstd450 op)
76{
77 // Don't have to be exhaustive, only relevant for legacy target checking ...
78 switch (op)
79 {
80 case GLSLstd450UClamp:
81 case GLSLstd450UMin:
82 case GLSLstd450UMax:
83 case GLSLstd450FindUMsb:
84 return true;
85
86 default:
87 return false;
88 }
89}
90
91static bool packing_is_vec4_padded(BufferPackingStandard packing)
92{
93 switch (packing)
94 {
95 case BufferPackingHLSLCbuffer:
96 case BufferPackingHLSLCbufferPackOffset:
97 case BufferPackingStd140:
98 case BufferPackingStd140EnhancedLayout:
99 return true;
100
101 default:
102 return false;
103 }
104}
105
106static bool packing_is_hlsl(BufferPackingStandard packing)
107{
108 switch (packing)
109 {
110 case BufferPackingHLSLCbuffer:
111 case BufferPackingHLSLCbufferPackOffset:
112 return true;
113
114 default:
115 return false;
116 }
117}
118
119static bool packing_has_flexible_offset(BufferPackingStandard packing)
120{
121 switch (packing)
122 {
123 case BufferPackingStd140:
124 case BufferPackingStd430:
125 case BufferPackingScalar:
126 case BufferPackingHLSLCbuffer:
127 return false;
128
129 default:
130 return true;
131 }
132}
133
134static bool packing_is_scalar(BufferPackingStandard packing)
135{
136 switch (packing)
137 {
138 case BufferPackingScalar:
139 case BufferPackingScalarEnhancedLayout:
140 return true;
141
142 default:
143 return false;
144 }
145}
146
147static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
148{
149 switch (packing)
150 {
151 case BufferPackingStd140EnhancedLayout:
152 return BufferPackingStd140;
153 case BufferPackingStd430EnhancedLayout:
154 return BufferPackingStd430;
155 case BufferPackingHLSLCbufferPackOffset:
156 return BufferPackingHLSLCbuffer;
157 case BufferPackingScalarEnhancedLayout:
158 return BufferPackingScalar;
159 default:
160 return packing;
161 }
162}
163
164void CompilerGLSL::init()
165{
166 if (ir.source.known)
167 {
168 options.es = ir.source.es;
169 options.version = ir.source.version;
170 }
171
	// Query the locale to see what the decimal point is.
	// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
	// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
	// tricky.
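	// (If the locale uses a comma, naive float printing would emit e.g. "0,5" rather than "0.5",
	// which is not valid GLSL, so emitted literals are patched up later using this radix character.)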
176#ifdef _WIN32
177 // On Windows, localeconv uses thread-local storage, so it should be fine.
178 const struct lconv *conv = localeconv();
179 if (conv && conv->decimal_point)
180 current_locale_radix_character = *conv->decimal_point;
181#elif defined(__ANDROID__) && __ANDROID_API__ < 26 || defined(__ghs__) || defined(__QNXNTO__)
	// nl_langinfo is not supported on this platform, so fall back to the worse alternative.
183 const struct lconv *conv = localeconv();
184 if (conv && conv->decimal_point)
185 current_locale_radix_character = *conv->decimal_point;
186#else
	// localeconv, the portable function, is not MT safe ...
188 const char *decimal_point = nl_langinfo(RADIXCHAR);
189 if (decimal_point && *decimal_point != '\0')
190 current_locale_radix_character = *decimal_point;
191#endif
192}
193
194static const char *to_pls_layout(PlsFormat format)
195{
196 switch (format)
197 {
198 case PlsR11FG11FB10F:
199 return "layout(r11f_g11f_b10f) ";
200 case PlsR32F:
201 return "layout(r32f) ";
202 case PlsRG16F:
203 return "layout(rg16f) ";
204 case PlsRGB10A2:
205 return "layout(rgb10_a2) ";
206 case PlsRGBA8:
207 return "layout(rgba8) ";
208 case PlsRG16:
209 return "layout(rg16) ";
210 case PlsRGBA8I:
		return "layout(rgba8i) ";
212 case PlsRG16I:
213 return "layout(rg16i) ";
214 case PlsRGB10A2UI:
215 return "layout(rgb10_a2ui) ";
216 case PlsRGBA8UI:
217 return "layout(rgba8ui) ";
218 case PlsRG16UI:
219 return "layout(rg16ui) ";
220 case PlsR32UI:
221 return "layout(r32ui) ";
222 default:
223 return "";
224 }
225}
226
227static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
228{
229 switch (format)
230 {
231 default:
232 case PlsR11FG11FB10F:
233 case PlsR32F:
234 case PlsRG16F:
235 case PlsRGB10A2:
236 case PlsRGBA8:
237 case PlsRG16:
238 return SPIRType::Float;
239
240 case PlsRGBA8I:
241 case PlsRG16I:
242 return SPIRType::Int;
243
244 case PlsRGB10A2UI:
245 case PlsRGBA8UI:
246 case PlsRG16UI:
247 case PlsR32UI:
248 return SPIRType::UInt;
249 }
250}
251
252static uint32_t pls_format_to_components(PlsFormat format)
253{
254 switch (format)
255 {
256 default:
257 case PlsR32F:
258 case PlsR32UI:
259 return 1;
260
261 case PlsRG16F:
262 case PlsRG16:
263 case PlsRG16UI:
264 case PlsRG16I:
265 return 2;
266
267 case PlsR11FG11FB10F:
268 return 3;
269
270 case PlsRGB10A2:
271 case PlsRGBA8:
272 case PlsRGBA8I:
273 case PlsRGB10A2UI:
274 case PlsRGBA8UI:
275 return 4;
276 }
277}
278
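// Returns the swizzle string for a contiguous range of components: vector_swizzle(2, 1)
// yields ".yz", vector_swizzle(3, 0) yields ".xyz", and a full 4-component selection yields
// "" since no swizzle is needed.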
279const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
280{
281 static const char *const swizzle[4][4] = {
282 { ".x", ".y", ".z", ".w" },
283 { ".xy", ".yz", ".zw", nullptr },
284 { ".xyz", ".yzw", nullptr, nullptr },
285#if defined(__GNUC__) && (__GNUC__ == 9)
286 // This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
287 // This array ends up being compiled as all nullptrs, tripping the assertions below.
288 { "", nullptr, nullptr, "$" },
289#else
290 { "", nullptr, nullptr, nullptr },
291#endif
292 };
293
294 assert(vecsize >= 1 && vecsize <= 4);
295 assert(index >= 0 && index < 4);
296 assert(swizzle[vecsize - 1][index]);
297
298 return swizzle[vecsize - 1][index];
299}
300
301void CompilerGLSL::reset(uint32_t iteration_count)
302{
303 // Sanity check the iteration count to be robust against a certain class of bugs where
304 // we keep forcing recompilations without making clear forward progress.
305 // In buggy situations we will loop forever, or loop for an unbounded number of iterations.
306 // Certain types of recompilations are considered to make forward progress,
307 // but in almost all situations, we'll never see more than 3 iterations.
308 // It is highly context-sensitive when we need to force recompilation,
309 // and it is not practical with the current architecture
310 // to resolve everything up front.
311 if (iteration_count >= options.force_recompile_max_debug_iterations && !is_force_recompile_forward_progress)
312 SPIRV_CROSS_THROW("Maximum compilation loops detected and no forward progress was made. Must be a SPIRV-Cross bug!");
313
314 // We do some speculative optimizations which should pretty much always work out,
315 // but just in case the SPIR-V is rather weird, recompile until it's happy.
316 // This typically only means one extra pass.
317 clear_force_recompile();
318
319 // Clear invalid expression tracking.
320 invalid_expressions.clear();
321 composite_insert_overwritten.clear();
322 current_function = nullptr;
323
324 // Clear temporary usage tracking.
325 expression_usage_counts.clear();
326 forwarded_temporaries.clear();
327 suppressed_usage_tracking.clear();
328
329 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
330 flushed_phi_variables.clear();
331
332 reset_name_caches();
333
334 ir.for_each_typed_id<SPIRFunction>(op: [&](uint32_t, SPIRFunction &func) {
335 func.active = false;
336 func.flush_undeclared = true;
337 });
338
339 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });
340
341 ir.reset_all_of_type<SPIRExpression>();
342 ir.reset_all_of_type<SPIRAccessChain>();
343
344 statement_count = 0;
345 indent = 0;
346 current_loop_level = 0;
347}
348
349void CompilerGLSL::remap_pls_variables()
350{
351 for (auto &input : pls_inputs)
352 {
353 auto &var = get<SPIRVariable>(id: input.id);
354
355 bool input_is_target = false;
356 if (var.storage == StorageClassUniformConstant)
357 {
358 auto &type = get<SPIRType>(id: var.basetype);
359 input_is_target = type.image.dim == DimSubpassData;
360 }
361
362 if (var.storage != StorageClassInput && !input_is_target)
363 SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
364 var.remapped_variable = true;
365 }
366
367 for (auto &output : pls_outputs)
368 {
369 auto &var = get<SPIRVariable>(id: output.id);
370 if (var.storage != StorageClassOutput)
371 SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
372 var.remapped_variable = true;
373 }
374}
375
376void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent)
377{
378 subpass_to_framebuffer_fetch_attachment.push_back(x: { input_attachment_index, color_location });
379 inout_color_attachments.push_back(x: { color_location, coherent });
380}
381
382bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const
383{
384 return std::find_if(first: begin(cont: inout_color_attachments), last: end(cont: inout_color_attachments),
385 pred: [&](const std::pair<uint32_t, bool> &elem) {
386 return elem.first == location;
387 }) != end(cont: inout_color_attachments);
388}
389
390bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const
391{
392 return std::find_if(first: begin(cont: inout_color_attachments), last: end(cont: inout_color_attachments),
393 pred: [&](const std::pair<uint32_t, bool> &elem) {
394 return elem.first == location && !elem.second;
395 }) != end(cont: inout_color_attachments);
396}
397
398void CompilerGLSL::find_static_extensions()
399{
400 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t, const SPIRType &type) {
401 if (type.basetype == SPIRType::Double)
402 {
403 if (options.es)
404 SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
405 if (!options.es && options.version < 400)
406 require_extension_internal(ext: "GL_ARB_gpu_shader_fp64");
407 }
408 else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
409 {
410 if (options.es)
411 SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
412 if (!options.es)
413 require_extension_internal(ext: "GL_ARB_gpu_shader_int64");
414 }
415 else if (type.basetype == SPIRType::Half)
416 {
417 require_extension_internal(ext: "GL_EXT_shader_explicit_arithmetic_types_float16");
418 if (options.vulkan_semantics)
419 require_extension_internal(ext: "GL_EXT_shader_16bit_storage");
420 }
421 else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
422 {
423 require_extension_internal(ext: "GL_EXT_shader_explicit_arithmetic_types_int8");
424 if (options.vulkan_semantics)
425 require_extension_internal(ext: "GL_EXT_shader_8bit_storage");
426 }
427 else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
428 {
429 require_extension_internal(ext: "GL_EXT_shader_explicit_arithmetic_types_int16");
430 if (options.vulkan_semantics)
431 require_extension_internal(ext: "GL_EXT_shader_16bit_storage");
432 }
433 });
434
435 auto &execution = get_entry_point();
436 switch (execution.model)
437 {
438 case ExecutionModelGLCompute:
439 if (!options.es && options.version < 430)
440 require_extension_internal(ext: "GL_ARB_compute_shader");
441 if (options.es && options.version < 310)
442 SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
443 break;
444
445 case ExecutionModelGeometry:
446 if (options.es && options.version < 320)
447 require_extension_internal(ext: "GL_EXT_geometry_shader");
448 if (!options.es && options.version < 150)
449 require_extension_internal(ext: "GL_ARB_geometry_shader4");
450
451 if (execution.flags.get(bit: ExecutionModeInvocations) && execution.invocations != 1)
452 {
453 // Instanced GS is part of 400 core or this extension.
454 if (!options.es && options.version < 400)
455 require_extension_internal(ext: "GL_ARB_gpu_shader5");
456 }
457 break;
458
459 case ExecutionModelTessellationEvaluation:
460 case ExecutionModelTessellationControl:
461 if (options.es && options.version < 320)
462 require_extension_internal(ext: "GL_EXT_tessellation_shader");
463 if (!options.es && options.version < 400)
464 require_extension_internal(ext: "GL_ARB_tessellation_shader");
465 break;
466
467 case ExecutionModelRayGenerationKHR:
468 case ExecutionModelIntersectionKHR:
469 case ExecutionModelAnyHitKHR:
470 case ExecutionModelClosestHitKHR:
471 case ExecutionModelMissKHR:
472 case ExecutionModelCallableKHR:
473 // NV enums are aliases.
474 if (options.es || options.version < 460)
475 SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
476 if (!options.vulkan_semantics)
477 SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");
478
479 // Need to figure out if we should target KHR or NV extension based on capabilities.
480 for (auto &cap : ir.declared_capabilities)
481 {
482 if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR ||
483 cap == CapabilityRayTraversalPrimitiveCullingKHR)
484 {
485 ray_tracing_is_khr = true;
486 break;
487 }
488 }
489
490 if (ray_tracing_is_khr)
491 {
492 // In KHR ray tracing we pass payloads by pointer instead of location,
493 // so make sure we assign locations properly.
494 ray_tracing_khr_fixup_locations();
495 require_extension_internal(ext: "GL_EXT_ray_tracing");
496 }
497 else
498 require_extension_internal(ext: "GL_NV_ray_tracing");
499 break;
500
501 default:
502 break;
503 }
504
505 if (!pls_inputs.empty() || !pls_outputs.empty())
506 {
507 if (execution.model != ExecutionModelFragment)
508 SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
509 require_extension_internal(ext: "GL_EXT_shader_pixel_local_storage");
510 }
511
512 if (!inout_color_attachments.empty())
513 {
514 if (execution.model != ExecutionModelFragment)
515 SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
516 if (options.vulkan_semantics)
517 SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");
518
519 bool has_coherent = false;
520 bool has_incoherent = false;
521
522 for (auto &att : inout_color_attachments)
523 {
524 if (att.second)
525 has_coherent = true;
526 else
527 has_incoherent = true;
528 }
529
530 if (has_coherent)
531 require_extension_internal(ext: "GL_EXT_shader_framebuffer_fetch");
532 if (has_incoherent)
533 require_extension_internal(ext: "GL_EXT_shader_framebuffer_fetch_non_coherent");
534 }
535
536 if (options.separate_shader_objects && !options.es && options.version < 410)
537 require_extension_internal(ext: "GL_ARB_separate_shader_objects");
538
539 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
540 {
541 if (!options.vulkan_semantics)
542 SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
543 if (options.es && options.version < 320)
544 SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
545 else if (!options.es && options.version < 450)
546 SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
547 require_extension_internal(ext: "GL_EXT_buffer_reference");
548 }
549 else if (ir.addressing_model != AddressingModelLogical)
550 {
551 SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
552 }
553
554 // Check for nonuniform qualifier and passthrough.
555 // Instead of looping over all decorations to find this, just look at capabilities.
556 for (auto &cap : ir.declared_capabilities)
557 {
558 switch (cap)
559 {
560 case CapabilityShaderNonUniformEXT:
561 if (!options.vulkan_semantics)
562 require_extension_internal(ext: "GL_NV_gpu_shader5");
563 else
564 require_extension_internal(ext: "GL_EXT_nonuniform_qualifier");
565 break;
566 case CapabilityRuntimeDescriptorArrayEXT:
567 if (!options.vulkan_semantics)
568 SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
569 require_extension_internal(ext: "GL_EXT_nonuniform_qualifier");
570 break;
571
572 case CapabilityGeometryShaderPassthroughNV:
573 if (execution.model == ExecutionModelGeometry)
574 {
575 require_extension_internal(ext: "GL_NV_geometry_shader_passthrough");
576 execution.geometry_passthrough = true;
577 }
578 break;
579
580 case CapabilityVariablePointers:
581 case CapabilityVariablePointersStorageBuffer:
582 SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");
583
584 case CapabilityMultiView:
585 if (options.vulkan_semantics)
586 require_extension_internal(ext: "GL_EXT_multiview");
587 else
588 {
589 require_extension_internal(ext: "GL_OVR_multiview2");
590 if (options.ovr_multiview_view_count == 0)
591 SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2.");
592 if (get_execution_model() != ExecutionModelVertex)
593 SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
594 }
595 break;
596
597 case CapabilityRayQueryKHR:
598 if (options.es || options.version < 460 || !options.vulkan_semantics)
599 SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
600 require_extension_internal(ext: "GL_EXT_ray_query");
601 ray_tracing_is_khr = true;
602 break;
603
604 case CapabilityRayTraversalPrimitiveCullingKHR:
605 if (options.es || options.version < 460 || !options.vulkan_semantics)
606 SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
607 require_extension_internal(ext: "GL_EXT_ray_flags_primitive_culling");
608 ray_tracing_is_khr = true;
609 break;
610
611 default:
612 break;
613 }
614 }
615
616 if (options.ovr_multiview_view_count)
617 {
618 if (options.vulkan_semantics)
619 SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics.");
620 if (get_execution_model() != ExecutionModelVertex)
621 SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
622 require_extension_internal(ext: "GL_OVR_multiview2");
623 }
624
625 // KHR one is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR.
626 for (auto &ext : ir.declared_extensions)
627 if (ext == "SPV_NV_fragment_shader_barycentric")
628 barycentric_is_nv = true;
629}
630
631void CompilerGLSL::ray_tracing_khr_fixup_locations()
632{
633 uint32_t location = 0;
634 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
635 // Incoming payload storage can also be used for tracing.
636 if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR &&
637 var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR)
638 return;
639 if (is_hidden_variable(var))
640 return;
641 set_decoration(id: var.self, decoration: DecorationLocation, argument: location++);
642 });
643}
644
645string CompilerGLSL::compile()
646{
647 ir.fixup_reserved_names();
648
649 if (!options.vulkan_semantics)
650 {
651 // only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
652 backend.nonuniform_qualifier = "";
653 backend.needs_row_major_load_workaround = true;
654 }
655 backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
656 backend.force_gl_in_out_block = true;
657 backend.supports_extensions = true;
658 backend.use_array_constructor = true;
659 backend.workgroup_size_is_hidden = true;
660 backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics;
661 backend.support_precise_qualifier =
662 (!options.es && options.version >= 400) || (options.es && options.version >= 320);
663
664 if (is_legacy_es())
665 backend.support_case_fallthrough = false;
666
667 // Scan the SPIR-V to find trivial uses of extensions.
668 fixup_anonymous_struct_names();
669 fixup_type_alias();
670 reorder_type_alias();
671 build_function_control_flow_graphs_and_analyze();
672 find_static_extensions();
673 fixup_image_load_store_access();
674 update_active_builtins();
675 analyze_image_and_sampler_usage();
676 analyze_interlocked_resource_usage();
677 if (!inout_color_attachments.empty())
678 emit_inout_fragment_outputs_copy_to_subpass_inputs();
679
680 // Shaders might cast unrelated data to pointers of non-block types.
681 // Find all such instances and make sure we can cast the pointers to a synthesized block type.
682 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
683 analyze_non_block_pointer_types();
684
685 uint32_t pass_count = 0;
686 do
687 {
688 reset(iteration_count: pass_count);
689
690 buffer.reset();
691
692 emit_header();
693 emit_resources();
694 emit_extension_workarounds(model: get_execution_model());
695
696 emit_function(func&: get<SPIRFunction>(id: ir.default_entry_point), return_flags: Bitset());
697
698 pass_count++;
699 } while (is_forcing_recompilation());
700
701 // Implement the interlocked wrapper function at the end.
702 // The body was implemented in lieu of main().
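	// The generated wrapper is a plain main() that brackets spvMainInterlockedBody() with
	// SPIRV_Cross_beginInvocationInterlock()/SPIRV_Cross_endInvocationInterlock(), which
	// emit_header() maps to the NV, ARB or INTEL interlock built-ins via #defines.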
703 if (interlocked_is_complex)
704 {
705 statement(ts: "void main()");
706 begin_scope();
707 statement(ts: "// Interlocks were used in a way not compatible with GLSL, this is very slow.");
708 statement(ts: "SPIRV_Cross_beginInvocationInterlock();");
709 statement(ts: "spvMainInterlockedBody();");
710 statement(ts: "SPIRV_Cross_endInvocationInterlock();");
711 end_scope();
712 }
713
714 // Entry point in GLSL is always main().
715 get_entry_point().name = "main";
716
717 return buffer.str();
718}
719
720std::string CompilerGLSL::get_partial_source()
721{
722 return buffer.str();
723}
724
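// Builds the "local_size_x = ..." style layout arguments that declare the compute workgroup size,
// ending up in a declaration such as: layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
// With Vulkan semantics, spec constants are expressed as local_size_*_id arguments; on plain GLSL
// they go through the constant's macro name instead, since there is no spec-constant mechanism there.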
725void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
726 const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
727{
728 auto &execution = get_entry_point();
729 bool builtin_workgroup = execution.workgroup_size.constant != 0;
730 bool use_local_size_id = !builtin_workgroup && execution.flags.get(bit: ExecutionModeLocalSizeId);
731
732 if (wg_x.id)
733 {
734 if (options.vulkan_semantics)
735 arguments.push_back(t: join(ts: "local_size_x_id = ", ts: wg_x.constant_id));
736 else
737 arguments.push_back(t: join(ts: "local_size_x = ", ts&: get<SPIRConstant>(id: wg_x.id).specialization_constant_macro_name));
738 }
739 else if (use_local_size_id && execution.workgroup_size.id_x)
740 arguments.push_back(t: join(ts: "local_size_x = ", ts: get<SPIRConstant>(id: execution.workgroup_size.id_x).scalar()));
741 else
742 arguments.push_back(t: join(ts: "local_size_x = ", ts&: execution.workgroup_size.x));
743
744 if (wg_y.id)
745 {
746 if (options.vulkan_semantics)
747 arguments.push_back(t: join(ts: "local_size_y_id = ", ts: wg_y.constant_id));
748 else
749 arguments.push_back(t: join(ts: "local_size_y = ", ts&: get<SPIRConstant>(id: wg_y.id).specialization_constant_macro_name));
750 }
751 else if (use_local_size_id && execution.workgroup_size.id_y)
752 arguments.push_back(t: join(ts: "local_size_y = ", ts: get<SPIRConstant>(id: execution.workgroup_size.id_y).scalar()));
753 else
754 arguments.push_back(t: join(ts: "local_size_y = ", ts&: execution.workgroup_size.y));
755
756 if (wg_z.id)
757 {
758 if (options.vulkan_semantics)
759 arguments.push_back(t: join(ts: "local_size_z_id = ", ts: wg_z.constant_id));
760 else
761 arguments.push_back(t: join(ts: "local_size_z = ", ts&: get<SPIRConstant>(id: wg_z.id).specialization_constant_macro_name));
762 }
763 else if (use_local_size_id && execution.workgroup_size.id_z)
764 arguments.push_back(t: join(ts: "local_size_z = ", ts: get<SPIRConstant>(id: execution.workgroup_size.id_z).scalar()));
765 else
766 arguments.push_back(t: join(ts: "local_size_z = ", ts&: execution.workgroup_size.z));
767}
768
769void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
770{
771 if (options.vulkan_semantics)
772 {
773 auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
774 require_extension_internal(ext: ShaderSubgroupSupportHelper::get_extension_name(c: khr_extension));
775 }
776 else
777 {
778 if (!shader_subgroup_supporter.is_feature_requested(feature))
779 force_recompile();
780 shader_subgroup_supporter.request_feature(feature);
781 }
782}
783
784void CompilerGLSL::emit_header()
785{
786 auto &execution = get_entry_point();
787 statement(ts: "#version ", ts&: options.version, ts: options.es && options.version > 100 ? " es" : "");
788
789 if (!options.es && options.version < 420)
790 {
791 // Needed for binding = # on UBOs, etc.
792 if (options.enable_420pack_extension)
793 {
794 statement(ts: "#ifdef GL_ARB_shading_language_420pack");
795 statement(ts: "#extension GL_ARB_shading_language_420pack : require");
796 statement(ts: "#endif");
797 }
798 // Needed for: layout(early_fragment_tests) in;
799 if (execution.flags.get(bit: ExecutionModeEarlyFragmentTests))
800 require_extension_internal(ext: "GL_ARB_shader_image_load_store");
801 }
802
803 // Needed for: layout(post_depth_coverage) in;
804 if (execution.flags.get(bit: ExecutionModePostDepthCoverage))
805 require_extension_internal(ext: "GL_ARB_post_depth_coverage");
806
807 // Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
808 bool interlock_used = execution.flags.get(bit: ExecutionModePixelInterlockOrderedEXT) ||
809 execution.flags.get(bit: ExecutionModePixelInterlockUnorderedEXT) ||
810 execution.flags.get(bit: ExecutionModeSampleInterlockOrderedEXT) ||
811 execution.flags.get(bit: ExecutionModeSampleInterlockUnorderedEXT);
812
813 if (interlock_used)
814 {
815 if (options.es)
816 {
817 if (options.version < 310)
818 SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
819 require_extension_internal(ext: "GL_NV_fragment_shader_interlock");
820 }
821 else
822 {
823 if (options.version < 420)
824 require_extension_internal(ext: "GL_ARB_shader_image_load_store");
825 require_extension_internal(ext: "GL_ARB_fragment_shader_interlock");
826 }
827 }
828
829 for (auto &ext : forced_extensions)
830 {
831 if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
832 {
833 // Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
834 // GL_AMD_gpu_shader_half_float is a superset, so try that first.
835 statement(ts: "#if defined(GL_AMD_gpu_shader_half_float)");
836 statement(ts: "#extension GL_AMD_gpu_shader_half_float : require");
837 if (!options.vulkan_semantics)
838 {
839 statement(ts: "#elif defined(GL_NV_gpu_shader5)");
840 statement(ts: "#extension GL_NV_gpu_shader5 : require");
841 }
842 else
843 {
844 statement(ts: "#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
845 statement(ts: "#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
846 }
847 statement(ts: "#else");
848 statement(ts: "#error No extension available for FP16.");
849 statement(ts: "#endif");
850 }
851 else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
852 {
853 if (options.vulkan_semantics)
854 statement(ts: "#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
855 else
856 {
857 statement(ts: "#if defined(GL_AMD_gpu_shader_int16)");
858 statement(ts: "#extension GL_AMD_gpu_shader_int16 : require");
859 statement(ts: "#elif defined(GL_NV_gpu_shader5)");
860 statement(ts: "#extension GL_NV_gpu_shader5 : require");
861 statement(ts: "#else");
862 statement(ts: "#error No extension available for Int16.");
863 statement(ts: "#endif");
864 }
865 }
866 else if (ext == "GL_ARB_post_depth_coverage")
867 {
868 if (options.es)
869 statement(ts: "#extension GL_EXT_post_depth_coverage : require");
870 else
871 {
				statement("#if defined(GL_ARB_post_depth_coverage)");
873 statement(ts: "#extension GL_ARB_post_depth_coverage : require");
874 statement(ts: "#else");
875 statement(ts: "#extension GL_EXT_post_depth_coverage : require");
876 statement(ts: "#endif");
877 }
878 }
879 else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
880 {
881 // Soft-enable this extension on plain GLSL.
882 statement(ts: "#ifdef ", ts&: ext);
883 statement(ts: "#extension ", ts&: ext, ts: " : enable");
884 statement(ts: "#endif");
885 }
886 else if (ext == "GL_EXT_control_flow_attributes")
887 {
888 // These are just hints so we can conditionally enable and fallback in the shader.
889 statement(ts: "#if defined(GL_EXT_control_flow_attributes)");
890 statement(ts: "#extension GL_EXT_control_flow_attributes : require");
891 statement(ts: "#define SPIRV_CROSS_FLATTEN [[flatten]]");
892 statement(ts: "#define SPIRV_CROSS_BRANCH [[dont_flatten]]");
893 statement(ts: "#define SPIRV_CROSS_UNROLL [[unroll]]");
894 statement(ts: "#define SPIRV_CROSS_LOOP [[dont_unroll]]");
895 statement(ts: "#else");
896 statement(ts: "#define SPIRV_CROSS_FLATTEN");
897 statement(ts: "#define SPIRV_CROSS_BRANCH");
898 statement(ts: "#define SPIRV_CROSS_UNROLL");
899 statement(ts: "#define SPIRV_CROSS_LOOP");
900 statement(ts: "#endif");
901 }
902 else if (ext == "GL_NV_fragment_shader_interlock")
903 {
904 statement(ts: "#extension GL_NV_fragment_shader_interlock : require");
905 statement(ts: "#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockNV()");
906 statement(ts: "#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockNV()");
907 }
908 else if (ext == "GL_ARB_fragment_shader_interlock")
909 {
910 statement(ts: "#ifdef GL_ARB_fragment_shader_interlock");
911 statement(ts: "#extension GL_ARB_fragment_shader_interlock : enable");
912 statement(ts: "#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB()");
913 statement(ts: "#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB()");
914 statement(ts: "#elif defined(GL_INTEL_fragment_shader_ordering)");
915 statement(ts: "#extension GL_INTEL_fragment_shader_ordering : enable");
916 statement(ts: "#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL()");
917 statement(ts: "#define SPIRV_Cross_endInvocationInterlock()");
918 statement(ts: "#endif");
919 }
920 else
921 statement(ts: "#extension ", ts&: ext, ts: " : require");
922 }
923
924 if (!options.vulkan_semantics)
925 {
926 using Supp = ShaderSubgroupSupportHelper;
927 auto result = shader_subgroup_supporter.resolve();
928
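		// For each requested subgroup feature, the loop below emits an #if defined(...)/#elif defined(...)
		// chain over the candidate vendor/ARB extensions, enabling the first one that is present, and
		// falls back to an #error when the feature cannot be emulated without extensions.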
929 for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
930 {
931 auto feature = static_cast<Supp::Feature>(feature_index);
932 if (!shader_subgroup_supporter.is_feature_requested(feature))
933 continue;
934
935 auto exts = Supp::get_candidates_for_feature(ft: feature, r: result);
936 if (exts.empty())
937 continue;
938
939 statement(ts: "");
940
941 for (auto &ext : exts)
942 {
943 const char *name = Supp::get_extension_name(c: ext);
944 const char *extra_predicate = Supp::get_extra_required_extension_predicate(c: ext);
945 auto extra_names = Supp::get_extra_required_extension_names(c: ext);
946 statement(ts: &ext != &exts.front() ? "#elif" : "#if", ts: " defined(", ts&: name, ts: ")",
947 ts: (*extra_predicate != '\0' ? " && " : ""), ts&: extra_predicate);
948 for (const auto &e : extra_names)
949 statement(ts: "#extension ", ts: e, ts: " : enable");
950 statement(ts: "#extension ", ts&: name, ts: " : require");
951 }
952
953 if (!Supp::can_feature_be_implemented_without_extensions(feature))
954 {
955 statement(ts: "#else");
956 statement(ts: "#error No extensions available to emulate requested subgroup feature.");
957 }
958
959 statement(ts: "#endif");
960 }
961 }
962
963 for (auto &header : header_lines)
964 statement(ts&: header);
965
966 SmallVector<string> inputs;
967 SmallVector<string> outputs;
968
969 switch (execution.model)
970 {
971 case ExecutionModelVertex:
972 if (options.ovr_multiview_view_count)
973 inputs.push_back(t: join(ts: "num_views = ", ts&: options.ovr_multiview_view_count));
974 break;
975 case ExecutionModelGeometry:
976 if ((execution.flags.get(bit: ExecutionModeInvocations)) && execution.invocations != 1)
977 inputs.push_back(t: join(ts: "invocations = ", ts&: execution.invocations));
978 if (execution.flags.get(bit: ExecutionModeInputPoints))
979 inputs.push_back(t: "points");
980 if (execution.flags.get(bit: ExecutionModeInputLines))
981 inputs.push_back(t: "lines");
982 if (execution.flags.get(bit: ExecutionModeInputLinesAdjacency))
983 inputs.push_back(t: "lines_adjacency");
984 if (execution.flags.get(bit: ExecutionModeTriangles))
985 inputs.push_back(t: "triangles");
986 if (execution.flags.get(bit: ExecutionModeInputTrianglesAdjacency))
987 inputs.push_back(t: "triangles_adjacency");
988
989 if (!execution.geometry_passthrough)
990 {
			// For passthrough, these are implied and cannot be declared in the shader.
992 outputs.push_back(t: join(ts: "max_vertices = ", ts&: execution.output_vertices));
993 if (execution.flags.get(bit: ExecutionModeOutputTriangleStrip))
994 outputs.push_back(t: "triangle_strip");
995 if (execution.flags.get(bit: ExecutionModeOutputPoints))
996 outputs.push_back(t: "points");
997 if (execution.flags.get(bit: ExecutionModeOutputLineStrip))
998 outputs.push_back(t: "line_strip");
999 }
1000 break;
1001
1002 case ExecutionModelTessellationControl:
1003 if (execution.flags.get(bit: ExecutionModeOutputVertices))
1004 outputs.push_back(t: join(ts: "vertices = ", ts&: execution.output_vertices));
1005 break;
1006
1007 case ExecutionModelTessellationEvaluation:
1008 if (execution.flags.get(bit: ExecutionModeQuads))
1009 inputs.push_back(t: "quads");
1010 if (execution.flags.get(bit: ExecutionModeTriangles))
1011 inputs.push_back(t: "triangles");
1012 if (execution.flags.get(bit: ExecutionModeIsolines))
1013 inputs.push_back(t: "isolines");
1014 if (execution.flags.get(bit: ExecutionModePointMode))
1015 inputs.push_back(t: "point_mode");
1016
1017 if (!execution.flags.get(bit: ExecutionModeIsolines))
1018 {
1019 if (execution.flags.get(bit: ExecutionModeVertexOrderCw))
1020 inputs.push_back(t: "cw");
1021 if (execution.flags.get(bit: ExecutionModeVertexOrderCcw))
1022 inputs.push_back(t: "ccw");
1023 }
1024
1025 if (execution.flags.get(bit: ExecutionModeSpacingFractionalEven))
1026 inputs.push_back(t: "fractional_even_spacing");
1027 if (execution.flags.get(bit: ExecutionModeSpacingFractionalOdd))
1028 inputs.push_back(t: "fractional_odd_spacing");
1029 if (execution.flags.get(bit: ExecutionModeSpacingEqual))
1030 inputs.push_back(t: "equal_spacing");
1031 break;
1032
1033 case ExecutionModelGLCompute:
1034 {
1035 if (execution.workgroup_size.constant != 0 || execution.flags.get(bit: ExecutionModeLocalSizeId))
1036 {
1037 SpecializationConstant wg_x, wg_y, wg_z;
1038 get_work_group_size_specialization_constants(x&: wg_x, y&: wg_y, z&: wg_z);
1039
			// If there are any spec constants on legacy GLSL, defer declaration; we need to set up macro
			// declarations before we can emit the work group size.
1042 if (options.vulkan_semantics ||
1043 ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
1044 build_workgroup_size(arguments&: inputs, wg_x, wg_y, wg_z);
1045 }
1046 else
1047 {
1048 inputs.push_back(t: join(ts: "local_size_x = ", ts&: execution.workgroup_size.x));
1049 inputs.push_back(t: join(ts: "local_size_y = ", ts&: execution.workgroup_size.y));
1050 inputs.push_back(t: join(ts: "local_size_z = ", ts&: execution.workgroup_size.z));
1051 }
1052 break;
1053 }
1054
1055 case ExecutionModelFragment:
1056 if (options.es)
1057 {
1058 switch (options.fragment.default_float_precision)
1059 {
1060 case Options::Lowp:
1061 statement(ts: "precision lowp float;");
1062 break;
1063
1064 case Options::Mediump:
1065 statement(ts: "precision mediump float;");
1066 break;
1067
1068 case Options::Highp:
1069 statement(ts: "precision highp float;");
1070 break;
1071
1072 default:
1073 break;
1074 }
1075
1076 switch (options.fragment.default_int_precision)
1077 {
1078 case Options::Lowp:
1079 statement(ts: "precision lowp int;");
1080 break;
1081
1082 case Options::Mediump:
1083 statement(ts: "precision mediump int;");
1084 break;
1085
1086 case Options::Highp:
1087 statement(ts: "precision highp int;");
1088 break;
1089
1090 default:
1091 break;
1092 }
1093 }
1094
1095 if (execution.flags.get(bit: ExecutionModeEarlyFragmentTests))
1096 inputs.push_back(t: "early_fragment_tests");
1097 if (execution.flags.get(bit: ExecutionModePostDepthCoverage))
1098 inputs.push_back(t: "post_depth_coverage");
1099
1100 if (interlock_used)
1101 statement(ts: "#if defined(GL_ARB_fragment_shader_interlock)");
1102
1103 if (execution.flags.get(bit: ExecutionModePixelInterlockOrderedEXT))
1104 statement(ts: "layout(pixel_interlock_ordered) in;");
1105 else if (execution.flags.get(bit: ExecutionModePixelInterlockUnorderedEXT))
1106 statement(ts: "layout(pixel_interlock_unordered) in;");
1107 else if (execution.flags.get(bit: ExecutionModeSampleInterlockOrderedEXT))
1108 statement(ts: "layout(sample_interlock_ordered) in;");
1109 else if (execution.flags.get(bit: ExecutionModeSampleInterlockUnorderedEXT))
1110 statement(ts: "layout(sample_interlock_unordered) in;");
1111
1112 if (interlock_used)
1113 {
1114 statement(ts: "#elif !defined(GL_INTEL_fragment_shader_ordering)");
1115 statement(ts: "#error Fragment Shader Interlock/Ordering extension missing!");
1116 statement(ts: "#endif");
1117 }
1118
1119 if (!options.es && execution.flags.get(bit: ExecutionModeDepthGreater))
1120 statement(ts: "layout(depth_greater) out float gl_FragDepth;");
1121 else if (!options.es && execution.flags.get(bit: ExecutionModeDepthLess))
1122 statement(ts: "layout(depth_less) out float gl_FragDepth;");
1123
1124 break;
1125
1126 default:
1127 break;
1128 }
1129
1130 for (auto &cap : ir.declared_capabilities)
1131 if (cap == CapabilityRayTraversalPrimitiveCullingKHR)
1132 statement(ts: "layout(primitive_culling);");
1133
1134 if (!inputs.empty())
1135 statement(ts: "layout(", ts: merge(list: inputs), ts: ") in;");
1136 if (!outputs.empty())
1137 statement(ts: "layout(", ts: merge(list: outputs), ts: ") out;");
1138
1139 statement(ts: "");
1140}
1141
1142bool CompilerGLSL::type_is_empty(const SPIRType &type)
1143{
1144 return type.basetype == SPIRType::Struct && type.member_types.empty();
1145}
1146
1147void CompilerGLSL::emit_struct(SPIRType &type)
1148{
1149 // Struct types can be stamped out multiple times
1150 // with just different offsets, matrix layouts, etc ...
1151 // Type-punning with these types is legal, which complicates things
1152 // when we are storing struct and array types in an SSBO for example.
	// If the type master is packed, however, we can no longer assume that the struct declaration will be redundant.
1154 if (type.type_alias != TypeID(0) &&
1155 !has_extended_decoration(id: type.type_alias, decoration: SPIRVCrossDecorationBufferBlockRepacked))
1156 return;
1157
1158 add_resource_name(id: type.self);
1159 auto name = type_to_glsl(type);
1160
1161 statement(ts: !backend.explicit_struct_type ? "struct " : "", ts&: name);
1162 begin_scope();
1163
1164 type.member_name_cache.clear();
1165
1166 uint32_t i = 0;
1167 bool emitted = false;
1168 for (auto &member : type.member_types)
1169 {
1170 add_member_name(type, name: i);
1171 emit_struct_member(type, member_type_id: member, index: i);
1172 i++;
1173 emitted = true;
1174 }
1175
1176 // Don't declare empty structs in GLSL, this is not allowed.
1177 if (type_is_empty(type) && !backend.supports_empty_struct)
1178 {
1179 statement(ts: "int empty_struct_member;");
1180 emitted = true;
1181 }
1182
1183 if (has_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationPaddingTarget))
1184 emit_struct_padding_target(type);
1185
1186 end_scope_decl();
1187
1188 if (emitted)
1189 statement(ts: "");
1190}
1191
1192string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
1193{
1194 string res;
1195 //if (flags & (1ull << DecorationSmooth))
1196 // res += "smooth ";
1197 if (flags.get(bit: DecorationFlat))
1198 res += "flat ";
1199 if (flags.get(bit: DecorationNoPerspective))
1200 res += "noperspective ";
1201 if (flags.get(bit: DecorationCentroid))
1202 res += "centroid ";
1203 if (flags.get(bit: DecorationPatch))
1204 res += "patch ";
1205 if (flags.get(bit: DecorationSample))
1206 res += "sample ";
1207 if (flags.get(bit: DecorationInvariant))
1208 res += "invariant ";
1209
1210 if (flags.get(bit: DecorationExplicitInterpAMD))
1211 {
1212 require_extension_internal(ext: "GL_AMD_shader_explicit_vertex_parameter");
1213 res += "__explicitInterpAMD ";
1214 }
1215
1216 if (flags.get(bit: DecorationPerVertexKHR))
1217 {
1218 if (options.es && options.version < 320)
1219 SPIRV_CROSS_THROW("pervertexEXT requires ESSL 320.");
1220 else if (!options.es && options.version < 450)
1221 SPIRV_CROSS_THROW("pervertexEXT requires GLSL 450.");
1222
1223 if (barycentric_is_nv)
1224 {
1225 require_extension_internal(ext: "GL_NV_fragment_shader_barycentric");
1226 res += "pervertexNV ";
1227 }
1228 else
1229 {
1230 require_extension_internal(ext: "GL_EXT_fragment_shader_barycentric");
1231 res += "pervertexEXT ";
1232 }
1233 }
1234
1235 return res;
1236}
1237
1238string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
1239{
1240 if (is_legacy())
1241 return "";
1242
1243 bool is_block = has_decoration(id: type.self, decoration: DecorationBlock) || has_decoration(id: type.self, decoration: DecorationBufferBlock);
1244 if (!is_block)
1245 return "";
1246
1247 auto &memb = ir.meta[type.self].members;
1248 if (index >= memb.size())
1249 return "";
1250 auto &dec = memb[index];
1251
1252 SmallVector<string> attr;
1253
1254 if (has_member_decoration(id: type.self, index, decoration: DecorationPassthroughNV))
1255 attr.push_back(t: "passthrough");
1256
1257 // We can only apply layouts on members in block interfaces.
1258 // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
1259 // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
1260 // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
1261 //
1262 // We would like to go from (SPIR-V style):
1263 //
1264 // struct Foo { layout(row_major) mat4 matrix; };
1265 // buffer UBO { Foo foo; };
1266 //
1267 // to
1268 //
1269 // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
1270 // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
1271 auto flags = combined_decoration_for_member(type, index);
1272
1273 if (flags.get(bit: DecorationRowMajor))
1274 attr.push_back(t: "row_major");
1275 // We don't emit any global layouts, so column_major is default.
1276 //if (flags & (1ull << DecorationColMajor))
1277 // attr.push_back("column_major");
1278
1279 if (dec.decoration_flags.get(bit: DecorationLocation) && can_use_io_location(storage: type.storage, block: true))
1280 attr.push_back(t: join(ts: "location = ", ts&: dec.location));
1281
1282 // Can only declare component if we can declare location.
1283 if (dec.decoration_flags.get(bit: DecorationComponent) && can_use_io_location(storage: type.storage, block: true))
1284 {
1285 if (!options.es)
1286 {
1287 if (options.version < 440 && options.version >= 140)
1288 require_extension_internal(ext: "GL_ARB_enhanced_layouts");
1289 else if (options.version < 140)
1290 SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
1291 attr.push_back(t: join(ts: "component = ", ts&: dec.component));
1292 }
1293 else
1294 SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
1295 }
1296
1297 // SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
1298 // This is only done selectively in GLSL as needed.
1299 if (has_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset) &&
1300 dec.decoration_flags.get(bit: DecorationOffset))
1301 attr.push_back(t: join(ts: "offset = ", ts&: dec.offset));
1302 else if (type.storage == StorageClassOutput && dec.decoration_flags.get(bit: DecorationOffset))
1303 attr.push_back(t: join(ts: "xfb_offset = ", ts&: dec.offset));
1304
1305 if (attr.empty())
1306 return "";
1307
1308 string res = "layout(";
1309 res += merge(list: attr);
1310 res += ") ";
1311 return res;
1312}
1313
1314const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
1315{
1316 if (options.es && is_desktop_only_format(format))
1317 SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");
1318
1319 switch (format)
1320 {
1321 case ImageFormatRgba32f:
1322 return "rgba32f";
1323 case ImageFormatRgba16f:
1324 return "rgba16f";
1325 case ImageFormatR32f:
1326 return "r32f";
1327 case ImageFormatRgba8:
1328 return "rgba8";
1329 case ImageFormatRgba8Snorm:
1330 return "rgba8_snorm";
1331 case ImageFormatRg32f:
1332 return "rg32f";
1333 case ImageFormatRg16f:
1334 return "rg16f";
1335 case ImageFormatRgba32i:
1336 return "rgba32i";
1337 case ImageFormatRgba16i:
1338 return "rgba16i";
1339 case ImageFormatR32i:
1340 return "r32i";
1341 case ImageFormatRgba8i:
1342 return "rgba8i";
1343 case ImageFormatRg32i:
1344 return "rg32i";
1345 case ImageFormatRg16i:
1346 return "rg16i";
1347 case ImageFormatRgba32ui:
1348 return "rgba32ui";
1349 case ImageFormatRgba16ui:
1350 return "rgba16ui";
1351 case ImageFormatR32ui:
1352 return "r32ui";
1353 case ImageFormatRgba8ui:
1354 return "rgba8ui";
1355 case ImageFormatRg32ui:
1356 return "rg32ui";
1357 case ImageFormatRg16ui:
1358 return "rg16ui";
1359 case ImageFormatR11fG11fB10f:
1360 return "r11f_g11f_b10f";
1361 case ImageFormatR16f:
1362 return "r16f";
1363 case ImageFormatRgb10A2:
1364 return "rgb10_a2";
1365 case ImageFormatR8:
1366 return "r8";
1367 case ImageFormatRg8:
1368 return "rg8";
1369 case ImageFormatR16:
1370 return "r16";
1371 case ImageFormatRg16:
1372 return "rg16";
1373 case ImageFormatRgba16:
1374 return "rgba16";
1375 case ImageFormatR16Snorm:
1376 return "r16_snorm";
1377 case ImageFormatRg16Snorm:
1378 return "rg16_snorm";
1379 case ImageFormatRgba16Snorm:
1380 return "rgba16_snorm";
1381 case ImageFormatR8Snorm:
1382 return "r8_snorm";
1383 case ImageFormatRg8Snorm:
1384 return "rg8_snorm";
1385 case ImageFormatR8ui:
1386 return "r8ui";
1387 case ImageFormatRg8ui:
1388 return "rg8ui";
1389 case ImageFormatR16ui:
1390 return "r16ui";
1391 case ImageFormatRgb10a2ui:
1392 return "rgb10_a2ui";
1393 case ImageFormatR8i:
1394 return "r8i";
1395 case ImageFormatRg8i:
1396 return "rg8i";
1397 case ImageFormatR16i:
1398 return "r16i";
1399 default:
1400 case ImageFormatUnknown:
1401 return nullptr;
1402 }
1403}
1404
1405uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
1406{
1407 switch (type.basetype)
1408 {
1409 case SPIRType::Double:
1410 case SPIRType::Int64:
1411 case SPIRType::UInt64:
1412 return 8;
1413 case SPIRType::Float:
1414 case SPIRType::Int:
1415 case SPIRType::UInt:
1416 return 4;
1417 case SPIRType::Half:
1418 case SPIRType::Short:
1419 case SPIRType::UShort:
1420 return 2;
1421 case SPIRType::SByte:
1422 case SPIRType::UByte:
1423 return 1;
1424
1425 default:
1426 SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
1427 }
1428}
1429
1430uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
1431 BufferPackingStandard packing)
1432{
1433 // If using PhysicalStorageBufferEXT storage class, this is a pointer,
1434 // and is 64-bit.
1435 if (type.storage == StorageClassPhysicalStorageBufferEXT)
1436 {
1437 if (!type.pointer)
1438 SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1439
1440 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1441 {
1442 if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
1443 return 16;
1444 else
1445 return 8;
1446 }
1447 else
1448 SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1449 }
1450
1451 if (!type.array.empty())
1452 {
1453 uint32_t minimum_alignment = 1;
1454 if (packing_is_vec4_padded(packing))
1455 minimum_alignment = 16;
1456
1457 auto *tmp = &get<SPIRType>(id: type.parent_type);
1458 while (!tmp->array.empty())
1459 tmp = &get<SPIRType>(id: tmp->parent_type);
1460
1461 // Get the alignment of the base type, then maybe round up.
1462 return max(a: minimum_alignment, b: type_to_packed_alignment(type: *tmp, flags, packing));
1463 }
1464
1465 if (type.basetype == SPIRType::Struct)
1466 {
1467 // Rule 9. Structs alignments are maximum alignment of its members.
1468 uint32_t alignment = 1;
1469 for (uint32_t i = 0; i < type.member_types.size(); i++)
1470 {
1471 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1472 alignment =
1473 max(a: alignment, b: type_to_packed_alignment(type: get<SPIRType>(id: type.member_types[i]), flags: member_flags, packing));
1474 }
1475
1476 // In std140, struct alignment is rounded up to 16.
1477 if (packing_is_vec4_padded(packing))
1478 alignment = max(a: alignment, b: 16u);
1479
1480 return alignment;
1481 }
1482 else
1483 {
1484 const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1485
1486 // Alignment requirement for scalar block layout is always the alignment for the most basic component.
1487 if (packing_is_scalar(packing))
1488 return base_alignment;
1489
		// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
		// a vec4; this is handled outside, since that part knows our current offset.
1492 if (type.columns == 1 && packing_is_hlsl(packing))
1493 return base_alignment;
1494
1495 // From 7.6.2.2 in GL 4.5 core spec.
1496 // Rule 1
1497 if (type.vecsize == 1 && type.columns == 1)
1498 return base_alignment;
1499
1500 // Rule 2
1501 if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
1502 return type.vecsize * base_alignment;
1503
1504 // Rule 3
1505 if (type.vecsize == 3 && type.columns == 1)
1506 return 4 * base_alignment;
1507
1508 // Rule 4 implied. Alignment does not change in std430.
1509
1510 // Rule 5. Column-major matrices are stored as arrays of
1511 // vectors.
1512 if (flags.get(bit: DecorationColMajor) && type.columns > 1)
1513 {
1514 if (packing_is_vec4_padded(packing))
1515 return 4 * base_alignment;
1516 else if (type.vecsize == 3)
1517 return 4 * base_alignment;
1518 else
1519 return type.vecsize * base_alignment;
1520 }
1521
1522 // Rule 6 implied.
1523
1524 // Rule 7.
1525 if (flags.get(bit: DecorationRowMajor) && type.vecsize > 1)
1526 {
1527 if (packing_is_vec4_padded(packing))
1528 return 4 * base_alignment;
1529 else if (type.columns == 3)
1530 return 4 * base_alignment;
1531 else
1532 return type.columns * base_alignment;
1533 }
1534
1535 // Rule 8 implied.
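
		// Example: under std140 a vec3 falls under rule 3 and gets 4 * 4 = 16-byte alignment,
		// while scalar packing would have returned the 4-byte base alignment in the early-out above.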
1536 }
1537
1538 SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
1539}
1540
1541uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
1542 BufferPackingStandard packing)
1543{
1544 // Array stride is equal to aligned size of the underlying type.
1545 uint32_t parent = type.parent_type;
1546 assert(parent);
1547
1548 auto &tmp = get<SPIRType>(id: parent);
1549
1550 uint32_t size = type_to_packed_size(type: tmp, flags, packing);
1551 uint32_t alignment = type_to_packed_alignment(type, flags, packing);
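	// Round the element size up to its alignment; e.g. a 12-byte vec3 element with 16-byte
	// alignment yields an array stride of 16.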
1552 return (size + alignment - 1) & ~(alignment - 1);
1553}
1554
1555uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
1556{
1557 if (!type.array.empty())
1558 {
1559 uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
1560
1561 // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
1562 // so that it is possible to pack other vectors into the last element.
1563 if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
1564 packed_size -= (4 - type.vecsize) * (type.width / 8);
1565
1566 return packed_size;
1567 }
1568
1569 // If using PhysicalStorageBufferEXT storage class, this is a pointer,
1570 // and is 64-bit.
1571 if (type.storage == StorageClassPhysicalStorageBufferEXT)
1572 {
1573 if (!type.pointer)
1574 SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1575
1576 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1577 return 8;
1578 else
1579 SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1580 }
1581
1582 uint32_t size = 0;
1583
1584 if (type.basetype == SPIRType::Struct)
1585 {
1586 uint32_t pad_alignment = 1;
1587
1588 for (uint32_t i = 0; i < type.member_types.size(); i++)
1589 {
1590 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1591 auto &member_type = get<SPIRType>(id: type.member_types[i]);
1592
1593 uint32_t packed_alignment = type_to_packed_alignment(type: member_type, flags: member_flags, packing);
1594 uint32_t alignment = max(a: packed_alignment, b: pad_alignment);
1595
1596 // The next member following a struct member is aligned to the base alignment of the struct that came before.
1597 // GL 4.5 spec, 7.6.2.2.
1598 if (member_type.basetype == SPIRType::Struct)
1599 pad_alignment = packed_alignment;
1600 else
1601 pad_alignment = 1;
1602
1603 size = (size + alignment - 1) & ~(alignment - 1);
1604 size += type_to_packed_size(type: member_type, flags: member_flags, packing);
1605 }
1606 }
1607 else
1608 {
1609 const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1610
1611 if (packing_is_scalar(packing))
1612 {
1613 size = type.vecsize * type.columns * base_alignment;
1614 }
1615 else
1616 {
1617 if (type.columns == 1)
1618 size = type.vecsize * base_alignment;
1619
1620 if (flags.get(bit: DecorationColMajor) && type.columns > 1)
1621 {
1622 if (packing_is_vec4_padded(packing))
1623 size = type.columns * 4 * base_alignment;
1624 else if (type.vecsize == 3)
1625 size = type.columns * 4 * base_alignment;
1626 else
1627 size = type.columns * type.vecsize * base_alignment;
1628 }
1629
1630 if (flags.get(bit: DecorationRowMajor) && type.vecsize > 1)
1631 {
1632 if (packing_is_vec4_padded(packing))
1633 size = type.vecsize * 4 * base_alignment;
1634 else if (type.columns == 3)
1635 size = type.vecsize * 4 * base_alignment;
1636 else
1637 size = type.vecsize * type.columns * base_alignment;
1638 }
1639
1640 // For matrices in HLSL, the last element has a size which depends on its vector size,
1641 // so that it is possible to pack other vectors into the last element.
1642 if (packing_is_hlsl(packing) && type.columns > 1)
1643 size -= (4 - type.vecsize) * (type.width / 8);
1644 }
1645 }
1646
1647 return size;
1648}
1649
1650bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
1651 uint32_t *failed_validation_index, uint32_t start_offset,
1652 uint32_t end_offset)
1653{
	// This is very tricky and error prone, but try to be exhaustive and correct here.
	// SPIR-V doesn't directly say if we're using std430 or std140.
	// SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
	// so we have to try to infer whether the original GLSL source was std140 or std430 based on this information.
	// We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V
	// (they are useless anyway, and custom offsets would achieve the same thing).
	//
	// It is almost certain that we're using std430, but it gets tricky with arrays in particular.
	// We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
	//
	// The only two differences between std140 and std430 are the padding alignment and array stride
	// used for arrays and structs: std140 rounds them up to vec4 alignment, while std430 removes that requirement.
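	//
	// Illustrative example (not tied to any particular shader): a member declared as "float a[4];" gets
	//     ArrayStride = 16 under "layout(std140) uniform UBO { float a[4]; };"
	//     ArrayStride = 4  under "layout(std430) buffer SSBO { float a[4]; };"
	// so observing a stride of 4 rules out std140, while a stride of 16 rules out std430.
	// That is exactly the kind of evidence this function gathers member by member.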
1666
1667 uint32_t offset = 0;
1668 uint32_t pad_alignment = 1;
1669
1670 bool is_top_level_block =
1671 has_decoration(id: type.self, decoration: DecorationBlock) || has_decoration(id: type.self, decoration: DecorationBufferBlock);
1672
1673 for (uint32_t i = 0; i < type.member_types.size(); i++)
1674 {
1675 auto &memb_type = get<SPIRType>(id: type.member_types[i]);
1676 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1677
1678 // Verify alignment rules.
1679 uint32_t packed_alignment = type_to_packed_alignment(type: memb_type, flags: member_flags, packing);
1680
		// This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as an array size, e.g.:
		// layout(constant_id = 0) const int s = 10;
		// const int S = s + 5; // SpecConstantOp
		// buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here;
		// we would need a full implementation of compile-time constant folding. :(
		// If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
		// for our analysis (e.g. unsized arrays).
		// This lets us simply ignore spec-constant-op sized arrays in our buffers.
		// Querying the size of such a member would fail, so just don't call it unless we have to.
		//
		// This is likely the best effort we can support without going into unacceptably complicated workarounds.
1692 bool member_can_be_unsized =
1693 is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
1694
1695 uint32_t packed_size = 0;
1696 if (!member_can_be_unsized || packing_is_hlsl(packing))
1697 packed_size = type_to_packed_size(type: memb_type, flags: member_flags, packing);
1698
1699 // We only need to care about this if we have non-array types which can straddle the vec4 boundary.
1700 if (packing_is_hlsl(packing))
1701 {
1702 // If a member straddles across a vec4 boundary, alignment is actually vec4.
1703 uint32_t begin_word = offset / 16;
1704 uint32_t end_word = (offset + packed_size - 1) / 16;
1705 if (begin_word != end_word)
1706 packed_alignment = max(a: packed_alignment, b: 16u);
1707 }
1708
1709 uint32_t actual_offset = type_struct_member_offset(type, index: i);
1710 // Field is not in the specified range anymore and we can ignore any further fields.
1711 if (actual_offset >= end_offset)
1712 break;
1713
1714 uint32_t alignment = max(a: packed_alignment, b: pad_alignment);
1715 offset = (offset + alignment - 1) & ~(alignment - 1);
1716
1717 // The next member following a struct member is aligned to the base alignment of the struct that came before.
1718 // GL 4.5 spec, 7.6.2.2.
1719 if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
1720 pad_alignment = packed_alignment;
1721 else
1722 pad_alignment = 1;
1723
1724 // Only care about packing if we are in the given range
1725 if (actual_offset >= start_offset)
1726 {
1727 // We only care about offsets in std140, std430, etc ...
1728 // For EnhancedLayout variants, we have the flexibility to choose our own offsets.
1729 if (!packing_has_flexible_offset(packing))
1730 {
1731 if (actual_offset != offset) // This cannot be the packing we're looking for.
1732 {
1733 if (failed_validation_index)
1734 *failed_validation_index = i;
1735 return false;
1736 }
1737 }
1738 else if ((actual_offset & (alignment - 1)) != 0)
1739 {
1740 // We still need to verify that alignment rules are observed, even if we have explicit offset.
1741 if (failed_validation_index)
1742 *failed_validation_index = i;
1743 return false;
1744 }
1745
1746 // Verify array stride rules.
1747 if (!memb_type.array.empty() && type_to_packed_array_stride(type: memb_type, flags: member_flags, packing) !=
1748 type_struct_member_array_stride(type, index: i))
1749 {
1750 if (failed_validation_index)
1751 *failed_validation_index = i;
1752 return false;
1753 }
1754
1755 // Verify that sub-structs also follow packing rules.
1756 // We cannot use enhanced layouts on substructs, so they better be up to spec.
1757 auto substruct_packing = packing_to_substruct_packing(packing);
1758
1759 if (!memb_type.pointer && !memb_type.member_types.empty() &&
1760 !buffer_is_packing_standard(type: memb_type, packing: substruct_packing))
1761 {
1762 if (failed_validation_index)
1763 *failed_validation_index = i;
1764 return false;
1765 }
1766 }
1767
1768 // Bump size.
1769 offset = actual_offset + packed_size;
1770 }
1771
1772 return true;
1773}
1774
1775bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
1776{
	// Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
	// Be very explicit here about how to solve the issue.
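	// For example, "layout(location = 0) in vec4 a;" on a vertex input needs GLSL 3.30 / ESSL 3.00,
	// while locations on other stage interfaces need GLSL 4.10 (4.40 for blocks), GL_ARB_separate_shader_objects
	// or ESSL 3.10, which is what the checks below encode.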
1779 if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
1780 (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
1781 {
1782 uint32_t minimum_desktop_version = block ? 440 : 410;
1783 // ARB_enhanced_layouts vs ARB_separate_shader_objects ...
1784
1785 if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
1786 return false;
1787 else if (options.es && options.version < 310)
1788 return false;
1789 }
1790
1791 if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
1792 (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
1793 {
1794 if (options.es && options.version < 300)
1795 return false;
1796 else if (!options.es && options.version < 330)
1797 return false;
1798 }
1799
1800 if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
1801 {
1802 if (options.es && options.version < 310)
1803 return false;
1804 else if (!options.es && options.version < 430)
1805 return false;
1806 }
1807
1808 return true;
1809}
1810
1811string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
1812{
	// FIXME: Come up with a better solution for when to disable layouts.
	// Whether layouts can be emitted depends on extensions as well as which kinds
	// of layouts are used. For now, the simple solution is to just disable
	// layouts for legacy versions.
1817 if (is_legacy())
1818 return "";
1819
1820 if (subpass_input_is_framebuffer_fetch(id: var.self))
1821 return "";
1822
1823 SmallVector<string> attr;
1824
1825 auto &type = get<SPIRType>(id: var.basetype);
1826 auto &flags = get_decoration_bitset(id: var.self);
1827 auto &typeflags = get_decoration_bitset(id: type.self);
1828
1829 if (flags.get(bit: DecorationPassthroughNV))
1830 attr.push_back(t: "passthrough");
1831
1832 if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
1833 attr.push_back(t: "push_constant");
1834 else if (var.storage == StorageClassShaderRecordBufferKHR)
1835 attr.push_back(t: ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");
1836
1837 if (flags.get(bit: DecorationRowMajor))
1838 attr.push_back(t: "row_major");
1839 if (flags.get(bit: DecorationColMajor))
1840 attr.push_back(t: "column_major");
1841
1842 if (options.vulkan_semantics)
1843 {
1844 if (flags.get(bit: DecorationInputAttachmentIndex))
1845 attr.push_back(t: join(ts: "input_attachment_index = ", ts: get_decoration(id: var.self, decoration: DecorationInputAttachmentIndex)));
1846 }
1847
1848 bool is_block = has_decoration(id: type.self, decoration: DecorationBlock);
1849 if (flags.get(bit: DecorationLocation) && can_use_io_location(storage: var.storage, block: is_block))
1850 {
1851 Bitset combined_decoration;
1852 for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
1853 combined_decoration.merge_or(other: combined_decoration_for_member(type, index: i));
1854
1855 // If our members have location decorations, we don't need to
1856 // emit location decorations at the top as well (looks weird).
1857 if (!combined_decoration.get(bit: DecorationLocation))
1858 attr.push_back(t: join(ts: "location = ", ts: get_decoration(id: var.self, decoration: DecorationLocation)));
1859 }
1860
1861 if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput &&
1862 location_is_non_coherent_framebuffer_fetch(location: get_decoration(id: var.self, decoration: DecorationLocation)))
1863 {
1864 attr.push_back(t: "noncoherent");
1865 }
1866
1867 // Transform feedback
1868 bool uses_enhanced_layouts = false;
1869 if (is_block && var.storage == StorageClassOutput)
1870 {
1871 // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself,
1872 // since all members must match the same xfb_buffer. The only thing we will declare for members of the block
1873 // is the xfb_offset.
1874 uint32_t member_count = uint32_t(type.member_types.size());
1875 bool have_xfb_buffer_stride = false;
1876 bool have_any_xfb_offset = false;
1877 bool have_geom_stream = false;
1878 uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
1879
1880 if (flags.get(bit: DecorationXfbBuffer) && flags.get(bit: DecorationXfbStride))
1881 {
1882 have_xfb_buffer_stride = true;
1883 xfb_buffer = get_decoration(id: var.self, decoration: DecorationXfbBuffer);
1884 xfb_stride = get_decoration(id: var.self, decoration: DecorationXfbStride);
1885 }
1886
1887 if (flags.get(bit: DecorationStream))
1888 {
1889 have_geom_stream = true;
1890 geom_stream = get_decoration(id: var.self, decoration: DecorationStream);
1891 }
1892
1893 // Verify that none of the members violate our assumption.
1894 for (uint32_t i = 0; i < member_count; i++)
1895 {
1896 if (has_member_decoration(id: type.self, index: i, decoration: DecorationStream))
1897 {
1898 uint32_t member_geom_stream = get_member_decoration(id: type.self, index: i, decoration: DecorationStream);
1899 if (have_geom_stream && member_geom_stream != geom_stream)
1900 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
1901 have_geom_stream = true;
1902 geom_stream = member_geom_stream;
1903 }
1904
1905 // Only members with an Offset decoration participate in XFB.
1906 if (!has_member_decoration(id: type.self, index: i, decoration: DecorationOffset))
1907 continue;
1908 have_any_xfb_offset = true;
1909
1910 if (has_member_decoration(id: type.self, index: i, decoration: DecorationXfbBuffer))
1911 {
1912 uint32_t buffer_index = get_member_decoration(id: type.self, index: i, decoration: DecorationXfbBuffer);
1913 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
1914 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
1915 have_xfb_buffer_stride = true;
1916 xfb_buffer = buffer_index;
1917 }
1918
1919 if (has_member_decoration(id: type.self, index: i, decoration: DecorationXfbStride))
1920 {
1921 uint32_t stride = get_member_decoration(id: type.self, index: i, decoration: DecorationXfbStride);
1922 if (have_xfb_buffer_stride && stride != xfb_stride)
1923 SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
1924 have_xfb_buffer_stride = true;
1925 xfb_stride = stride;
1926 }
1927 }
1928
1929 if (have_xfb_buffer_stride && have_any_xfb_offset)
1930 {
1931 attr.push_back(t: join(ts: "xfb_buffer = ", ts&: xfb_buffer));
1932 attr.push_back(t: join(ts: "xfb_stride = ", ts&: xfb_stride));
1933 uses_enhanced_layouts = true;
1934 }
1935
1936 if (have_geom_stream)
1937 {
1938 if (get_execution_model() != ExecutionModelGeometry)
1939 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
1940 if (options.es)
1941 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
1942 if (options.version < 400)
1943 require_extension_internal(ext: "GL_ARB_transform_feedback3");
1944 attr.push_back(t: join(ts: "stream = ", ts: get_decoration(id: var.self, decoration: DecorationStream)));
1945 }
1946 }
1947 else if (var.storage == StorageClassOutput)
1948 {
1949 if (flags.get(bit: DecorationXfbBuffer) && flags.get(bit: DecorationXfbStride) && flags.get(bit: DecorationOffset))
1950 {
1951 // XFB for standalone variables, we can emit all decorations.
1952 attr.push_back(t: join(ts: "xfb_buffer = ", ts: get_decoration(id: var.self, decoration: DecorationXfbBuffer)));
1953 attr.push_back(t: join(ts: "xfb_stride = ", ts: get_decoration(id: var.self, decoration: DecorationXfbStride)));
1954 attr.push_back(t: join(ts: "xfb_offset = ", ts: get_decoration(id: var.self, decoration: DecorationOffset)));
1955 uses_enhanced_layouts = true;
1956 }
1957
1958 if (flags.get(bit: DecorationStream))
1959 {
1960 if (get_execution_model() != ExecutionModelGeometry)
1961 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
1962 if (options.es)
1963 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
1964 if (options.version < 400)
1965 require_extension_internal(ext: "GL_ARB_transform_feedback3");
1966 attr.push_back(t: join(ts: "stream = ", ts: get_decoration(id: var.self, decoration: DecorationStream)));
1967 }
1968 }
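	// The decorations gathered above end up as enhanced-layout qualifiers, e.g. (illustrative):
	//     layout(xfb_buffer = 0, xfb_stride = 16) out VertOut { layout(xfb_offset = 0) vec4 v; };  // block
	//     layout(xfb_buffer = 0, xfb_stride = 16, xfb_offset = 0) out vec4 v;                      // standalone
	// with "stream = N" added for geometry shaders emitting to multiple streams.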
1969
1970 // Can only declare Component if we can declare location.
1971 if (flags.get(bit: DecorationComponent) && can_use_io_location(storage: var.storage, block: is_block))
1972 {
1973 uses_enhanced_layouts = true;
1974 attr.push_back(t: join(ts: "component = ", ts: get_decoration(id: var.self, decoration: DecorationComponent)));
1975 }
1976
1977 if (uses_enhanced_layouts)
1978 {
1979 if (!options.es)
1980 {
1981 if (options.version < 440 && options.version >= 140)
1982 require_extension_internal(ext: "GL_ARB_enhanced_layouts");
1983 else if (options.version < 140)
1984 SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
1985 if (!options.es && options.version < 440)
1986 require_extension_internal(ext: "GL_ARB_enhanced_layouts");
1987 }
1988 else if (options.es)
1989 SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
1990 }
1991
1992 if (flags.get(bit: DecorationIndex))
1993 attr.push_back(t: join(ts: "index = ", ts: get_decoration(id: var.self, decoration: DecorationIndex)));
1994
1995 // Do not emit set = decoration in regular GLSL output, but
1996 // we need to preserve it in Vulkan GLSL mode.
1997 if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR)
1998 {
1999 if (flags.get(bit: DecorationDescriptorSet) && options.vulkan_semantics)
2000 attr.push_back(t: join(ts: "set = ", ts: get_decoration(id: var.self, decoration: DecorationDescriptorSet)));
2001 }
2002
2003 bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
2004 bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
2005 (var.storage == StorageClassUniform && typeflags.get(bit: DecorationBufferBlock));
2006 bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
2007 bool ubo_block = var.storage == StorageClassUniform && typeflags.get(bit: DecorationBlock);
2008
2009 // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
2010 bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
2011
2012 // pretend no UBOs when options say so
2013 if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
2014 can_use_buffer_blocks = false;
2015
2016 bool can_use_binding;
2017 if (options.es)
2018 can_use_binding = options.version >= 310;
2019 else
2020 can_use_binding = options.enable_420pack_extension || (options.version >= 420);
2021
2022 // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
2023 if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
2024 can_use_binding = false;
2025
2026 if (var.storage == StorageClassShaderRecordBufferKHR)
2027 can_use_binding = false;
2028
2029 if (can_use_binding && flags.get(bit: DecorationBinding))
2030 attr.push_back(t: join(ts: "binding = ", ts: get_decoration(id: var.self, decoration: DecorationBinding)));
2031
2032 if (var.storage != StorageClassOutput && flags.get(bit: DecorationOffset))
2033 attr.push_back(t: join(ts: "offset = ", ts: get_decoration(id: var.self, decoration: DecorationOffset)));
2034
2035 // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
2036 // If SPIR-V does not comply with either layout, we cannot really work around it.
2037 if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
2038 {
2039 attr.push_back(t: buffer_to_packing_standard(type, support_std430_without_scalar_layout: false));
2040 }
2041 else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
2042 {
2043 attr.push_back(t: buffer_to_packing_standard(type, support_std430_without_scalar_layout: true));
2044 }
2045
	// For images, the type itself adds a layout qualifier.
	// Only emit the format for storage images.
2048 if (type.basetype == SPIRType::Image && type.image.sampled == 2)
2049 {
2050 const char *fmt = format_to_glsl(format: type.image.format);
2051 if (fmt)
2052 attr.push_back(t: fmt);
2053 }
2054
2055 if (attr.empty())
2056 return "";
2057
2058 string res = "layout(";
2059 res += merge(list: attr);
2060 res += ") ";
2061 return res;
2062}
2063
2064string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
2065{
2066 if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, packing: BufferPackingStd430))
2067 return "std430";
2068 else if (buffer_is_packing_standard(type, packing: BufferPackingStd140))
2069 return "std140";
2070 else if (options.vulkan_semantics && buffer_is_packing_standard(type, packing: BufferPackingScalar))
2071 {
2072 require_extension_internal(ext: "GL_EXT_scalar_block_layout");
2073 return "scalar";
2074 }
2075 else if (support_std430_without_scalar_layout &&
2076 buffer_is_packing_standard(type, packing: BufferPackingStd430EnhancedLayout))
2077 {
2078 if (options.es && !options.vulkan_semantics)
2079 SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
2080 "not support GL_ARB_enhanced_layouts.");
2081 if (!options.es && !options.vulkan_semantics && options.version < 440)
2082 require_extension_internal(ext: "GL_ARB_enhanced_layouts");
2083
2084 set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset);
2085 return "std430";
2086 }
2087 else if (buffer_is_packing_standard(type, packing: BufferPackingStd140EnhancedLayout))
2088 {
		// Fallback time. We might be able to use GL_ARB_enhanced_layouts to deal with this difference.
		// However, we can only use layout(offset) on the block itself, not on any substructs,
		// so the substructs must already follow the appropriate layout on their own.
		// Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
2092 if (options.es && !options.vulkan_semantics)
2093 SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
2094 "not support GL_ARB_enhanced_layouts.");
2095 if (!options.es && !options.vulkan_semantics && options.version < 440)
2096 require_extension_internal(ext: "GL_ARB_enhanced_layouts");
2097
2098 set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset);
2099 return "std140";
2100 }
2101 else if (options.vulkan_semantics && buffer_is_packing_standard(type, packing: BufferPackingScalarEnhancedLayout))
2102 {
2103 set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset);
2104 require_extension_internal(ext: "GL_EXT_scalar_block_layout");
2105 return "scalar";
2106 }
2107 else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
2108 buffer_is_packing_standard(type, packing: BufferPackingStd430))
2109 {
2110 // UBOs can support std430 with GL_EXT_scalar_block_layout.
2111 require_extension_internal(ext: "GL_EXT_scalar_block_layout");
2112 return "std430";
2113 }
2114 else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
2115 buffer_is_packing_standard(type, packing: BufferPackingStd430EnhancedLayout))
2116 {
2117 // UBOs can support std430 with GL_EXT_scalar_block_layout.
2118 set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset);
2119 require_extension_internal(ext: "GL_EXT_scalar_block_layout");
2120 return "std430";
2121 }
2122 else
2123 {
2124 SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced "
2125 "layouts. You can try flattening this block to support a more flexible layout.");
2126 }
2127}
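
// The packing string returned above becomes part of the block's layout() qualifier, e.g. (illustrative):
//     layout(std140, binding = 0) uniform UBO { ... };
//     layout(std430, binding = 1) buffer SSBO { ... };
//     layout(scalar) buffer Data { ... };  // Vulkan GLSL only, requires GL_EXT_scalar_block_layout.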
2128
2129void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
2130{
2131 if (flattened_buffer_blocks.count(x: var.self))
2132 emit_buffer_block_flattened(type: var);
2133 else if (options.vulkan_semantics)
2134 emit_push_constant_block_vulkan(var);
2135 else if (options.emit_push_constant_as_uniform_buffer)
2136 emit_buffer_block_native(var);
2137 else
2138 emit_push_constant_block_glsl(var);
2139}
2140
2141void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
2142{
2143 emit_buffer_block(type: var);
2144}
2145
2146void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
2147{
2148 // OpenGL has no concept of push constant blocks, implement it as a uniform struct.
2149 auto &type = get<SPIRType>(id: var.basetype);
2150
2151 unset_decoration(id: var.self, decoration: DecorationBinding);
2152 unset_decoration(id: var.self, decoration: DecorationDescriptorSet);
2153
2154#if 0
2155 if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
2156 SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
2157 "Remap to location with reflection API first or disable these decorations.");
2158#endif
2159
2160 // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
2161 // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
2162 bool block_flag = has_decoration(id: type.self, decoration: DecorationBlock);
2163 unset_decoration(id: type.self, decoration: DecorationBlock);
2164
2165 emit_struct(type);
2166
2167 if (block_flag)
2168 set_decoration(id: type.self, decoration: DecorationBlock);
2169
2170 emit_uniform(var);
2171 statement(ts: "");
2172}
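
// The GLSL fallback above turns a push constant block into a plain uniform struct, roughly (illustrative names):
//     struct PushConstants { mat4 mvp; };
//     uniform PushConstants pc;
// The application then updates the members through regular uniform APIs instead of push constant ranges.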
2173
2174void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
2175{
2176 auto &type = get<SPIRType>(id: var.basetype);
2177 bool ubo_block = var.storage == StorageClassUniform && has_decoration(id: type.self, decoration: DecorationBlock);
2178
2179 if (flattened_buffer_blocks.count(x: var.self))
2180 emit_buffer_block_flattened(type: var);
2181 else if (is_legacy() || (!options.es && options.version == 130) ||
2182 (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
2183 emit_buffer_block_legacy(var);
2184 else
2185 emit_buffer_block_native(var);
2186}
2187
2188void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
2189{
2190 auto &type = get<SPIRType>(id: var.basetype);
2191 bool ssbo = var.storage == StorageClassStorageBuffer ||
2192 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
2193 if (ssbo)
2194 SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
2195
	// We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
2198 auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
2199 bool block_flag = block_flags.get(bit: DecorationBlock);
2200 block_flags.clear(bit: DecorationBlock);
2201 emit_struct(type);
2202 if (block_flag)
2203 block_flags.set(DecorationBlock);
2204 emit_uniform(var);
2205 statement(ts: "");
2206}
2207
2208void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_declaration)
2209{
2210 auto &type = get<SPIRType>(id: type_id);
2211 string buffer_name;
2212
2213 if (forward_declaration)
2214 {
		// Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
		// Allow an aliased name here since we might be declaring the block twice:
		// once as a forward-declared buffer reference and once as the proper declaration.
		// The names must match up.
2218 buffer_name = to_name(id: type.self, allow_alias: false);
2219
2220 // Shaders never use the block by interface name, so we don't
2221 // have to track this other than updating name caches.
2222 // If we have a collision for any reason, just fallback immediately.
2223 if (ir.meta[type.self].decoration.alias.empty() ||
2224 block_ssbo_names.find(x: buffer_name) != end(cont&: block_ssbo_names) ||
2225 resource_names.find(x: buffer_name) != end(cont&: resource_names))
2226 {
2227 buffer_name = join(ts: "_", ts&: type.self);
2228 }
2229
2230 // Make sure we get something unique for both global name scope and block name scope.
2231 // See GLSL 4.5 spec: section 4.3.9 for details.
2232 add_variable(variables_primary&: block_ssbo_names, variables_secondary: resource_names, name&: buffer_name);
2233
		// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
		// This cannot conflict with anything else, so we're safe now.
		// This fallback name cannot be reused in either global scope (blocked by block_names) or block name scope.
2237 if (buffer_name.empty())
2238 buffer_name = join(ts: "_", ts&: type.self);
2239
2240 block_names.insert(x: buffer_name);
2241 block_ssbo_names.insert(x: buffer_name);
2242
2243 // Ensure we emit the correct name when emitting non-forward pointer type.
2244 ir.meta[type.self].decoration.alias = buffer_name;
2245 }
2246 else if (type.basetype != SPIRType::Struct)
2247 buffer_name = type_to_glsl(type);
2248 else
2249 buffer_name = to_name(id: type.self, allow_alias: false);
2250
2251 if (!forward_declaration)
2252 {
2253 auto itr = physical_storage_type_to_alignment.find(x: type_id);
2254 uint32_t alignment = 0;
2255 if (itr != physical_storage_type_to_alignment.end())
2256 alignment = itr->second.alignment;
2257
2258 if (type.basetype == SPIRType::Struct)
2259 {
2260 SmallVector<std::string> attributes;
2261 attributes.push_back(t: "buffer_reference");
2262 if (alignment)
2263 attributes.push_back(t: join(ts: "buffer_reference_align = ", ts&: alignment));
2264 attributes.push_back(t: buffer_to_packing_standard(type, support_std430_without_scalar_layout: true));
2265
2266 auto flags = ir.get_buffer_block_type_flags(type);
2267 string decorations;
2268 if (flags.get(bit: DecorationRestrict))
2269 decorations += " restrict";
2270 if (flags.get(bit: DecorationCoherent))
2271 decorations += " coherent";
2272 if (flags.get(bit: DecorationNonReadable))
2273 decorations += " writeonly";
2274 if (flags.get(bit: DecorationNonWritable))
2275 decorations += " readonly";
2276
2277 statement(ts: "layout(", ts: merge(list: attributes), ts: ")", ts&: decorations, ts: " buffer ", ts&: buffer_name);
2278 }
2279 else if (alignment)
2280 statement(ts: "layout(buffer_reference, buffer_reference_align = ", ts&: alignment, ts: ") buffer ", ts&: buffer_name);
2281 else
2282 statement(ts: "layout(buffer_reference) buffer ", ts&: buffer_name);
2283
2284 begin_scope();
2285
2286 if (type.basetype == SPIRType::Struct)
2287 {
2288 type.member_name_cache.clear();
2289
2290 uint32_t i = 0;
2291 for (auto &member : type.member_types)
2292 {
2293 add_member_name(type, name: i);
2294 emit_struct_member(type, member_type_id: member, index: i);
2295 i++;
2296 }
2297 }
2298 else
2299 {
2300 auto &pointee_type = get_pointee_type(type);
2301 statement(ts: type_to_glsl(type: pointee_type), ts: " value", ts: type_to_array_glsl(type: pointee_type), ts: ";");
2302 }
2303
2304 end_scope_decl();
2305 statement(ts: "");
2306 }
2307 else
2308 {
2309 statement(ts: "layout(buffer_reference) buffer ", ts&: buffer_name, ts: ";");
2310 }
2311}
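
// For struct types, the non-forward-declared path above emits roughly (illustrative; qualifiers depend on flags):
//     layout(buffer_reference) buffer Node;  // forward declaration pass
//     layout(buffer_reference, buffer_reference_align = 16, std430) restrict buffer Node
//     {
//         vec4 value;
//         Node next;
//     };
// Non-struct pointee types instead get a single member named "value" of the pointee type.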
2312
2313void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
2314{
2315 auto &type = get<SPIRType>(id: var.basetype);
2316
2317 Bitset flags = ir.get_buffer_block_flags(var);
2318 bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
2319 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
2320 bool is_restrict = ssbo && flags.get(bit: DecorationRestrict);
2321 bool is_writeonly = ssbo && flags.get(bit: DecorationNonReadable);
2322 bool is_readonly = ssbo && flags.get(bit: DecorationNonWritable);
2323 bool is_coherent = ssbo && flags.get(bit: DecorationCoherent);
2324
2325 // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
2326 auto buffer_name = to_name(id: type.self, allow_alias: false);
2327
2328 auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
2329
2330 // Shaders never use the block by interface name, so we don't
2331 // have to track this other than updating name caches.
2332 // If we have a collision for any reason, just fallback immediately.
2333 if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(x: buffer_name) != end(cont&: block_namespace) ||
2334 resource_names.find(x: buffer_name) != end(cont&: resource_names))
2335 {
2336 buffer_name = get_block_fallback_name(id: var.self);
2337 }
2338
2339 // Make sure we get something unique for both global name scope and block name scope.
2340 // See GLSL 4.5 spec: section 4.3.9 for details.
2341 add_variable(variables_primary&: block_namespace, variables_secondary: resource_names, name&: buffer_name);
2342
	// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
	// This cannot conflict with anything else, so we're safe now.
	// This fallback name cannot be reused in either global scope (blocked by block_names) or block name scope.
2346 if (buffer_name.empty())
2347 buffer_name = join(ts: "_", ts&: get<SPIRType>(id: var.basetype).self, ts: "_", ts: var.self);
2348
2349 block_names.insert(x: buffer_name);
2350 block_namespace.insert(x: buffer_name);
2351
2352 // Save for post-reflection later.
2353 declared_block_names[var.self] = buffer_name;
2354
2355 statement(ts: layout_for_variable(var), ts: is_coherent ? "coherent " : "", ts: is_restrict ? "restrict " : "",
2356 ts: is_writeonly ? "writeonly " : "", ts: is_readonly ? "readonly " : "", ts: ssbo ? "buffer " : "uniform ",
2357 ts&: buffer_name);
2358
2359 begin_scope();
2360
2361 type.member_name_cache.clear();
2362
2363 uint32_t i = 0;
2364 for (auto &member : type.member_types)
2365 {
2366 add_member_name(type, name: i);
2367 emit_struct_member(type, member_type_id: member, index: i);
2368 i++;
2369 }
2370
2371 // var.self can be used as a backup name for the block name,
2372 // so we need to make sure we don't disturb the name here on a recompile.
2373 // It will need to be reset if we have to recompile.
2374 preserve_alias_on_reset(id: var.self);
2375 add_resource_name(id: var.self);
2376 end_scope_decl(decl: to_name(id: var.self) + type_to_array_glsl(type));
2377 statement(ts: "");
2378}
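
// A typical SSBO declaration produced here looks like (illustrative):
//     layout(std430, binding = 0) restrict buffer SSBO
//     {
//         vec4 data[];
//     } ssbo;
// where the coherent/restrict/writeonly/readonly qualifiers are derived from the buffer block flags above.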
2379
2380void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
2381{
2382 auto &type = get<SPIRType>(id: var.basetype);
2383
2384 // Block names should never alias.
2385 auto buffer_name = to_name(id: type.self, allow_alias: false);
2386 size_t buffer_size = (get_declared_struct_size(struct_type: type) + 15) / 16;
2387
2388 SPIRType::BaseType basic_type;
2389 if (get_common_basic_type(type, base_type&: basic_type))
2390 {
2391 SPIRType tmp;
2392 tmp.basetype = basic_type;
2393 tmp.vecsize = 4;
2394 if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
2395 SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
2396
2397 auto flags = ir.get_buffer_block_flags(var);
2398 statement(ts: "uniform ", ts: flags_to_qualifiers_glsl(type: tmp, flags), ts: type_to_glsl(type: tmp), ts: " ", ts&: buffer_name, ts: "[",
2399 ts&: buffer_size, ts: "];");
2400 }
2401 else
2402 SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
2403}
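
// A flattened UBO becomes a plain array of vec4/ivec4/uvec4 covering the declared struct size,
// e.g. (illustrative) a 64-byte block named UBO is emitted as:
//     uniform vec4 UBO[4];
// and access chains into the block are rewritten elsewhere to index and swizzle this array.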
2404
2405const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
2406{
2407 auto &execution = get_entry_point();
2408
2409 if (subpass_input_is_framebuffer_fetch(id: var.self))
2410 return "";
2411
2412 if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
2413 {
2414 if (is_legacy() && execution.model == ExecutionModelVertex)
2415 return var.storage == StorageClassInput ? "attribute " : "varying ";
2416 else if (is_legacy() && execution.model == ExecutionModelFragment)
2417 return "varying "; // Fragment outputs are renamed so they never hit this case.
2418 else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
2419 {
2420 uint32_t loc = get_decoration(id: var.self, decoration: DecorationLocation);
2421 bool is_inout = location_is_framebuffer_fetch(location: loc);
2422 if (is_inout)
2423 return "inout ";
2424 else
2425 return "out ";
2426 }
2427 else
2428 return var.storage == StorageClassInput ? "in " : "out ";
2429 }
2430 else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
2431 var.storage == StorageClassPushConstant)
2432 {
2433 return "uniform ";
2434 }
2435 else if (var.storage == StorageClassRayPayloadKHR)
2436 {
2437 return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV ";
2438 }
2439 else if (var.storage == StorageClassIncomingRayPayloadKHR)
2440 {
2441 return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV ";
2442 }
2443 else if (var.storage == StorageClassHitAttributeKHR)
2444 {
2445 return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV ";
2446 }
2447 else if (var.storage == StorageClassCallableDataKHR)
2448 {
2449 return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV ";
2450 }
2451 else if (var.storage == StorageClassIncomingCallableDataKHR)
2452 {
2453 return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV ";
2454 }
2455
2456 return "";
2457}
2458
2459void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
2460 const SmallVector<uint32_t> &indices)
2461{
2462 uint32_t member_type_id = type.self;
2463 const SPIRType *member_type = &type;
2464 const SPIRType *parent_type = nullptr;
2465 auto flattened_name = basename;
2466 for (auto &index : indices)
2467 {
2468 flattened_name += "_";
2469 flattened_name += to_member_name(type: *member_type, index);
2470 parent_type = member_type;
2471 member_type_id = member_type->member_types[index];
2472 member_type = &get<SPIRType>(id: member_type_id);
2473 }
2474
2475 assert(member_type->basetype != SPIRType::Struct);
2476
2477 // We're overriding struct member names, so ensure we do so on the primary type.
2478 if (parent_type->type_alias)
2479 parent_type = &get<SPIRType>(id: parent_type->type_alias);
2480
2481 // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
2482 // which is not allowed.
2483 ParsedIR::sanitize_underscores(str&: flattened_name);
2484
2485 uint32_t last_index = indices.back();
2486
2487 // Pass in the varying qualifier here so it will appear in the correct declaration order.
2488 // Replace member name while emitting it so it encodes both struct name and member name.
2489 auto backup_name = get_member_name(id: parent_type->self, index: last_index);
2490 auto member_name = to_member_name(type: *parent_type, index: last_index);
2491 set_member_name(id: parent_type->self, index: last_index, name: flattened_name);
2492 emit_struct_member(type: *parent_type, member_type_id, index: last_index, qualifier: qual);
2493 // Restore member name.
2494 set_member_name(id: parent_type->self, index: last_index, name: member_name);
2495}
2496
2497void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual,
2498 const SmallVector<uint32_t> &indices)
2499{
2500 auto sub_indices = indices;
2501 sub_indices.push_back(t: 0);
2502
2503 const SPIRType *member_type = &type;
2504 for (auto &index : indices)
2505 member_type = &get<SPIRType>(id: member_type->member_types[index]);
2506
2507 assert(member_type->basetype == SPIRType::Struct);
2508
2509 if (!member_type->array.empty())
2510 SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks.");
2511
2512 for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
2513 {
2514 sub_indices.back() = i;
2515 if (get<SPIRType>(id: member_type->member_types[i]).basetype == SPIRType::Struct)
2516 emit_flattened_io_block_struct(basename, type, qual, indices: sub_indices);
2517 else
2518 emit_flattened_io_block_member(basename, type, qual, indices: sub_indices);
2519 }
2520}
2521
2522void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
2523{
2524 auto &var_type = get<SPIRType>(id: var.basetype);
2525 if (!var_type.array.empty())
2526 SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
2527
2528 // Emit flattened types based on the type alias. Normally, we are never supposed to emit
2529 // struct declarations for aliased types.
2530 auto &type = var_type.type_alias ? get<SPIRType>(id: var_type.type_alias) : var_type;
2531
2532 auto old_flags = ir.meta[type.self].decoration.decoration_flags;
2533 // Emit the members as if they are part of a block to get all qualifiers.
2534 ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
2535
2536 type.member_name_cache.clear();
2537
2538 SmallVector<uint32_t> member_indices;
2539 member_indices.push_back(t: 0);
2540 auto basename = to_name(id: var.self);
2541
2542 uint32_t i = 0;
2543 for (auto &member : type.member_types)
2544 {
2545 add_member_name(type, name: i);
2546 auto &membertype = get<SPIRType>(id: member);
2547
2548 member_indices.back() = i;
2549 if (membertype.basetype == SPIRType::Struct)
2550 emit_flattened_io_block_struct(basename, type, qual, indices: member_indices);
2551 else
2552 emit_flattened_io_block_member(basename, type, qual, indices: member_indices);
2553 i++;
2554 }
2555
2556 ir.meta[type.self].decoration.decoration_flags = old_flags;
2557
2558 // Treat this variable as fully flattened from now on.
2559 flattened_structs[var.self] = true;
2560}
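
// Flattening turns every member of the I/O block into a standalone varying whose name joins the
// instance name and the member name, e.g. (illustrative) an output block instance "vout" with a
// member "color" is emitted as:
//     out vec4 vout_color;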
2561
2562void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
2563{
2564 auto &type = get<SPIRType>(id: var.basetype);
2565
2566 if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
2567 !options.es && options.version < 410)
2568 {
2569 require_extension_internal(ext: "GL_ARB_vertex_attrib_64bit");
2570 }
2571
2572 // Either make it plain in/out or in/out blocks depending on what shader is doing ...
2573 bool block = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock);
2574 const char *qual = to_storage_qualifiers_glsl(var);
2575
2576 if (block)
2577 {
2578 // ESSL earlier than 310 and GLSL earlier than 150 did not support
2579 // I/O variables which are struct types.
2580 // To support this, flatten the struct into separate varyings instead.
2581 if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2582 (!options.es && options.version < 150))
2583 {
2584 // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
2585 // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
2586 emit_flattened_io_block(var, qual);
2587 }
2588 else
2589 {
2590 if (options.es && options.version < 320)
2591 {
2592 // Geometry and tessellation extensions imply this extension.
2593 if (!has_extension(ext: "GL_EXT_geometry_shader") && !has_extension(ext: "GL_EXT_tessellation_shader"))
2594 require_extension_internal(ext: "GL_EXT_shader_io_blocks");
2595 }
2596
2597 // Workaround to make sure we can emit "patch in/out" correctly.
2598 fixup_io_block_patch_qualifiers(var);
2599
2600 // Block names should never alias.
2601 auto block_name = to_name(id: type.self, allow_alias: false);
2602
2603 // The namespace for I/O blocks is separate from other variables in GLSL.
2604 auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
2605
2606 // Shaders never use the block by interface name, so we don't
2607 // have to track this other than updating name caches.
2608 if (block_name.empty() || block_namespace.find(x: block_name) != end(cont&: block_namespace))
2609 block_name = get_fallback_name(id: type.self);
2610 else
2611 block_namespace.insert(x: block_name);
2612
			// If for some reason block_name is an illegal name, make a final fallback to a workaround name.
			// This cannot conflict with anything else, so we're safe now.
2615 if (block_name.empty())
2616 block_name = join(ts: "_", ts&: get<SPIRType>(id: var.basetype).self, ts: "_", ts: var.self);
2617
2618 // Instance names cannot alias block names.
2619 resource_names.insert(x: block_name);
2620
2621 bool is_patch = has_decoration(id: var.self, decoration: DecorationPatch);
2622 statement(ts: layout_for_variable(var), ts: (is_patch ? "patch " : ""), ts&: qual, ts&: block_name);
2623 begin_scope();
2624
2625 type.member_name_cache.clear();
2626
2627 uint32_t i = 0;
2628 for (auto &member : type.member_types)
2629 {
2630 add_member_name(type, name: i);
2631 emit_struct_member(type, member_type_id: member, index: i);
2632 i++;
2633 }
2634
2635 add_resource_name(id: var.self);
2636 end_scope_decl(decl: join(ts: to_name(id: var.self), ts: type_to_array_glsl(type)));
2637 statement(ts: "");
2638 }
2639 }
2640 else
2641 {
2642 // ESSL earlier than 310 and GLSL earlier than 150 did not support
2643 // I/O variables which are struct types.
2644 // To support this, flatten the struct into separate varyings instead.
2645 if (type.basetype == SPIRType::Struct &&
2646 (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2647 (!options.es && options.version < 150)))
2648 {
2649 emit_flattened_io_block(var, qual);
2650 }
2651 else
2652 {
2653 add_resource_name(id: var.self);
2654
2655 // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays.
2656 // Opt for unsized as it's the more "correct" variant to use.
2657 bool control_point_input_array = type.storage == StorageClassInput && !type.array.empty() &&
2658 !has_decoration(id: var.self, decoration: DecorationPatch) &&
2659 (get_entry_point().model == ExecutionModelTessellationControl ||
2660 get_entry_point().model == ExecutionModelTessellationEvaluation);
2661
2662 uint32_t old_array_size = 0;
2663 bool old_array_size_literal = true;
2664
2665 if (control_point_input_array)
2666 {
2667 swap(a&: type.array.back(), b&: old_array_size);
2668 swap(a&: type.array_size_literal.back(), b&: old_array_size_literal);
2669 }
2670
2671 statement(ts: layout_for_variable(var), ts: to_qualifiers_glsl(id: var.self),
2672 ts: variable_decl(type, name: to_name(id: var.self), id: var.self), ts: ";");
2673
2674 if (control_point_input_array)
2675 {
2676 swap(a&: type.array.back(), b&: old_array_size);
2677 swap(a&: type.array_size_literal.back(), b&: old_array_size_literal);
2678 }
2679 }
2680 }
2681}
2682
2683void CompilerGLSL::emit_uniform(const SPIRVariable &var)
2684{
2685 auto &type = get<SPIRType>(id: var.basetype);
2686 if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
2687 {
2688 if (!options.es && options.version < 420)
2689 require_extension_internal(ext: "GL_ARB_shader_image_load_store");
2690 else if (options.es && options.version < 310)
2691 SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
2692 }
2693
2694 add_resource_name(id: var.self);
2695 statement(ts: layout_for_variable(var), ts: variable_decl(variable: var), ts: ";");
2696}
2697
2698string CompilerGLSL::constant_value_macro_name(uint32_t id)
2699{
2700 return join(ts: "SPIRV_CROSS_CONSTANT_ID_", ts&: id);
2701}
2702
2703void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
2704{
2705 auto &type = get<SPIRType>(id: constant.basetype);
2706 add_resource_name(id: constant.self);
2707 auto name = to_name(id: constant.self);
2708 statement(ts: "const ", ts: variable_decl(type, name), ts: " = ", ts: constant_op_expression(cop: constant), ts: ";");
2709}
2710
2711int CompilerGLSL::get_constant_mapping_to_workgroup_component(const SPIRConstant &c) const
2712{
2713 auto &entry_point = get_entry_point();
2714 int index = -1;
2715
2716 // Need to redirect specialization constants which are used as WorkGroupSize to the builtin,
2717 // since the spec constant declarations are never explicitly declared.
2718 if (entry_point.workgroup_size.constant == 0 && entry_point.flags.get(bit: ExecutionModeLocalSizeId))
2719 {
2720 if (c.self == entry_point.workgroup_size.id_x)
2721 index = 0;
2722 else if (c.self == entry_point.workgroup_size.id_y)
2723 index = 1;
2724 else if (c.self == entry_point.workgroup_size.id_z)
2725 index = 2;
2726 }
2727
2728 return index;
2729}
2730
2731void CompilerGLSL::emit_constant(const SPIRConstant &constant)
2732{
2733 auto &type = get<SPIRType>(id: constant.constant_type);
2734
2735 SpecializationConstant wg_x, wg_y, wg_z;
2736 ID workgroup_size_id = get_work_group_size_specialization_constants(x&: wg_x, y&: wg_y, z&: wg_z);
2737
2738 // This specialization constant is implicitly declared by emitting layout() in;
2739 if (constant.self == workgroup_size_id)
2740 return;
2741
2742 // These specialization constants are implicitly declared by emitting layout() in;
2743 // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
2744 // later can use macro overrides for work group size.
2745 bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
2746 ConstantID(constant.self) == wg_z.id;
2747
2748 if (options.vulkan_semantics && is_workgroup_size_constant)
2749 {
2750 // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
2751 return;
2752 }
2753 else if (!options.vulkan_semantics && is_workgroup_size_constant &&
2754 !has_decoration(id: constant.self, decoration: DecorationSpecId))
2755 {
2756 // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
2757 return;
2758 }
2759
2760 add_resource_name(id: constant.self);
2761 auto name = to_name(id: constant.self);
2762
2763 // Only scalars have constant IDs.
2764 if (has_decoration(id: constant.self, decoration: DecorationSpecId))
2765 {
2766 if (options.vulkan_semantics)
2767 {
2768 statement(ts: "layout(constant_id = ", ts: get_decoration(id: constant.self, decoration: DecorationSpecId), ts: ") const ",
2769 ts: variable_decl(type, name), ts: " = ", ts: constant_expression(c: constant), ts: ";");
2770 }
2771 else
2772 {
2773 const string &macro_name = constant.specialization_constant_macro_name;
2774 statement(ts: "#ifndef ", ts: macro_name);
2775 statement(ts: "#define ", ts: macro_name, ts: " ", ts: constant_expression(c: constant));
2776 statement(ts: "#endif");
2777
2778 // For workgroup size constants, only emit the macros.
2779 if (!is_workgroup_size_constant)
2780 statement(ts: "const ", ts: variable_decl(type, name), ts: " = ", ts: macro_name, ts: ";");
2781 }
2782 }
2783 else
2784 {
2785 statement(ts: "const ", ts: variable_decl(type, name), ts: " = ", ts: constant_expression(c: constant), ts: ";");
2786 }
2787}
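
// Outside Vulkan GLSL, a specialization constant with SpecId = 10 is emitted through an overridable macro,
// so its value can be changed at compile time without patching the shader body, e.g. (illustrative):
//     #ifndef SPIRV_CROSS_CONSTANT_ID_10
//     #define SPIRV_CROSS_CONSTANT_ID_10 1
//     #endif
//     const int foo = SPIRV_CROSS_CONSTANT_ID_10;
// In Vulkan GLSL the native form "layout(constant_id = 10) const int foo = 1;" is used instead.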
2788
2789void CompilerGLSL::emit_entry_point_declarations()
2790{
2791}
2792
2793void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
2794{
2795 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
2796 if (is_hidden_variable(var))
2797 return;
2798
2799 auto *meta = ir.find_meta(id: var.self);
2800 if (!meta)
2801 return;
2802
2803 auto &m = meta->decoration;
2804 if (keywords.find(x: m.alias) != end(cont: keywords))
2805 m.alias = join(ts: "_", ts&: m.alias);
2806 });
2807
2808 ir.for_each_typed_id<SPIRFunction>(op: [&](uint32_t, const SPIRFunction &func) {
2809 auto *meta = ir.find_meta(id: func.self);
2810 if (!meta)
2811 return;
2812
2813 auto &m = meta->decoration;
2814 if (keywords.find(x: m.alias) != end(cont: keywords))
2815 m.alias = join(ts: "_", ts&: m.alias);
2816 });
2817
2818 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t, const SPIRType &type) {
2819 auto *meta = ir.find_meta(id: type.self);
2820 if (!meta)
2821 return;
2822
2823 auto &m = meta->decoration;
2824 if (keywords.find(x: m.alias) != end(cont: keywords))
2825 m.alias = join(ts: "_", ts&: m.alias);
2826
2827 for (auto &memb : meta->members)
2828 if (keywords.find(x: memb.alias) != end(cont: keywords))
2829 memb.alias = join(ts: "_", ts&: memb.alias);
2830 });
2831}
2832
2833void CompilerGLSL::replace_illegal_names()
2834{
2835 // clang-format off
2836 static const unordered_set<string> keywords = {
2837 "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
2838 "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
2839 "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
2840 "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
2841 "ceil", "cos", "cosh", "cross", "degrees",
2842 "dFdx", "dFdxCoarse", "dFdxFine",
2843 "dFdy", "dFdyCoarse", "dFdyFine",
2844 "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
2845 "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
2846 "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
2847 "greaterThan", "greaterThanEqual", "groupMemoryBarrier",
2848 "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
2849 "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
2850 "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
2851 "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
2852 "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
2853 "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
2854 "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
2855 "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
2856 "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
2857 "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
2858 "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
2859 "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
2860 "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
2861
2862 "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
2863 "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
2864 "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
2865 "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
2866 "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
2867 "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
2868 "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
2869 "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
2870 "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
2871 "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
2872 "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
2873 "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
2874 "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
2875 "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
2876 "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
2877 "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
2878 "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
2879 "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
2880 "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
2881 "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
2882 "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
2883 "while", "writeonly",
2884 };
2885 // clang-format on
2886
2887 replace_illegal_names(keywords);
2888}
2889
2890void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
2891{
2892 auto &m = ir.meta[var.self].decoration;
2893 uint32_t location = 0;
2894 if (m.decoration_flags.get(bit: DecorationLocation))
2895 location = m.location;
2896
2897 // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
2898 // do the access chain part of this for us.
2899 auto &type = get<SPIRType>(id: var.basetype);
2900
2901 if (type.array.empty())
2902 {
2903 // Redirect the write to a specific render target in legacy GLSL.
2904 m.alias = join(ts: "gl_FragData[", ts&: location, ts: "]");
2905
2906 if (is_legacy_es() && location != 0)
2907 require_extension_internal(ext: "GL_EXT_draw_buffers");
2908 }
2909 else if (type.array.size() == 1)
2910 {
2911 // If location is non-zero, we probably have to add an offset.
2912 // This gets really tricky since we'd have to inject an offset in the access chain.
2913 // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
2914 m.alias = "gl_FragData";
2915 if (location != 0)
2916 SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
2917 "This is unimplemented in SPIRV-Cross.");
2918
2919 if (is_legacy_es())
2920 require_extension_internal(ext: "GL_EXT_draw_buffers");
2921 }
2922 else
2923 SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
2924
2925 var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
2926}
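
// In legacy GLSL there are no user-declared fragment outputs, so e.g. (illustrative)
//     layout(location = 1) out vec4 color;
// is not declared at all and every write to it is redirected to gl_FragData[1]
// (requiring GL_EXT_draw_buffers on legacy ES when the location is non-zero).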
2927
2928void CompilerGLSL::replace_fragment_outputs()
2929{
2930 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
2931 auto &type = this->get<SPIRType>(id: var.basetype);
2932
2933 if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
2934 replace_fragment_output(var);
2935 });
2936}
2937
2938string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
2939{
2940 if (out_type.vecsize == input_components)
2941 return expr;
2942 else if (input_components == 1 && !backend.can_swizzle_scalar)
2943 return join(ts: type_to_glsl(type: out_type), ts: "(", ts: expr, ts: ")");
2944 else
2945 {
2946 // FIXME: This will not work with packed expressions.
2947 auto e = enclose_expression(expr) + ".";
2948 // Just clamp the swizzle index if we have more outputs than inputs.
2949 for (uint32_t c = 0; c < out_type.vecsize; c++)
2950 e += index_to_swizzle(index: min(a: c, b: input_components - 1));
2951 if (backend.swizzle_is_function && out_type.vecsize > 1)
2952 e += "()";
2953
2954 remove_duplicate_swizzle(op&: e);
2955 return e;
2956 }
2957}
2958
2959void CompilerGLSL::emit_pls()
2960{
2961 auto &execution = get_entry_point();
2962 if (execution.model != ExecutionModelFragment)
2963 SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
2964
2965 if (!options.es)
2966 SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
2967
2968 if (options.version < 300)
2969 SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
2970
2971 if (!pls_inputs.empty())
2972 {
2973 statement(ts: "__pixel_local_inEXT _PLSIn");
2974 begin_scope();
2975 for (auto &input : pls_inputs)
2976 statement(ts: pls_decl(variable: input), ts: ";");
2977 end_scope_decl();
2978 statement(ts: "");
2979 }
2980
2981 if (!pls_outputs.empty())
2982 {
2983 statement(ts: "__pixel_local_outEXT _PLSOut");
2984 begin_scope();
2985 for (auto &output : pls_outputs)
2986 statement(ts: pls_decl(variable: output), ts: ";");
2987 end_scope_decl();
2988 statement(ts: "");
2989 }
2990}
2991
2992void CompilerGLSL::fixup_image_load_store_access()
2993{
2994 if (!options.enable_storage_image_qualifier_deduction)
2995 return;
2996
2997 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t var, const SPIRVariable &) {
2998 auto &vartype = expression_type(id: var);
2999 if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
3000 {
3001 // Very old glslangValidator and HLSL compilers do not emit the required qualifiers here.
3002 // Solve this by making the image access as restricted as possible up front, and loosen it up later if needed.
3003 // If any no-read/no-write flags are actually set, assume the compiler knows what it's doing.
3004
3005 if (!has_decoration(id: var, decoration: DecorationNonWritable) && !has_decoration(id: var, decoration: DecorationNonReadable))
3006 {
3007 set_decoration(id: var, decoration: DecorationNonWritable);
3008 set_decoration(id: var, decoration: DecorationNonReadable);
3009 }
3010 }
3011 });
3012}
3013
3014static bool is_block_builtin(BuiltIn builtin)
3015{
3016 return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
3017 builtin == BuiltInCullDistance;
3018}
3019
3020bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
3021{
3022 // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.
3023
3024 if (storage != StorageClassOutput)
3025 return false;
3026 bool should_force = false;
3027
3028 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3029 if (should_force)
3030 return;
3031
3032 auto &type = this->get<SPIRType>(id: var.basetype);
3033 bool block = has_decoration(id: type.self, decoration: DecorationBlock);
3034 if (var.storage == storage && block && is_builtin_variable(var))
3035 {
3036 uint32_t member_count = uint32_t(type.member_types.size());
3037 for (uint32_t i = 0; i < member_count; i++)
3038 {
3039 if (has_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn) &&
3040 is_block_builtin(builtin: BuiltIn(get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn))) &&
3041 has_member_decoration(id: type.self, index: i, decoration: DecorationOffset))
3042 {
3043 should_force = true;
3044 }
3045 }
3046 }
3047 else if (var.storage == storage && !block && is_builtin_variable(var))
3048 {
3049 if (is_block_builtin(builtin: BuiltIn(get_decoration(id: type.self, decoration: DecorationBuiltIn))) &&
3050 has_decoration(id: var.self, decoration: DecorationOffset))
3051 {
3052 should_force = true;
3053 }
3054 }
3055 });
3056
3057 // If we're declaring clip/cull planes with control points we need to force block declaration.
3058 if (get_execution_model() == ExecutionModelTessellationControl &&
3059 (clip_distance_count || cull_distance_count))
3060 {
3061 should_force = true;
3062 }
3063
3064 return should_force;
3065}
3066
3067void CompilerGLSL::fixup_implicit_builtin_block_names()
3068{
3069 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3070 auto &type = this->get<SPIRType>(id: var.basetype);
3071 bool block = has_decoration(id: type.self, decoration: DecorationBlock);
3072 if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
3073 is_builtin_variable(var))
3074 {
3075 // Make sure the array has a supported name in the code.
3076 if (var.storage == StorageClassOutput)
3077 set_name(id: var.self, name: "gl_out");
3078 else if (var.storage == StorageClassInput)
3079 set_name(id: var.self, name: "gl_in");
3080 }
3081 });
3082}
3083
3084void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
3085{
3086 Bitset emitted_builtins;
3087 Bitset global_builtins;
3088 const SPIRVariable *block_var = nullptr;
3089 bool emitted_block = false;
3090 bool builtin_array = false;
3091
3092 // Need to use declared size in the type.
3093 // These variables might have been declared, but not statically used, so we haven't deduced their size yet.
3094 uint32_t cull_distance_size = 0;
3095 uint32_t clip_distance_size = 0;
3096
3097 bool have_xfb_buffer_stride = false;
3098 bool have_geom_stream = false;
3099 bool have_any_xfb_offset = false;
3100 uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
3101 std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
3102
3103 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3104 auto &type = this->get<SPIRType>(id: var.basetype);
3105 bool block = has_decoration(id: type.self, decoration: DecorationBlock);
3106 Bitset builtins;
3107
3108 if (var.storage == storage && block && is_builtin_variable(var))
3109 {
3110 uint32_t index = 0;
3111 for (auto &m : ir.meta[type.self].members)
3112 {
3113 if (m.builtin)
3114 {
3115 builtins.set(m.builtin_type);
3116 if (m.builtin_type == BuiltInCullDistance)
3117 cull_distance_size = to_array_size_literal(type: this->get<SPIRType>(id: type.member_types[index]));
3118 else if (m.builtin_type == BuiltInClipDistance)
3119 clip_distance_size = to_array_size_literal(type: this->get<SPIRType>(id: type.member_types[index]));
3120
3121 if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationOffset))
3122 {
3123 have_any_xfb_offset = true;
3124 builtin_xfb_offsets[m.builtin_type] = m.offset;
3125 }
3126
3127 if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationStream))
3128 {
3129 uint32_t stream = m.stream;
3130 if (have_geom_stream && geom_stream != stream)
3131 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3132 have_geom_stream = true;
3133 geom_stream = stream;
3134 }
3135 }
3136 index++;
3137 }
3138
3139 if (storage == StorageClassOutput && has_decoration(id: var.self, decoration: DecorationXfbBuffer) &&
3140 has_decoration(id: var.self, decoration: DecorationXfbStride))
3141 {
3142 uint32_t buffer_index = get_decoration(id: var.self, decoration: DecorationXfbBuffer);
3143 uint32_t stride = get_decoration(id: var.self, decoration: DecorationXfbStride);
3144 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
3145 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3146 if (have_xfb_buffer_stride && stride != xfb_stride)
3147 SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
3148 have_xfb_buffer_stride = true;
3149 xfb_buffer = buffer_index;
3150 xfb_stride = stride;
3151 }
3152
3153 if (storage == StorageClassOutput && has_decoration(id: var.self, decoration: DecorationStream))
3154 {
3155 uint32_t stream = get_decoration(id: var.self, decoration: DecorationStream);
3156 if (have_geom_stream && geom_stream != stream)
3157 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3158 have_geom_stream = true;
3159 geom_stream = stream;
3160 }
3161 }
3162 else if (var.storage == storage && !block && is_builtin_variable(var))
3163 {
3164 // While we're at it, collect all declared global builtins (HLSL mostly ...).
3165 auto &m = ir.meta[var.self].decoration;
3166 if (m.builtin)
3167 {
3168 global_builtins.set(m.builtin_type);
3169 if (m.builtin_type == BuiltInCullDistance)
3170 cull_distance_size = to_array_size_literal(type);
3171 else if (m.builtin_type == BuiltInClipDistance)
3172 clip_distance_size = to_array_size_literal(type);
3173
3174 if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationXfbStride) &&
3175 m.decoration_flags.get(bit: DecorationXfbBuffer) && m.decoration_flags.get(bit: DecorationOffset))
3176 {
3177 have_any_xfb_offset = true;
3178 builtin_xfb_offsets[m.builtin_type] = m.offset;
3179 uint32_t buffer_index = m.xfb_buffer;
3180 uint32_t stride = m.xfb_stride;
3181 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
3182 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3183 if (have_xfb_buffer_stride && stride != xfb_stride)
3184 SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
3185 have_xfb_buffer_stride = true;
3186 xfb_buffer = buffer_index;
3187 xfb_stride = stride;
3188 }
3189
3190 if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationStream))
3191 {
3192 uint32_t stream = get_decoration(id: var.self, decoration: DecorationStream);
3193 if (have_geom_stream && geom_stream != stream)
3194 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3195 have_geom_stream = true;
3196 geom_stream = stream;
3197 }
3198 }
3199 }
3200
3201 if (builtins.empty())
3202 return;
3203
3204 if (emitted_block)
3205 SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
3206
3207 emitted_builtins = builtins;
3208 emitted_block = true;
3209 builtin_array = !type.array.empty();
3210 block_var = &var;
3211 });
3212
3213 global_builtins =
3214 Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
3215 (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
3216
3217 // Try to collect all other declared builtins.
3218 if (!emitted_block)
3219 emitted_builtins = global_builtins;
3220
3221 // Can't declare an empty interface block.
3222 if (emitted_builtins.empty())
3223 return;
3224
3225 if (storage == StorageClassOutput)
3226 {
3227 SmallVector<string> attr;
3228 if (have_xfb_buffer_stride && have_any_xfb_offset)
3229 {
3230 if (!options.es)
3231 {
3232 if (options.version < 440 && options.version >= 140)
3233 require_extension_internal(ext: "GL_ARB_enhanced_layouts");
3234 else if (options.version < 140)
3235 SPIRV_CROSS_THROW("xfb_buffer/xfb_stride requires GLSL 1.40 or higher.");
3238 }
3239 else
3240 SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
3241 attr.push_back(t: join(ts: "xfb_buffer = ", ts&: xfb_buffer, ts: ", xfb_stride = ", ts&: xfb_stride));
3242 }
3243
3244 if (have_geom_stream)
3245 {
3246 if (get_execution_model() != ExecutionModelGeometry)
3247 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
3248 if (options.es)
3249 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
3250 if (options.version < 400)
3251 require_extension_internal(ext: "GL_ARB_transform_feedback3");
3252 attr.push_back(t: join(ts: "stream = ", ts&: geom_stream));
3253 }
3254
3255 if (!attr.empty())
3256 statement(ts: "layout(", ts: merge(list: attr), ts: ") out gl_PerVertex");
3257 else
3258 statement(ts: "out gl_PerVertex");
3259 }
3260 else
3261 {
3262 // If the geometry shader uses passthrough, the gl_PerVertex input block must be passthrough as well.
3263 if (get_entry_point().geometry_passthrough)
3264 statement(ts: "layout(passthrough) in gl_PerVertex");
3265 else
3266 statement(ts: "in gl_PerVertex");
3267 }
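// The block body emitted below looks roughly like this when XFB offsets are present (illustrative values):
//
//   layout(xfb_buffer = 0, xfb_stride = 32) out gl_PerVertex
//   {
//       layout(xfb_offset = 0) vec4 gl_Position;
//       layout(xfb_offset = 16) float gl_ClipDistance[4];
//   };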
3268
3269 begin_scope();
3270 if (emitted_builtins.get(bit: BuiltInPosition))
3271 {
3272 auto itr = builtin_xfb_offsets.find(x: BuiltInPosition);
3273 if (itr != end(cont&: builtin_xfb_offsets))
3274 statement(ts: "layout(xfb_offset = ", ts&: itr->second, ts: ") vec4 gl_Position;");
3275 else
3276 statement(ts: "vec4 gl_Position;");
3277 }
3278
3279 if (emitted_builtins.get(bit: BuiltInPointSize))
3280 {
3281 auto itr = builtin_xfb_offsets.find(x: BuiltInPointSize);
3282 if (itr != end(cont&: builtin_xfb_offsets))
3283 statement(ts: "layout(xfb_offset = ", ts&: itr->second, ts: ") float gl_PointSize;");
3284 else
3285 statement(ts: "float gl_PointSize;");
3286 }
3287
3288 if (emitted_builtins.get(bit: BuiltInClipDistance))
3289 {
3290 auto itr = builtin_xfb_offsets.find(x: BuiltInClipDistance);
3291 if (itr != end(cont&: builtin_xfb_offsets))
3292 statement(ts: "layout(xfb_offset = ", ts&: itr->second, ts: ") float gl_ClipDistance[", ts&: clip_distance_size, ts: "];");
3293 else
3294 statement(ts: "float gl_ClipDistance[", ts&: clip_distance_size, ts: "];");
3295 }
3296
3297 if (emitted_builtins.get(bit: BuiltInCullDistance))
3298 {
3299 auto itr = builtin_xfb_offsets.find(x: BuiltInCullDistance);
3300 if (itr != end(cont&: builtin_xfb_offsets))
3301 statement(ts: "layout(xfb_offset = ", ts&: itr->second, ts: ") float gl_CullDistance[", ts&: cull_distance_size, ts: "];");
3302 else
3303 statement(ts: "float gl_CullDistance[", ts&: cull_distance_size, ts: "];");
3304 }
3305
3306 if (builtin_array)
3307 {
3308 if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
3309 end_scope_decl(decl: join(ts: to_name(id: block_var->self), ts: "[", ts&: get_entry_point().output_vertices, ts: "]"));
3310 else
3311 end_scope_decl(decl: join(ts: to_name(id: block_var->self), ts: "[]"));
3312 }
3313 else
3314 end_scope_decl();
3315 statement(ts: "");
3316}
3317
3318void CompilerGLSL::declare_undefined_values()
3319{
3320 bool emitted = false;
3321 ir.for_each_typed_id<SPIRUndef>(op: [&](uint32_t, const SPIRUndef &undef) {
3322 auto &type = this->get<SPIRType>(id: undef.basetype);
3323 // OpUndef can be void for some reason ...
3324 if (type.basetype == SPIRType::Void)
3325 return;
3326
3327 string initializer;
3328 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
3329 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: undef.basetype));
3330
3331 statement(ts: variable_decl(type, name: to_name(id: undef.self), id: undef.self), ts&: initializer, ts: ";");
3332 emitted = true;
3333 });
3334
3335 if (emitted)
3336 statement(ts: "");
3337}
3338
3339bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
3340{
3341 bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
3342
3343 if (statically_assigned)
3344 {
3345 auto *constant = maybe_get<SPIRConstant>(id: var.static_expression);
3346 if (constant && constant->is_used_as_lut)
3347 return true;
3348 }
3349
3350 return false;
3351}
3352
3353void CompilerGLSL::emit_resources()
3354{
3355 auto &execution = get_entry_point();
3356
3357 replace_illegal_names();
3358
3359 // Legacy GL uses gl_FragData[]; redeclare all fragment outputs
3360 // in terms of this builtin.
3361 if (execution.model == ExecutionModelFragment && is_legacy())
3362 replace_fragment_outputs();
3363
3364 // Emit PLS blocks if we have such variables.
3365 if (!pls_inputs.empty() || !pls_outputs.empty())
3366 emit_pls();
3367
3368 switch (execution.model)
3369 {
3370 case ExecutionModelGeometry:
3371 case ExecutionModelTessellationControl:
3372 case ExecutionModelTessellationEvaluation:
3373 fixup_implicit_builtin_block_names();
3374 break;
3375
3376 default:
3377 break;
3378 }
3379
3380 // Emit custom gl_PerVertex for SSO compatibility.
3381 if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
3382 {
3383 switch (execution.model)
3384 {
3385 case ExecutionModelGeometry:
3386 case ExecutionModelTessellationControl:
3387 case ExecutionModelTessellationEvaluation:
3388 emit_declared_builtin_block(storage: StorageClassInput, model: execution.model);
3389 emit_declared_builtin_block(storage: StorageClassOutput, model: execution.model);
3390 break;
3391
3392 case ExecutionModelVertex:
3393 emit_declared_builtin_block(storage: StorageClassOutput, model: execution.model);
3394 break;
3395
3396 default:
3397 break;
3398 }
3399 }
3400 else if (should_force_emit_builtin_block(storage: StorageClassOutput))
3401 {
3402 emit_declared_builtin_block(storage: StorageClassOutput, model: execution.model);
3403 }
3404 else if (execution.geometry_passthrough)
3405 {
3406 // Need to declare gl_in with Passthrough.
3407 // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
3408 emit_declared_builtin_block(storage: StorageClassInput, model: execution.model);
3409 }
3410 else
3411 {
3412 // Need to redeclare clip/cull distance with explicit size to use them.
3413 // SPIR-V mandates these builtins have a size declared.
3414 const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
3415 if (clip_distance_count != 0)
3416 statement(ts&: storage, ts: " float gl_ClipDistance[", ts&: clip_distance_count, ts: "];");
3417 if (cull_distance_count != 0)
3418 statement(ts&: storage, ts: " float gl_CullDistance[", ts&: cull_distance_count, ts: "];");
3419 if (clip_distance_count != 0 || cull_distance_count != 0)
3420 statement(ts: "");
3421 }
3422
3423 if (position_invariant)
3424 {
3425 statement(ts: "invariant gl_Position;");
3426 statement(ts: "");
3427 }
3428
3429 bool emitted = false;
3430
3431 // Emit specialization constants and constants used as LUTs.
3432 // When not emitting Vulkan GLSL, specialization constants are backed by macros
3433 // (see constant_value_macro_name below); spec op expressions will redirect to the constant name.
3434 //
3435 {
3436 auto loop_lock = ir.create_loop_hard_lock();
3437 for (auto &id_ : ir.ids_for_constant_or_type)
3438 {
3439 auto &id = ir.ids[id_];
3440
3441 if (id.get_type() == TypeConstant)
3442 {
3443 auto &c = id.get<SPIRConstant>();
3444
3445 bool needs_declaration = c.specialization || c.is_used_as_lut;
3446
3447 if (needs_declaration)
3448 {
3449 if (!options.vulkan_semantics && c.specialization)
3450 {
3451 c.specialization_constant_macro_name =
3452 constant_value_macro_name(id: get_decoration(id: c.self, decoration: DecorationSpecId));
3453 }
3454 emit_constant(constant: c);
3455 emitted = true;
3456 }
3457 }
3458 else if (id.get_type() == TypeConstantOp)
3459 {
3460 emit_specialization_constant_op(constant: id.get<SPIRConstantOp>());
3461 emitted = true;
3462 }
3463 else if (id.get_type() == TypeType)
3464 {
3465 auto *type = &id.get<SPIRType>();
3466
3467 bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
3468 (!has_decoration(id: type->self, decoration: DecorationBlock) &&
3469 !has_decoration(id: type->self, decoration: DecorationBufferBlock));
3470
3471 // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs.
3472 if (type->basetype == SPIRType::Struct && type->pointer &&
3473 has_decoration(id: type->self, decoration: DecorationBlock) &&
3474 (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
3475 type->storage == StorageClassHitAttributeKHR))
3476 {
3477 type = &get<SPIRType>(id: type->parent_type);
3478 is_natural_struct = true;
3479 }
3480
3481 if (is_natural_struct)
3482 {
3483 if (emitted)
3484 statement(ts: "");
3485 emitted = false;
3486
3487 emit_struct(type&: *type);
3488 }
3489 }
3490 }
3491 }
3492
3493 if (emitted)
3494 statement(ts: "");
3495
3496 // If we needed to declare work group size late, check here.
3497 // If the work group size depends on a specialization constant, we need to declare the layout() block
3498 // after constants (and their macros) have been declared.
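// E.g. (illustrative), with only X specialized, this emits something along the lines of:
//   layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_0, local_size_y = 1, local_size_z = 1) in;
// where the macro name comes from constant_value_macro_name() used above.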
3499 if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
3500 (execution.workgroup_size.constant != 0 || execution.flags.get(bit: ExecutionModeLocalSizeId)))
3501 {
3502 SpecializationConstant wg_x, wg_y, wg_z;
3503 get_work_group_size_specialization_constants(x&: wg_x, y&: wg_y, z&: wg_z);
3504
3505 if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
3506 {
3507 SmallVector<string> inputs;
3508 build_workgroup_size(arguments&: inputs, wg_x, wg_y, wg_z);
3509 statement(ts: "layout(", ts: merge(list: inputs), ts: ") in;");
3510 statement(ts: "");
3511 }
3512 }
3513
3514 emitted = false;
3515
3516 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
3517 {
3518 for (auto type : physical_storage_non_block_pointer_types)
3519 {
3520 emit_buffer_reference_block(type_id: type, forward_declaration: false);
3521 }
3522
3523 // Output buffer reference blocks.
3524 // Do this in two stages, one with forward declaration,
3525 // and one without. Buffer reference blocks can reference themselves
3526 // to support things like linked lists.
3527 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t self, SPIRType &type) {
3528 if (type.basetype == SPIRType::Struct && type.pointer &&
3529 type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
3530 type.storage == StorageClassPhysicalStorageBufferEXT)
3531 {
3532 emit_buffer_reference_block(type_id: self, forward_declaration: true);
3533 }
3534 });
3535
3536 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t self, SPIRType &type) {
3537 if (type.basetype == SPIRType::Struct &&
3538 type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
3539 type.storage == StorageClassPhysicalStorageBufferEXT)
3540 {
3541 emit_buffer_reference_block(type_id: self, forward_declaration: false);
3542 }
3543 });
3544 }
3545
3546 // Output UBOs and SSBOs
3547 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3548 auto &type = this->get<SPIRType>(id: var.basetype);
3549
3550 bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
3551 type.storage == StorageClassShaderRecordBufferKHR;
3552 bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock) ||
3553 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
3554
3555 if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
3556 has_block_flags)
3557 {
3558 emit_buffer_block(var);
3559 }
3560 });
3561
3562 // Output push constant blocks
3563 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3564 auto &type = this->get<SPIRType>(id: var.basetype);
3565 if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
3566 !is_hidden_variable(var))
3567 {
3568 emit_push_constant_block(var);
3569 }
3570 });
3571
3572 bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
3573
3574 // Output Uniform Constants (values, samplers, images, etc).
3575 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3576 auto &type = this->get<SPIRType>(id: var.basetype);
3577
3578 // If we're remapping separate samplers and images, only emit the combined samplers.
3579 if (skip_separate_image_sampler)
3580 {
3581 // Sampler buffers are always used without a sampler, and they will also work in regular GL.
3582 bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
3583 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
3584 bool separate_sampler = type.basetype == SPIRType::Sampler;
3585 if (!sampler_buffer && (separate_image || separate_sampler))
3586 return;
3587 }
3588
3589 if (var.storage != StorageClassFunction && type.pointer &&
3590 (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
3591 type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
3592 type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
3593 type.storage == StorageClassHitAttributeKHR) &&
3594 !is_hidden_variable(var))
3595 {
3596 emit_uniform(var);
3597 emitted = true;
3598 }
3599 });
3600
3601 if (emitted)
3602 statement(ts: "");
3603 emitted = false;
3604
3605 bool emitted_base_instance = false;
3606
3607 // Output in/out interfaces.
3608 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3609 auto &type = this->get<SPIRType>(id: var.basetype);
3610
3611 bool is_hidden = is_hidden_variable(var);
3612
3613 // Unused output I/O variables might still be required to implement framebuffer fetch.
3614 if (var.storage == StorageClassOutput && !is_legacy() &&
3615 location_is_framebuffer_fetch(location: get_decoration(id: var.self, decoration: DecorationLocation)) != 0)
3616 {
3617 is_hidden = false;
3618 }
3619
3620 if (var.storage != StorageClassFunction && type.pointer &&
3621 (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
3622 interface_variable_exists_in_entry_point(id: var.self) && !is_hidden)
3623 {
3624 if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput &&
3625 type.array.size() == 1)
3626 {
3627 SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader.");
3628 }
3629 emit_interface_block(var);
3630 emitted = true;
3631 }
3632 else if (is_builtin_variable(var))
3633 {
3634 auto builtin = BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn));
3635 // For gl_InstanceIndex emulation on GLES, the API user needs to
3636 // supply this uniform.
3637
3638 // The draw parameter extension is soft-enabled on GL with some fallbacks.
3639 if (!options.vulkan_semantics)
3640 {
3641 if (!emitted_base_instance &&
3642 ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
3643 (builtin == BuiltInBaseInstance)))
3644 {
3645 statement(ts: "#ifdef GL_ARB_shader_draw_parameters");
3646 statement(ts: "#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
3647 statement(ts: "#else");
3648 // A crude, but simple workaround which should be good enough for non-indirect draws.
3649 statement(ts: "uniform int SPIRV_Cross_BaseInstance;");
3650 statement(ts: "#endif");
3651 emitted = true;
3652 emitted_base_instance = true;
3653 }
3654 else if (builtin == BuiltInBaseVertex)
3655 {
3656 statement(ts: "#ifdef GL_ARB_shader_draw_parameters");
3657 statement(ts: "#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
3658 statement(ts: "#else");
3659 // A crude, but simple workaround which should be good enough for non-indirect draws.
3660 statement(ts: "uniform int SPIRV_Cross_BaseVertex;");
3661 statement(ts: "#endif");
3662 }
3663 else if (builtin == BuiltInDrawIndex)
3664 {
3665 statement(ts: "#ifndef GL_ARB_shader_draw_parameters");
3666 // Cannot really be worked around.
3667 statement(ts: "#error GL_ARB_shader_draw_parameters is not supported.");
3668 statement(ts: "#endif");
3669 }
3670 }
3671 }
3672 });
3673
3674 // Global variables.
3675 for (auto global : global_variables)
3676 {
3677 auto &var = get<SPIRVariable>(id: global);
3678 if (is_hidden_variable(var, include_builtins: true))
3679 continue;
3680
3681 if (var.storage != StorageClassOutput)
3682 {
3683 if (!variable_is_lut(var))
3684 {
3685 add_resource_name(id: var.self);
3686
3687 string initializer;
3688 if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
3689 !var.initializer && !var.static_expression && type_can_zero_initialize(type: get_variable_data_type(var)))
3690 {
3691 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var)));
3692 }
3693
3694 statement(ts: variable_decl(variable: var), ts&: initializer, ts: ";");
3695 emitted = true;
3696 }
3697 }
3698 else if (var.initializer && maybe_get<SPIRConstant>(id: var.initializer) != nullptr)
3699 {
3700 emit_output_variable_initializer(var);
3701 }
3702 }
3703
3704 if (emitted)
3705 statement(ts: "");
3706
3707 declare_undefined_values();
3708}
3709
3710void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
3711{
3712 // If a StorageClassOutput variable has an initializer, we need to initialize it in main().
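// GLSL does not generally allow initializers on output interface variables, so the initializer is
// turned into a constant LUT plus a fixup at the top of main(), e.g. (illustrative, non-block case):
//   const vec4 _7_init = vec4(1.0);
//   ...
//   FooOut = _7_init;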
3713 auto &entry_func = this->get<SPIRFunction>(id: ir.default_entry_point);
3714 auto &type = get<SPIRType>(id: var.basetype);
3715 bool is_patch = has_decoration(id: var.self, decoration: DecorationPatch);
3716 bool is_block = has_decoration(id: type.self, decoration: DecorationBlock);
3717 bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch;
3718
3719 if (is_block)
3720 {
3721 uint32_t member_count = uint32_t(type.member_types.size());
3722 bool type_is_array = type.array.size() == 1;
3723 uint32_t array_size = 1;
3724 if (type_is_array)
3725 array_size = to_array_size_literal(type);
3726 uint32_t iteration_count = is_control_point ? 1 : array_size;
3727
3728 // If the initializer is a block, we must initialize each block member one at a time.
3729 for (uint32_t i = 0; i < member_count; i++)
3730 {
3731 // These outputs might not have been properly declared, so don't initialize them in that case.
3732 if (has_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn))
3733 {
3734 if (get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn) == BuiltInCullDistance &&
3735 !cull_distance_count)
3736 continue;
3737
3738 if (get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn) == BuiltInClipDistance &&
3739 !clip_distance_count)
3740 continue;
3741 }
3742
3743 // We need to build a per-member array first, essentially transposing from AoS to SoA.
3744 // This code path hits when we have an array of blocks.
3745 string lut_name;
3746 if (type_is_array)
3747 {
3748 lut_name = join(ts: "_", ts: var.self, ts: "_", ts&: i, ts: "_init");
3749 uint32_t member_type_id = get<SPIRType>(id: var.basetype).member_types[i];
3750 auto &member_type = get<SPIRType>(id: member_type_id);
3751 auto array_type = member_type;
3752 array_type.parent_type = member_type_id;
3753 array_type.array.push_back(t: array_size);
3754 array_type.array_size_literal.push_back(t: true);
3755
3756 SmallVector<string> exprs;
3757 exprs.reserve(count: array_size);
3758 auto &c = get<SPIRConstant>(id: var.initializer);
3759 for (uint32_t j = 0; j < array_size; j++)
3760 exprs.push_back(t: to_expression(id: get<SPIRConstant>(id: c.subconstants[j]).subconstants[i]));
3761 statement(ts: "const ", ts: type_to_glsl(type: array_type), ts: " ", ts&: lut_name, ts: type_to_array_glsl(type: array_type), ts: " = ",
3762 ts: type_to_glsl_constructor(type: array_type), ts: "(", ts: merge(list: exprs, between: ", "), ts: ");");
3763 }
3764
3765 for (uint32_t j = 0; j < iteration_count; j++)
3766 {
3767 entry_func.fixup_hooks_in.push_back(t: [=, &var]() {
3768 AccessChainMeta meta;
3769 auto &c = this->get<SPIRConstant>(id: var.initializer);
3770
3771 uint32_t invocation_id = 0;
3772 uint32_t member_index_id = 0;
3773 if (is_control_point)
3774 {
3775 uint32_t ids = ir.increase_bound_by(count: 3);
3776 SPIRType uint_type;
3777 uint_type.basetype = SPIRType::UInt;
3778 uint_type.width = 32;
3779 set<SPIRType>(id: ids, args&: uint_type);
3780 set<SPIRExpression>(id: ids + 1, args: builtin_to_glsl(builtin: BuiltInInvocationId, storage: StorageClassInput), args&: ids, args: true);
3781 set<SPIRConstant>(id: ids + 2, args&: ids, args: i, args: false);
3782 invocation_id = ids + 1;
3783 member_index_id = ids + 2;
3784 }
3785
3786 if (is_patch)
3787 {
3788 statement(ts: "if (gl_InvocationID == 0)");
3789 begin_scope();
3790 }
3791
3792 if (type_is_array && !is_control_point)
3793 {
3794 uint32_t indices[2] = { j, i };
3795 auto chain = access_chain_internal(base: var.self, indices, count: 2, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &meta);
3796 statement(ts&: chain, ts: " = ", ts: lut_name, ts: "[", ts: j, ts: "];");
3797 }
3798 else if (is_control_point)
3799 {
3800 uint32_t indices[2] = { invocation_id, member_index_id };
3801 auto chain = access_chain_internal(base: var.self, indices, count: 2, flags: 0, meta: &meta);
3802 statement(ts&: chain, ts: " = ", ts: lut_name, ts: "[", ts: builtin_to_glsl(builtin: BuiltInInvocationId, storage: StorageClassInput), ts: "];");
3803 }
3804 else
3805 {
3806 auto chain =
3807 access_chain_internal(base: var.self, indices: &i, count: 1, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &meta);
3808 statement(ts&: chain, ts: " = ", ts: to_expression(id: c.subconstants[i]), ts: ";");
3809 }
3810
3811 if (is_patch)
3812 end_scope();
3813 });
3814 }
3815 }
3816 }
3817 else if (is_control_point)
3818 {
3819 auto lut_name = join(ts: "_", ts: var.self, ts: "_init");
3820 statement(ts: "const ", ts: type_to_glsl(type), ts: " ", ts&: lut_name, ts: type_to_array_glsl(type),
3821 ts: " = ", ts: to_expression(id: var.initializer), ts: ";");
3822 entry_func.fixup_hooks_in.push_back(t: [&, lut_name]() {
3823 statement(ts: to_expression(id: var.self), ts: "[gl_InvocationID] = ", ts: lut_name, ts: "[gl_InvocationID];");
3824 });
3825 }
3826 else if (has_decoration(id: var.self, decoration: DecorationBuiltIn) &&
3827 BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn)) == BuiltInSampleMask)
3828 {
3829 // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_<
3830 entry_func.fixup_hooks_in.push_back(t: [&] {
3831 auto &c = this->get<SPIRConstant>(id: var.initializer);
3832 uint32_t num_constants = uint32_t(c.subconstants.size());
3833 for (uint32_t i = 0; i < num_constants; i++)
3834 {
3835 // Don't use to_expression on constant since it might be uint, just fish out the raw int.
3836 statement(ts: to_expression(id: var.self), ts: "[", ts&: i, ts: "] = ",
3837 ts: convert_to_string(value: this->get<SPIRConstant>(id: c.subconstants[i]).scalar_i32()), ts: ";");
3838 }
3839 });
3840 }
3841 else
3842 {
3843 auto lut_name = join(ts: "_", ts: var.self, ts: "_init");
3844 statement(ts: "const ", ts: type_to_glsl(type), ts: " ", ts&: lut_name,
3845 ts: type_to_array_glsl(type), ts: " = ", ts: to_expression(id: var.initializer), ts: ";");
3846 entry_func.fixup_hooks_in.push_back(t: [&, lut_name, is_patch]() {
3847 if (is_patch)
3848 {
3849 statement(ts: "if (gl_InvocationID == 0)");
3850 begin_scope();
3851 }
3852 statement(ts: to_expression(id: var.self), ts: " = ", ts: lut_name, ts: ";");
3853 if (is_patch)
3854 end_scope();
3855 });
3856 }
3857}
3858
3859void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
3860{
3861 static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
3862 "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" };
3863
3864 if (!options.vulkan_semantics)
3865 {
3866 using Supp = ShaderSubgroupSupportHelper;
3867 auto result = shader_subgroup_supporter.resolve();
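// Each requested subgroup feature below is emitted as a preprocessor cascade over the candidate
// extensions, e.g. (illustrative):
//
//   #if defined(GL_NV_shader_thread_group)
//   #define gl_SubgroupSize gl_WarpSizeNV
//   #elif defined(GL_ARB_shader_ballot)
//   #define gl_SubgroupSize gl_SubGroupSizeARB
//   #endif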
3868
3869 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupMask))
3870 {
3871 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupMask, r: result);
3872
3873 for (auto &e : exts)
3874 {
3875 const char *name = Supp::get_extension_name(c: e);
3876 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
3877
3878 switch (e)
3879 {
3880 case Supp::NV_shader_thread_group:
3881 statement(ts: "#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)");
3882 statement(ts: "#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)");
3883 statement(ts: "#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)");
3884 statement(ts: "#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)");
3885 statement(ts: "#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)");
3886 break;
3887 case Supp::ARB_shader_ballot:
3888 statement(ts: "#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)");
3889 statement(ts: "#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)");
3890 statement(ts: "#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)");
3891 statement(ts: "#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)");
3892 statement(ts: "#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)");
3893 break;
3894 default:
3895 break;
3896 }
3897 }
3898 statement(ts: "#endif");
3899 statement(ts: "");
3900 }
3901
3902 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupSize))
3903 {
3904 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupSize, r: result);
3905
3906 for (auto &e : exts)
3907 {
3908 const char *name = Supp::get_extension_name(c: e);
3909 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
3910
3911 switch (e)
3912 {
3913 case Supp::NV_shader_thread_group:
3914 statement(ts: "#define gl_SubgroupSize gl_WarpSizeNV");
3915 break;
3916 case Supp::ARB_shader_ballot:
3917 statement(ts: "#define gl_SubgroupSize gl_SubGroupSizeARB");
3918 break;
3919 case Supp::AMD_gcn_shader:
3920 statement(ts: "#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
3921 break;
3922 default:
3923 break;
3924 }
3925 }
3926 statement(ts: "#endif");
3927 statement(ts: "");
3928 }
3929
3930 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupInvocationID))
3931 {
3932 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupInvocationID, r: result);
3933
3934 for (auto &e : exts)
3935 {
3936 const char *name = Supp::get_extension_name(c: e);
3937 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
3938
3939 switch (e)
3940 {
3941 case Supp::NV_shader_thread_group:
3942 statement(ts: "#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
3943 break;
3944 case Supp::ARB_shader_ballot:
3945 statement(ts: "#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
3946 break;
3947 default:
3948 break;
3949 }
3950 }
3951 statement(ts: "#endif");
3952 statement(ts: "");
3953 }
3954
3955 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupID))
3956 {
3957 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupID, r: result);
3958
3959 for (auto &e : exts)
3960 {
3961 const char *name = Supp::get_extension_name(c: e);
3962 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
3963
3964 switch (e)
3965 {
3966 case Supp::NV_shader_thread_group:
3967 statement(ts: "#define gl_SubgroupID gl_WarpIDNV");
3968 break;
3969 default:
3970 break;
3971 }
3972 }
3973 statement(ts: "#endif");
3974 statement(ts: "");
3975 }
3976
3977 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::NumSubgroups))
3978 {
3979 auto exts = Supp::get_candidates_for_feature(ft: Supp::NumSubgroups, r: result);
3980
3981 for (auto &e : exts)
3982 {
3983 const char *name = Supp::get_extension_name(c: e);
3984 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
3985
3986 switch (e)
3987 {
3988 case Supp::NV_shader_thread_group:
3989 statement(ts: "#define gl_NumSubgroups gl_WarpsPerSMNV");
3990 break;
3991 default:
3992 break;
3993 }
3994 }
3995 statement(ts: "#endif");
3996 statement(ts: "");
3997 }
3998
3999 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBroadcast_First))
4000 {
4001 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBroadcast_First, r: result);
4002
4003 for (auto &e : exts)
4004 {
4005 const char *name = Supp::get_extension_name(c: e);
4006 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4007
4008 switch (e)
4009 {
4010 case Supp::NV_shader_thread_shuffle:
4011 for (const char *t : workaround_types)
4012 {
4013 statement(ts&: t, ts: " subgroupBroadcastFirst(", ts&: t,
4014 ts: " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
4015 }
4016 for (const char *t : workaround_types)
4017 {
4018 statement(ts&: t, ts: " subgroupBroadcast(", ts&: t,
4019 ts: " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
4020 }
4021 break;
4022 case Supp::ARB_shader_ballot:
4023 for (const char *t : workaround_types)
4024 {
4025 statement(ts&: t, ts: " subgroupBroadcastFirst(", ts&: t,
4026 ts: " value) { return readFirstInvocationARB(value); }");
4027 }
4028 for (const char *t : workaround_types)
4029 {
4030 statement(ts&: t, ts: " subgroupBroadcast(", ts&: t,
4031 ts: " value, uint id) { return readInvocationARB(value, id); }");
4032 }
4033 break;
4034 default:
4035 break;
4036 }
4037 }
4038 statement(ts: "#endif");
4039 statement(ts: "");
4040 }
4041
4042 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotFindLSB_MSB))
4043 {
4044 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBallotFindLSB_MSB, r: result);
4045
4046 for (auto &e : exts)
4047 {
4048 const char *name = Supp::get_extension_name(c: e);
4049 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4050
4051 switch (e)
4052 {
4053 case Supp::NV_shader_thread_group:
4054 statement(ts: "uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
4055 statement(ts: "uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
4056 break;
4057 default:
4058 break;
4059 }
4060 }
4061 statement(ts: "#else");
4062 statement(ts: "uint subgroupBallotFindLSB(uvec4 value)");
4063 begin_scope();
4064 statement(ts: "int firstLive = findLSB(value.x);");
4065 statement(ts: "return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
4066 end_scope();
4067 statement(ts: "uint subgroupBallotFindMSB(uvec4 value)");
4068 begin_scope();
4069 statement(ts: "int firstLive = findMSB(value.y);");
4070 statement(ts: "return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
4071 end_scope();
4072 statement(ts: "#endif");
4073 statement(ts: "");
4074 }
4075
4076 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupAll_Any_AllEqualBool))
4077 {
4078 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupAll_Any_AllEqualBool, r: result);
4079
4080 for (auto &e : exts)
4081 {
4082 const char *name = Supp::get_extension_name(c: e);
4083 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4084
4085 switch (e)
4086 {
4087 case Supp::NV_gpu_shader_5:
4088 statement(ts: "bool subgroupAll(bool value) { return allThreadsNV(value); }");
4089 statement(ts: "bool subgroupAny(bool value) { return anyThreadNV(value); }");
4090 statement(ts: "bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
4091 break;
4092 case Supp::ARB_shader_group_vote:
4093 statement(ts: "bool subgroupAll(bool v) { return allInvocationsARB(v); }");
4094 statement(ts: "bool subgroupAny(bool v) { return anyInvocationARB(v); }");
4095 statement(ts: "bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
4096 break;
4097 case Supp::AMD_gcn_shader:
4098 statement(ts: "bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
4099 statement(ts: "bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
4100 statement(ts: "bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
4101 "b == ballotAMD(true); }");
4102 break;
4103 default:
4104 break;
4105 }
4106 }
4107 statement(ts: "#endif");
4108 statement(ts: "");
4109 }
4110
4111 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupAllEqualT))
4112 {
4113 statement(ts: "#ifndef GL_KHR_shader_subgroup_vote");
4114 statement(
4115 ts: "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
4116 "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
4117 for (const char *t : workaround_types)
4118 statement(ts: "_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", ts&: t, ts: ")");
4119 statement(ts: "#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
4120 statement(ts: "#endif");
4121 statement(ts: "");
4122 }
4123
4124 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallot))
4125 {
4126 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBallot, r: result);
4127
4128 for (auto &e : exts)
4129 {
4130 const char *name = Supp::get_extension_name(c: e);
4131 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4132
4133 switch (e)
4134 {
4135 case Supp::NV_shader_thread_group:
4136 statement(ts: "uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
4137 break;
4138 case Supp::ARB_shader_ballot:
4139 statement(ts: "uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
4140 break;
4141 default:
4142 break;
4143 }
4144 }
4145 statement(ts: "#endif");
4146 statement(ts: "");
4147 }
4148
4149 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupElect))
4150 {
4151 statement(ts: "#ifndef GL_KHR_shader_subgroup_basic");
4152 statement(ts: "bool subgroupElect()");
4153 begin_scope();
4154 statement(ts: "uvec4 activeMask = subgroupBallot(true);");
4155 statement(ts: "uint firstLive = subgroupBallotFindLSB(activeMask);");
4156 statement(ts: "return gl_SubgroupInvocationID == firstLive;");
4157 end_scope();
4158 statement(ts: "#endif");
4159 statement(ts: "");
4160 }
4161
4162 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBarrier))
4163 {
4164 // The extensions we use in place of GL_KHR_shader_subgroup_basic state
4165 // that subgroups execute in lockstep, so this barrier is implicit.
4166 // However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
4167 // and a specific test of optimizing scans by leveraging lock-step invocation execution
4168 // has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
4169 // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
4170 statement(ts: "#ifndef GL_KHR_shader_subgroup_basic");
4171 statement(ts: "void subgroupBarrier() { memoryBarrierShared(); }");
4172 statement(ts: "#endif");
4173 statement(ts: "");
4174 }
4175
4176 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupMemBarrier))
4177 {
4178 if (model == spv::ExecutionModelGLCompute)
4179 {
4180 statement(ts: "#ifndef GL_KHR_shader_subgroup_basic");
4181 statement(ts: "void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
4182 statement(ts: "void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
4183 statement(ts: "void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
4184 statement(ts: "void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
4185 statement(ts: "#endif");
4186 }
4187 else
4188 {
4189 statement(ts: "#ifndef GL_KHR_shader_subgroup_basic");
4190 statement(ts: "void subgroupMemoryBarrier() { memoryBarrier(); }");
4191 statement(ts: "void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
4192 statement(ts: "void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
4193 statement(ts: "#endif");
4194 }
4195 statement(ts: "");
4196 }
4197
4198 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
4199 {
4200 statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot");
4201 statement(ts: "bool subgroupInverseBallot(uvec4 value)");
4202 begin_scope();
4203 statement(ts: "return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
4204 end_scope();
4205
4206 statement(ts: "uint subgroupBallotInclusiveBitCount(uvec4 value)");
4207 begin_scope();
4208 statement(ts: "uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
4209 statement(ts: "ivec2 c = bitCount(v);");
4210 statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group");
4211 statement(ts: "return uint(c.x);");
4212 statement_no_indent(ts: "#else");
4213 statement(ts: "return uint(c.x + c.y);");
4214 statement_no_indent(ts: "#endif");
4215 end_scope();
4216
4217 statement(ts: "uint subgroupBallotExclusiveBitCount(uvec4 value)");
4218 begin_scope();
4219 statement(ts: "uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
4220 statement(ts: "ivec2 c = bitCount(v);");
4221 statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group");
4222 statement(ts: "return uint(c.x);");
4223 statement_no_indent(ts: "#else");
4224 statement(ts: "return uint(c.x + c.y);");
4225 statement_no_indent(ts: "#endif");
4226 end_scope();
4227 statement(ts: "#endif");
4228 statement(ts: "");
4229 }
4230
4231 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotBitCount))
4232 {
4233 statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot");
4234 statement(ts: "uint subgroupBallotBitCount(uvec4 value)");
4235 begin_scope();
4236 statement(ts: "ivec2 c = bitCount(value.xy);");
4237 statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group");
4238 statement(ts: "return uint(c.x);");
4239 statement_no_indent(ts: "#else");
4240 statement(ts: "return uint(c.x + c.y);");
4241 statement_no_indent(ts: "#endif");
4242 end_scope();
4243 statement(ts: "#endif");
4244 statement(ts: "");
4245 }
4246
4247 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotBitExtract))
4248 {
4249 statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot");
4250 statement(ts: "bool subgroupBallotBitExtract(uvec4 value, uint index)");
4251 begin_scope();
4252 statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group");
4253 statement(ts: "uint shifted = value.x >> index;");
4254 statement_no_indent(ts: "#else");
4255 statement(ts: "uint shifted = value[index >> 5u] >> (index & 0x1fu);");
4256 statement_no_indent(ts: "#endif");
4257 statement(ts: "return (shifted & 1u) != 0u;");
4258 end_scope();
4259 statement(ts: "#endif");
4260 statement(ts: "");
4261 }
4262 }
4263
4264 if (!workaround_ubo_load_overload_types.empty())
4265 {
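// These identity wrappers, e.g. (illustrative):
//   mat4 spvWorkaroundRowMajor(mat4 wrap) { return wrap; }
// force the affected UBO load through a function call, working around drivers that mishandle
// direct loads of row-major matrices from uniform buffers (see where
// workaround_ubo_load_overload_types is populated).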
4266 for (auto &type_id : workaround_ubo_load_overload_types)
4267 {
4268 auto &type = get<SPIRType>(id: type_id);
4269 statement(ts: type_to_glsl(type), ts: " spvWorkaroundRowMajor(", ts: type_to_glsl(type),
4270 ts: " wrap) { return wrap; }");
4271 }
4272 statement(ts: "");
4273 }
4274
4275 if (requires_transpose_2x2)
4276 {
4277 statement(ts: "mat2 spvTranspose(mat2 m)");
4278 begin_scope();
4279 statement(ts: "return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
4280 end_scope();
4281 statement(ts: "");
4282 }
4283
4284 if (requires_transpose_3x3)
4285 {
4286 statement(ts: "mat3 spvTranspose(mat3 m)");
4287 begin_scope();
4288 statement(ts: "return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
4289 end_scope();
4290 statement(ts: "");
4291 }
4292
4293 if (requires_transpose_4x4)
4294 {
4295 statement(ts: "mat4 spvTranspose(mat4 m)");
4296 begin_scope();
4297 statement(ts: "return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
4298 "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
4299 end_scope();
4300 statement(ts: "");
4301 }
4302}
4303
4304// Returns a string representation of the ID, usable as a function arg.
4305 // Default is to simply return the expression representation of the arg ID.
4306// Subclasses may override to modify the return value.
4307string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
4308{
4309 // Make sure that we use the name of the original variable, and not the parameter alias.
4310 uint32_t name_id = id;
4311 auto *var = maybe_get<SPIRVariable>(id);
4312 if (var && var->basevariable)
4313 name_id = var->basevariable;
4314 return to_expression(id: name_id);
4315}
4316
4317void CompilerGLSL::force_temporary_and_recompile(uint32_t id)
4318{
4319 auto res = forced_temporaries.insert(x: id);
4320
4321 // Forcing new temporaries guarantees forward progress.
4322 if (res.second)
4323 force_recompile_guarantee_forward_progress();
4324 else
4325 force_recompile();
4326}
4327
4328uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision)
4329{
4330 // Constants do not have innate precision.
4331 auto handle_type = ir.ids[id].get_type();
4332 if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef)
4333 return id;
4334
4335 // Ignore anything that isn't 32-bit values.
4336 auto &type = get<SPIRType>(id: type_id);
4337 if (type.pointer)
4338 return id;
4339 if (type.basetype != SPIRType::Float && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int)
4340 return id;
4341
4342 if (precision == Options::DontCare)
4343 {
4344 // If precision is consumed as don't-care (operations consisting only of constants),
4345 // we need to bind the expression to a temporary;
4346 // otherwise we have no way of controlling the precision later.
4347 auto itr = forced_temporaries.insert(x: id);
4348 if (itr.second)
4349 force_recompile_guarantee_forward_progress();
4350 return id;
4351 }
4352
4353 auto current_precision = has_decoration(id, decoration: DecorationRelaxedPrecision) ? Options::Mediump : Options::Highp;
4354 if (current_precision == precision)
4355 return id;
4356
4357 auto itr = temporary_to_mirror_precision_alias.find(x: id);
4358 if (itr == temporary_to_mirror_precision_alias.end())
4359 {
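// Create a mirror temporary in the requested precision, e.g. (illustrative): a highp temporary _15
// consumed in a mediump context gets a RelaxedPrecision alias named roughly "mp_copy_15", and the
// caller then reads the alias instead of the original temporary.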
4360 uint32_t alias_id = ir.increase_bound_by(count: 1);
4361 auto &m = ir.meta[alias_id];
4362 if (auto *input_m = ir.find_meta(id))
4363 m = *input_m;
4364
4365 const char *prefix;
4366 if (precision == Options::Mediump)
4367 {
4368 set_decoration(id: alias_id, decoration: DecorationRelaxedPrecision);
4369 prefix = "mp_copy_";
4370 }
4371 else
4372 {
4373 unset_decoration(id: alias_id, decoration: DecorationRelaxedPrecision);
4374 prefix = "hp_copy_";
4375 }
4376
4377 auto alias_name = join(ts&: prefix, ts: to_name(id));
4378 ParsedIR::sanitize_underscores(str&: alias_name);
4379 set_name(id: alias_id, name: alias_name);
4380
4381 emit_op(result_type: type_id, result_id: alias_id, rhs: to_expression(id), forward_rhs: true);
4382 temporary_to_mirror_precision_alias[id] = alias_id;
4383 forced_temporaries.insert(x: id);
4384 forced_temporaries.insert(x: alias_id);
4385 force_recompile_guarantee_forward_progress();
4386 id = alias_id;
4387 }
4388 else
4389 {
4390 id = itr->second;
4391 }
4392
4393 return id;
4394}
4395
4396void CompilerGLSL::handle_invalid_expression(uint32_t id)
4397{
4398 // We tried to read an invalidated expression.
4399 // This means we need another pass at compilation, but next time,
4400 // force temporary variables so that they cannot be invalidated.
4401 force_temporary_and_recompile(id);
4402
4403 // If the invalid expression happened as a result of a CompositeInsert
4404 // overwrite, we must block this from happening next iteration.
4405 if (composite_insert_overwritten.count(x: id))
4406 block_composite_insert_overwrite.insert(x: id);
4407}
4408
4409// Converts the format of the current expression from packed to unpacked,
4410// by wrapping the expression in a constructor of the appropriate type.
4411 // GLSL itself does not support packed formats, so simply return the expression here.
4412 // Subclasses that do support packed formats will override this.
4413string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
4414{
4415 return expr_str;
4416}
4417
4418 // Sometimes we proactively enclose an expression, and it turns out we did not need it after all.
4419void CompilerGLSL::strip_enclosed_expression(string &expr)
4420{
4421 if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
4422 return;
4423
4424 // Have to make sure that our first and last parens actually enclose everything inside it.
4425 uint32_t paren_count = 0;
4426 for (auto &c : expr)
4427 {
4428 if (c == '(')
4429 paren_count++;
4430 else if (c == ')')
4431 {
4432 paren_count--;
4433
4434 // If we hit 0 and this is not the final char, our first and final parens actually don't
4435 // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
4436 if (paren_count == 0 && &c != &expr.back())
4437 return;
4438 }
4439 }
4440 expr.erase(pos: expr.size() - 1, n: 1);
4441 expr.erase(position: begin(cont&: expr));
4442}
4443
4444string CompilerGLSL::enclose_expression(const string &expr)
4445{
4446 bool need_parens = false;
4447
4448 // If the expression starts with a unary operator, we need to enclose it to deal with cases where we have
4449 // back-to-back unary expressions.
4450 if (!expr.empty())
4451 {
4452 auto c = expr.front();
4453 if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
4454 need_parens = true;
4455 }
4456
4457 if (!need_parens)
4458 {
4459 uint32_t paren_count = 0;
4460 for (auto c : expr)
4461 {
4462 if (c == '(' || c == '[')
4463 paren_count++;
4464 else if (c == ')' || c == ']')
4465 {
4466 assert(paren_count);
4467 paren_count--;
4468 }
4469 else if (c == ' ' && paren_count == 0)
4470 {
4471 need_parens = true;
4472 break;
4473 }
4474 }
4475 assert(paren_count == 0);
4476 }
4477
4478 // If this expression contains any spaces which are not enclosed by parentheses,
4479 // we need to enclose it so we can treat the whole string as an expression.
4480 // This happens when two expressions have been part of a binary op earlier.
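 // E.g. "a + b" becomes "(a + b)", while something like "clamp(a, b, c)" is left as-is,
 // since its only spaces are inside the parentheses.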
4481 if (need_parens)
4482 return join(ts: '(', ts: expr, ts: ')');
4483 else
4484 return expr;
4485}
4486
4487string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
4488{
4489 // If this expression starts with an address-of operator ('&'), then
4490 // just return the part after the operator.
4491 // TODO: Strip parens if unnecessary?
4492 if (expr.front() == '&')
4493 return expr.substr(pos: 1);
4494 else if (backend.native_pointers)
4495 return join(ts: '*', ts: expr);
4496 else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct &&
4497 expr_type.pointer_depth == 1)
4498 {
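 // Non-struct physical pointer types are assumed to be emitted elsewhere in this backend as a
 // buffer_reference block with a single member named "value", so dereferencing becomes a member access.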
4499 return join(ts: enclose_expression(expr), ts: ".value");
4500 }
4501 else
4502 return expr;
4503}
4504
4505string CompilerGLSL::address_of_expression(const std::string &expr)
4506{
4507 if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
4508 {
4509 // If we have an expression which looks like (*foo), taking the address of it is the same as stripping
4510 // the first two and last characters. We might have to enclose the expression.
4511 // This doesn't work for cases like (*foo + 10),
4512 // but this is an r-value expression which we cannot take the address of anyways.
4513 return enclose_expression(expr: expr.substr(pos: 2, n: expr.size() - 3));
4514 }
4515 else if (expr.front() == '*')
4516 {
4517 // If this expression starts with a dereference operator ('*'), then
4518 // just return the part after the operator.
4519 return expr.substr(pos: 1);
4520 }
4521 else
4522 return join(ts: '&', ts: enclose_expression(expr));
4523}
4524
4525// Just like to_expression except that we enclose the expression inside parentheses if needed.
4526string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
4527{
4528 return enclose_expression(expr: to_expression(id, register_expression_read));
4529}
4530
4531// Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
4532// need_transpose must be forced to false.
4533string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
4534{
4535 return unpack_expression_type(expr_str: to_expression(id), expression_type(id),
4536 get_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID),
4537 has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked), true);
4538}
4539
4540string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
4541{
4542 // If we need to transpose, it will also take care of unpacking rules.
4543 auto *e = maybe_get<SPIRExpression>(id);
4544 bool need_transpose = e && e->need_transpose;
4545 bool is_remapped = has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID);
4546 bool is_packed = has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
4547
4548 if (!need_transpose && (is_remapped || is_packed))
4549 {
4550 return unpack_expression_type(expr_str: to_expression(id, register_expression_read),
4551 get_pointee_type(type_id: expression_type_id(id)),
4552 get_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID),
4553 has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked), false);
4554 }
4555 else
4556 return to_expression(id, register_expression_read);
4557}
4558
4559string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
4560{
4561 return enclose_expression(expr: to_unpacked_expression(id, register_expression_read));
4562}
4563
4564string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
4565{
4566 auto &type = expression_type(id);
4567 if (type.pointer && should_dereference(id))
4568 return dereference_expression(expr_type: type, expr: to_enclosed_expression(id, register_expression_read));
4569 else
4570 return to_expression(id, register_expression_read);
4571}
4572
4573string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
4574{
4575 auto &type = expression_type(id);
4576 if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
4577 return address_of_expression(expr: to_enclosed_expression(id, register_expression_read));
4578 else
4579 return to_unpacked_expression(id, register_expression_read);
4580}
4581
4582string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
4583{
4584 auto &type = expression_type(id);
4585 if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
4586 return address_of_expression(expr: to_enclosed_expression(id, register_expression_read));
4587 else
4588 return to_enclosed_unpacked_expression(id, register_expression_read);
4589}
4590
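// Extracts a single component from a vector-like expression.
// Physically packed types are indexed with [i]; everything else uses a swizzle such as .y.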
4591string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
4592{
4593 auto expr = to_enclosed_expression(id);
4594 if (has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked))
4595 return join(ts&: expr, ts: "[", ts&: index, ts: "]");
4596 else
4597 return join(ts&: expr, ts: ".", ts: index_to_swizzle(index));
4598}
4599
4600string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c,
4601 const uint32_t *chain, uint32_t length)
4602{
4603 // It is somewhat silly if an application actually enters this path, since it knows the constant up front.
4604 // It is useful here to extract the plain constant directly.
4605 SPIRConstant tmp;
4606 tmp.constant_type = result_type;
4607 auto &composite_type = get<SPIRType>(id: c.constant_type);
4608 assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty());
4609 assert(!c.specialization);
4610
4611 if (is_matrix(type: composite_type))
4612 {
4613 if (length == 2)
4614 {
4615 tmp.m.c[0].vecsize = 1;
4616 tmp.m.columns = 1;
4617 tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]];
4618 }
4619 else
4620 {
4621 assert(length == 1);
4622 tmp.m.c[0].vecsize = composite_type.vecsize;
4623 tmp.m.columns = 1;
4624 tmp.m.c[0] = c.m.c[chain[0]];
4625 }
4626 }
4627 else
4628 {
4629 assert(length == 1);
4630 tmp.m.c[0].vecsize = 1;
4631 tmp.m.columns = 1;
4632 tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]];
4633 }
4634
4635 return constant_expression(c: tmp);
4636}
4637
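// Recursively expands an array expression into an explicit initializer list,
// e.g. arr becomes { arr[0], arr[1] }, recursing one level per array dimension.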
4638string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type)
4639{
4640 uint32_t size = to_array_size_literal(type);
4641 auto &parent = get<SPIRType>(id: type.parent_type);
4642 string expr = "{ ";
4643
4644 for (uint32_t i = 0; i < size; i++)
4645 {
4646 auto subexpr = join(ts: base_expr, ts: "[", ts: convert_to_string(t: i), ts: "]");
4647 if (parent.array.empty())
4648 expr += subexpr;
4649 else
4650 expr += to_rerolled_array_expression(base_expr: subexpr, type: parent);
4651
4652 if (i + 1 < size)
4653 expr += ", ";
4654 }
4655
4656 expr += " }";
4657 return expr;
4658}
4659
4660string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool block_like_type)
4661{
4662 auto &type = expression_type(id);
4663
4664 bool reroll_array = !type.array.empty() &&
4665 (!backend.array_is_value_type ||
4666 (block_like_type && !backend.array_is_value_type_in_buffer_blocks));
4667
4668 if (reroll_array)
4669 {
4670 // For this case, we need to "re-roll" an array initializer from a temporary.
4671 // We cannot simply pass the array directly, since it decays to a pointer and it cannot
4672 // participate in a struct initializer. E.g.
4673 // float arr[2] = { 1.0, 2.0 };
4674 // Foo foo = { arr }; must be transformed to
4675 // Foo foo = { { arr[0], arr[1] } };
4676 // The array sizes cannot be deduced from specialization constants since we cannot use any loops.
4677
4678 // We're only triggering one read of the array expression, but this is fine since arrays have to be declared
4679 // as temporaries anyways.
4680 return to_rerolled_array_expression(base_expr: to_enclosed_expression(id), type);
4681 }
4682 else
4683 return to_unpacked_expression(id);
4684}
4685
4686string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id)
4687{
4688 string expr = to_expression(id);
4689
4690 if (has_decoration(id, decoration: DecorationNonUniform))
4691 convert_non_uniform_expression(expr, ptr_id: id);
4692
4693 return expr;
4694}
4695
4696string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
4697{
4698 auto itr = invalid_expressions.find(x: id);
4699 if (itr != end(cont&: invalid_expressions))
4700 handle_invalid_expression(id);
4701
4702 if (ir.ids[id].get_type() == TypeExpression)
4703 {
4704 // We might have a more complex chain of dependencies.
4705 // A possible scenario is:
4706 //
4707 // %1 = OpLoad
4708 // %2 = OpDoSomething %1 %1 // Here %2 will have a dependency on %1.
4709 // %3 = OpDoSomethingAgain %2 %2 // Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
4710 // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
4711 // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see the %1 expression after the store, not before it.
4712 //
4713 // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
4714 // and see that we should not forward reads of the original variable.
4715 auto &expr = get<SPIRExpression>(id);
4716 for (uint32_t dep : expr.expression_dependencies)
4717 if (invalid_expressions.find(x: dep) != end(cont&: invalid_expressions))
4718 handle_invalid_expression(id: dep);
4719 }
4720
4721 if (register_expression_read)
4722 track_expression_read(id);
4723
4724 switch (ir.ids[id].get_type())
4725 {
4726 case TypeExpression:
4727 {
4728 auto &e = get<SPIRExpression>(id);
4729 if (e.base_expression)
4730 return to_enclosed_expression(id: e.base_expression) + e.expression;
4731 else if (e.need_transpose)
4732 {
4733 // This should not be reached for access chains, since we always deal explicitly with transpose state
4734 // when consuming an access chain expression.
4735 uint32_t physical_type_id = get_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID);
4736 bool is_packed = has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
4737 return convert_row_major_matrix(exp_str: e.expression, exp_type: get<SPIRType>(id: e.expression_type), physical_type_id,
4738 is_packed);
4739 }
4740 else if (flattened_structs.count(x: id))
4741 {
4742 return load_flattened_struct(basename: e.expression, type: get<SPIRType>(id: e.expression_type));
4743 }
4744 else
4745 {
4746 if (is_forcing_recompilation())
4747 {
4748 // During the first compilation phase, certain expression patterns can trigger exponential growth of memory.
4749 // Avoid this by returning dummy expressions during this phase.
4750 // Do not use empty expressions here, because those are sentinels for other cases.
4751 return "_";
4752 }
4753 else
4754 return e.expression;
4755 }
4756 }
4757
4758 case TypeConstant:
4759 {
4760 auto &c = get<SPIRConstant>(id);
4761 auto &type = get<SPIRType>(id: c.constant_type);
4762
4763 // WorkGroupSize may be a constant.
4764 if (has_decoration(id: c.self, decoration: DecorationBuiltIn))
4765 return builtin_to_glsl(builtin: BuiltIn(get_decoration(id: c.self, decoration: DecorationBuiltIn)), storage: StorageClassGeneric);
4766 else if (c.specialization)
4767 {
4768 if (backend.workgroup_size_is_hidden)
4769 {
4770 int wg_index = get_constant_mapping_to_workgroup_component(c);
4771 if (wg_index >= 0)
4772 {
4773 auto wg_size = join(ts: builtin_to_glsl(builtin: BuiltInWorkgroupSize, storage: StorageClassInput), ts: vector_swizzle(vecsize: 1, index: wg_index));
4774 if (type.basetype != SPIRType::UInt)
4775 wg_size = bitcast_expression(target_type: type, expr_type: SPIRType::UInt, expr: wg_size);
4776 return wg_size;
4777 }
4778 }
4779
4780 return to_name(id);
4781 }
4782 else if (c.is_used_as_lut)
4783 return to_name(id);
4784 else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
4785 return to_name(id);
4786 else if (!type.array.empty() && !backend.can_declare_arrays_inline)
4787 return to_name(id);
4788 else
4789 return constant_expression(c);
4790 }
4791
4792 case TypeConstantOp:
4793 return to_name(id);
4794
4795 case TypeVariable:
4796 {
4797 auto &var = get<SPIRVariable>(id);
4798 // If we try to use a loop variable before the loop header, we have to redirect it to the static expression, since
4799 // the variable has not been declared yet.
4800 if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
4801 {
4802 // We might try to load from a loop variable before it has been initialized.
4803 // Prefer static expression and fallback to initializer.
4804 if (var.static_expression)
4805 return to_expression(id: var.static_expression);
4806 else if (var.initializer)
4807 return to_expression(id: var.initializer);
4808 else
4809 {
4810 // We cannot declare the variable yet, so we have to fake it.
4811 uint32_t undef_id = ir.increase_bound_by(count: 1);
4812 return emit_uninitialized_temporary_expression(type: get_variable_data_type_id(var), id: undef_id).expression;
4813 }
4814 }
4815 else if (var.deferred_declaration)
4816 {
4817 var.deferred_declaration = false;
4818 return variable_decl(variable: var);
4819 }
4820 else if (flattened_structs.count(x: id))
4821 {
4822 return load_flattened_struct(basename: to_name(id), type: get<SPIRType>(id: var.basetype));
4823 }
4824 else
4825 {
4826 auto &dec = ir.meta[var.self].decoration;
4827 if (dec.builtin)
4828 return builtin_to_glsl(builtin: dec.builtin_type, storage: var.storage);
4829 else
4830 return to_name(id);
4831 }
4832 }
4833
4834 case TypeCombinedImageSampler:
4835 // We should never take the expression of this type directly.
4836 // The intention is that texture sampling functions will extract the image and sampler
4837 // separately and take their expressions as needed.
4838 // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
4839 // expression a la sampler2D(texture, sampler).
4840 SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
4841
4842 case TypeAccessChain:
4843 // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad.
4844 SPIRV_CROSS_THROW("Access chains have no default expression representation.");
4845
4846 default:
4847 return to_name(id);
4848 }
4849}
4850
4851string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
4852{
4853 auto &type = get<SPIRType>(id: cop.basetype);
4854 bool binary = false;
4855 bool unary = false;
4856 string op;
4857
4858 if (is_legacy() && is_unsigned_opcode(op: cop.opcode))
4859 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
4860
4861 // TODO: Find a clean way to reuse emit_instruction.
4862 switch (cop.opcode)
4863 {
4864 case OpSConvert:
4865 case OpUConvert:
4866 case OpFConvert:
4867 op = type_to_glsl_constructor(type);
4868 break;
4869
4870#define GLSL_BOP(opname, x) \
4871 case Op##opname: \
4872 binary = true; \
4873 op = x; \
4874 break
4875
4876#define GLSL_UOP(opname, x) \
4877 case Op##opname: \
4878 unary = true; \
4879 op = x; \
4880 break
4881
4882 GLSL_UOP(SNegate, "-");
4883 GLSL_UOP(Not, "~");
4884 GLSL_BOP(IAdd, "+");
4885 GLSL_BOP(ISub, "-");
4886 GLSL_BOP(IMul, "*");
4887 GLSL_BOP(SDiv, "/");
4888 GLSL_BOP(UDiv, "/");
4889 GLSL_BOP(UMod, "%");
4890 GLSL_BOP(SMod, "%");
4891 GLSL_BOP(ShiftRightLogical, ">>");
4892 GLSL_BOP(ShiftRightArithmetic, ">>");
4893 GLSL_BOP(ShiftLeftLogical, "<<");
4894 GLSL_BOP(BitwiseOr, "|");
4895 GLSL_BOP(BitwiseXor, "^");
4896 GLSL_BOP(BitwiseAnd, "&");
4897 GLSL_BOP(LogicalOr, "||");
4898 GLSL_BOP(LogicalAnd, "&&");
4899 GLSL_UOP(LogicalNot, "!");
4900 GLSL_BOP(LogicalEqual, "==");
4901 GLSL_BOP(LogicalNotEqual, "!=");
4902 GLSL_BOP(IEqual, "==");
4903 GLSL_BOP(INotEqual, "!=");
4904 GLSL_BOP(ULessThan, "<");
4905 GLSL_BOP(SLessThan, "<");
4906 GLSL_BOP(ULessThanEqual, "<=");
4907 GLSL_BOP(SLessThanEqual, "<=");
4908 GLSL_BOP(UGreaterThan, ">");
4909 GLSL_BOP(SGreaterThan, ">");
4910 GLSL_BOP(UGreaterThanEqual, ">=");
4911 GLSL_BOP(SGreaterThanEqual, ">=");
4912
4913 case OpSRem:
4914 {
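 // GLSL's % has undefined results for negative operands, so emulate signed remainder as
 // op0 - op1 * (op0 / op1). With truncating integer division the result takes the sign of op0, as OpSRem requires.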
4915 uint32_t op0 = cop.arguments[0];
4916 uint32_t op1 = cop.arguments[1];
4917 return join(ts: to_enclosed_expression(id: op0), ts: " - ", ts: to_enclosed_expression(id: op1), ts: " * ", ts: "(",
4918 ts: to_enclosed_expression(id: op0), ts: " / ", ts: to_enclosed_expression(id: op1), ts: ")");
4919 }
4920
4921 case OpSelect:
4922 {
4923 if (cop.arguments.size() < 3)
4924 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4925
4926 // This one is pretty annoying. It's triggered from
4927 // uint(bool), int(bool) from spec constants.
4928 // In order to preserve its compile-time constness in Vulkan GLSL,
4929 // we need to reduce the OpSelect expression back to this simplified model.
4930 // If we cannot, fail.
4931 if (to_trivial_mix_op(type, op, left: cop.arguments[2], right: cop.arguments[1], lerp: cop.arguments[0]))
4932 {
4933 // Implement as a simple cast down below.
4934 }
4935 else
4936 {
4937 // Implement a ternary and pray the compiler understands it :)
4938 return to_ternary_expression(result_type: type, select: cop.arguments[0], true_value: cop.arguments[1], false_value: cop.arguments[2]);
4939 }
4940 break;
4941 }
4942
4943 case OpVectorShuffle:
4944 {
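 // Build the shuffle as a constructor which picks each requested component from either operand,
 // e.g. vec4(a.x, a.y, b.x, b.y). Indices at or beyond the left operand's component count select from the right operand.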
4945 string expr = type_to_glsl_constructor(type);
4946 expr += "(";
4947
4948 uint32_t left_components = expression_type(id: cop.arguments[0]).vecsize;
4949 string left_arg = to_enclosed_expression(id: cop.arguments[0]);
4950 string right_arg = to_enclosed_expression(id: cop.arguments[1]);
4951
4952 for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
4953 {
4954 uint32_t index = cop.arguments[i];
4955 if (index >= left_components)
4956 expr += right_arg + "." + "xyzw"[index - left_components];
4957 else
4958 expr += left_arg + "." + "xyzw"[index];
4959
4960 if (i + 1 < uint32_t(cop.arguments.size()))
4961 expr += ", ";
4962 }
4963
4964 expr += ")";
4965 return expr;
4966 }
4967
4968 case OpCompositeExtract:
4969 {
4970 auto expr = access_chain_internal(base: cop.arguments[0], indices: &cop.arguments[1], count: uint32_t(cop.arguments.size() - 1),
4971 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr);
4972 return expr;
4973 }
4974
4975 case OpCompositeInsert:
4976 SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported.");
4977
4978 default:
4979 // Some opcodes are unimplemented here, these are currently not possible to test from glslang.
4980 SPIRV_CROSS_THROW("Unimplemented spec constant op.");
4981 }
4982
4983 uint32_t bit_width = 0;
4984 if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
4985 bit_width = expression_type(id: cop.arguments[0]).width;
4986
4987 SPIRType::BaseType input_type;
4988 bool skip_cast_if_equal_type = opcode_is_sign_invariant(opcode: cop.opcode);
4989
4990 switch (cop.opcode)
4991 {
4992 case OpIEqual:
4993 case OpINotEqual:
4994 input_type = to_signed_basetype(width: bit_width);
4995 break;
4996
4997 case OpSLessThan:
4998 case OpSLessThanEqual:
4999 case OpSGreaterThan:
5000 case OpSGreaterThanEqual:
5001 case OpSMod:
5002 case OpSDiv:
5003 case OpShiftRightArithmetic:
5004 case OpSConvert:
5005 case OpSNegate:
5006 input_type = to_signed_basetype(width: bit_width);
5007 break;
5008
5009 case OpULessThan:
5010 case OpULessThanEqual:
5011 case OpUGreaterThan:
5012 case OpUGreaterThanEqual:
5013 case OpUMod:
5014 case OpUDiv:
5015 case OpShiftRightLogical:
5016 case OpUConvert:
5017 input_type = to_unsigned_basetype(width: bit_width);
5018 break;
5019
5020 default:
5021 input_type = type.basetype;
5022 break;
5023 }
5024
5025#undef GLSL_BOP
5026#undef GLSL_UOP
5027 if (binary)
5028 {
5029 if (cop.arguments.size() < 2)
5030 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
5031
5032 string cast_op0;
5033 string cast_op1;
5034 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0: cop.arguments[0],
5035 op1: cop.arguments[1], skip_cast_if_equal_type);
5036
5037 if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
5038 {
5039 expected_type.basetype = input_type;
5040 auto expr = bitcast_glsl_op(result_type: type, argument_type: expected_type);
5041 expr += '(';
5042 expr += join(ts&: cast_op0, ts: " ", ts&: op, ts: " ", ts&: cast_op1);
5043 expr += ')';
5044 return expr;
5045 }
5046 else
5047 return join(ts: "(", ts&: cast_op0, ts: " ", ts&: op, ts: " ", ts&: cast_op1, ts: ")");
5048 }
5049 else if (unary)
5050 {
5051 if (cop.arguments.size() < 1)
5052 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
5053
5054 // Auto-bitcast to result type as needed.
5055 // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
5056 return join(ts: "(", ts&: op, ts: bitcast_glsl(result_type: type, arg: cop.arguments[0]), ts: ")");
5057 }
5058 else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
5059 {
5060 if (cop.arguments.size() < 1)
5061 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
5062
5063 auto &arg_type = expression_type(id: cop.arguments[0]);
5064 if (arg_type.width < type.width && input_type != arg_type.basetype)
5065 {
5066 auto expected = arg_type;
5067 expected.basetype = input_type;
5068 return join(ts&: op, ts: "(", ts: bitcast_glsl(result_type: expected, arg: cop.arguments[0]), ts: ")");
5069 }
5070 else
5071 return join(ts&: op, ts: "(", ts: to_expression(id: cop.arguments[0]), ts: ")");
5072 }
5073 else
5074 {
5075 if (cop.arguments.size() < 1)
5076 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
5077 return join(ts&: op, ts: "(", ts: to_expression(id: cop.arguments[0]), ts: ")");
5078 }
5079}
5080
5081string CompilerGLSL::constant_expression(const SPIRConstant &c, bool inside_block_like_struct_scope)
5082{
5083 auto &type = get<SPIRType>(id: c.constant_type);
5084
5085 if (type.pointer)
5086 {
5087 return backend.null_pointer_literal;
5088 }
5089 else if (!c.subconstants.empty())
5090 {
5091 // Handles Arrays and structures.
5092 string res;
5093
5094 // Only consider the decay if we are inside a struct scope where we are emitting a member with Offset decoration.
5095 // Outside a block-like struct declaration, we can always bind to a constant array with templated type.
5096 // Should look at ArrayStride here as well, but it's possible to declare a constant struct
5097 // with Offset = 0, using no ArrayStride on the enclosed array type.
5098 // A particular CTS test hits this scenario.
5099 bool array_type_decays = inside_block_like_struct_scope &&
5100 !type.array.empty() && !backend.array_is_value_type_in_buffer_blocks;
5101
5102 // Allow Metal to use the array<T> template to make arrays a value type
5103 bool needs_trailing_bracket = false;
5104 if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
5105 type.array.empty())
5106 {
5107 res = type_to_glsl_constructor(type) + "{ ";
5108 }
5109 else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
5110 !type.array.empty() && !array_type_decays)
5111 {
5112 res = type_to_glsl_constructor(type) + "({ ";
5113 needs_trailing_bracket = true;
5114 }
5115 else if (backend.use_initializer_list)
5116 {
5117 res = "{ ";
5118 }
5119 else
5120 {
5121 res = type_to_glsl_constructor(type) + "(";
5122 }
5123
5124 uint32_t subconstant_index = 0;
5125 for (auto &elem : c.subconstants)
5126 {
5127 auto &subc = get<SPIRConstant>(id: elem);
5128 if (subc.specialization)
5129 res += to_name(id: elem);
5130 else
5131 {
5132 if (type.array.empty() && type.basetype == SPIRType::Struct)
5133 {
5134 // When we get down to emitting struct members, override the block-like information.
5135 // For constants, we can freely mix and match block-like state.
5136 inside_block_like_struct_scope =
5137 has_member_decoration(id: type.self, index: subconstant_index, decoration: DecorationOffset);
5138 }
5139
5140 res += constant_expression(c: subc, inside_block_like_struct_scope);
5141 }
5142
5143 if (&elem != &c.subconstants.back())
5144 res += ", ";
5145
5146 subconstant_index++;
5147 }
5148
5149 res += backend.use_initializer_list ? " }" : ")";
5150 if (needs_trailing_bracket)
5151 res += ")";
5152
5153 return res;
5154 }
5155 else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
5156 {
5157 // Metal tessellation likes empty structs which are then constant expressions.
5158 if (backend.supports_empty_struct)
5159 return "{ }";
5160 else if (backend.use_typed_initializer_list)
5161 return join(ts: type_to_glsl(type: get<SPIRType>(id: c.constant_type)), ts: "{ 0 }");
5162 else if (backend.use_initializer_list)
5163 return "{ 0 }";
5164 else
5165 return join(ts: type_to_glsl(type: get<SPIRType>(id: c.constant_type)), ts: "(0)");
5166 }
5167 else if (c.columns() == 1)
5168 {
5169 return constant_expression_vector(c, vector: 0);
5170 }
5171 else
5172 {
5173 string res = type_to_glsl(type: get<SPIRType>(id: c.constant_type)) + "(";
5174 for (uint32_t col = 0; col < c.columns(); col++)
5175 {
5176 if (c.specialization_constant_id(col) != 0)
5177 res += to_name(id: c.specialization_constant_id(col));
5178 else
5179 res += constant_expression_vector(c, vector: col);
5180
5181 if (col + 1 < c.columns())
5182 res += ", ";
5183 }
5184 res += ")";
5185 return res;
5186 }
5187}
5188
5189#ifdef _MSC_VER
5190// sprintf warning.
5191// We cannot rely on snprintf existing because, ..., MSVC.
5192#pragma warning(push)
5193#pragma warning(disable : 4996)
5194#endif
5195
5196string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
5197{
5198 string res;
5199 float float_value = c.scalar_f16(col, row);
5200
5201 // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
5202 // of complicated workarounds, just value-cast to the half type always.
5203 if (std::isnan(x: float_value) || std::isinf(x: float_value))
5204 {
5205 SPIRType type;
5206 type.basetype = SPIRType::Half;
5207 type.vecsize = 1;
5208 type.columns = 1;
5209
5210 if (float_value == numeric_limits<float>::infinity())
5211 res = join(ts: type_to_glsl(type), ts: "(1.0 / 0.0)");
5212 else if (float_value == -numeric_limits<float>::infinity())
5213 res = join(ts: type_to_glsl(type), ts: "(-1.0 / 0.0)");
5214 else if (std::isnan(x: float_value))
5215 res = join(ts: type_to_glsl(type), ts: "(0.0 / 0.0)");
5216 else
5217 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
5218 }
5219 else
5220 {
5221 SPIRType type;
5222 type.basetype = SPIRType::Half;
5223 type.vecsize = 1;
5224 type.columns = 1;
5225 res = join(ts: type_to_glsl(type), ts: "(", ts: convert_to_string(t: float_value, locale_radix_point: current_locale_radix_character), ts: ")");
5226 }
5227
5228 return res;
5229}
5230
5231string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
5232{
5233 string res;
5234 float float_value = c.scalar_f32(col, row);
5235
5236 if (std::isnan(x: float_value) || std::isinf(x: float_value))
5237 {
5238 // Use special representation.
5239 if (!is_legacy())
5240 {
5241 SPIRType out_type;
5242 SPIRType in_type;
5243 out_type.basetype = SPIRType::Float;
5244 in_type.basetype = SPIRType::UInt;
5245 out_type.vecsize = 1;
5246 in_type.vecsize = 1;
5247 out_type.width = 32;
5248 in_type.width = 32;
5249
5250 char print_buffer[32];
5251 sprintf(s: print_buffer, format: "0x%xu", c.scalar(col, row));
5252
5253 const char *comment = "inf";
5254 if (float_value == -numeric_limits<float>::infinity())
5255 comment = "-inf";
5256 else if (std::isnan(x: float_value))
5257 comment = "nan";
5258 res = join(ts: bitcast_glsl_op(result_type: out_type, argument_type: in_type), ts: "(", ts&: print_buffer, ts: " /* ", ts&: comment, ts: " */)");
5259 }
5260 else
5261 {
5262 if (float_value == numeric_limits<float>::infinity())
5263 {
5264 if (backend.float_literal_suffix)
5265 res = "(1.0f / 0.0f)";
5266 else
5267 res = "(1.0 / 0.0)";
5268 }
5269 else if (float_value == -numeric_limits<float>::infinity())
5270 {
5271 if (backend.float_literal_suffix)
5272 res = "(-1.0f / 0.0f)";
5273 else
5274 res = "(-1.0 / 0.0)";
5275 }
5276 else if (std::isnan(x: float_value))
5277 {
5278 if (backend.float_literal_suffix)
5279 res = "(0.0f / 0.0f)";
5280 else
5281 res = "(0.0 / 0.0)";
5282 }
5283 else
5284 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
5285 }
5286 }
5287 else
5288 {
5289 res = convert_to_string(t: float_value, locale_radix_point: current_locale_radix_character);
5290 if (backend.float_literal_suffix)
5291 res += "f";
5292 }
5293
5294 return res;
5295}
5296
5297std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
5298{
5299 string res;
5300 double double_value = c.scalar_f64(col, row);
5301
5302 if (std::isnan(x: double_value) || std::isinf(x: double_value))
5303 {
5304 // Use special representation.
5305 if (!is_legacy())
5306 {
5307 SPIRType out_type;
5308 SPIRType in_type;
5309 out_type.basetype = SPIRType::Double;
5310 in_type.basetype = SPIRType::UInt64;
5311 out_type.vecsize = 1;
5312 in_type.vecsize = 1;
5313 out_type.width = 64;
5314 in_type.width = 64;
5315
5316 uint64_t u64_value = c.scalar_u64(col, row);
5317
5318 if (options.es)
5319 SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile.");
5320 require_extension_internal(ext: "GL_ARB_gpu_shader_int64");
5321
5322 char print_buffer[64];
5323 sprintf(s: print_buffer, format: "0x%llx%s", static_cast<unsigned long long>(u64_value),
5324 backend.long_long_literal_suffix ? "ull" : "ul");
5325
5326 const char *comment = "inf";
5327 if (double_value == -numeric_limits<double>::infinity())
5328 comment = "-inf";
5329 else if (std::isnan(x: double_value))
5330 comment = "nan";
5331 res = join(ts: bitcast_glsl_op(result_type: out_type, argument_type: in_type), ts: "(", ts&: print_buffer, ts: " /* ", ts&: comment, ts: " */)");
5332 }
5333 else
5334 {
5335 if (options.es)
5336 SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
5337 if (options.version < 400)
5338 require_extension_internal(ext: "GL_ARB_gpu_shader_fp64");
5339
5340 if (double_value == numeric_limits<double>::infinity())
5341 {
5342 if (backend.double_literal_suffix)
5343 res = "(1.0lf / 0.0lf)";
5344 else
5345 res = "(1.0 / 0.0)";
5346 }
5347 else if (double_value == -numeric_limits<double>::infinity())
5348 {
5349 if (backend.double_literal_suffix)
5350 res = "(-1.0lf / 0.0lf)";
5351 else
5352 res = "(-1.0 / 0.0)";
5353 }
5354 else if (std::isnan(x: double_value))
5355 {
5356 if (backend.double_literal_suffix)
5357 res = "(0.0lf / 0.0lf)";
5358 else
5359 res = "(0.0 / 0.0)";
5360 }
5361 else
5362 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
5363 }
5364 }
5365 else
5366 {
5367 res = convert_to_string(t: double_value, locale_radix_point: current_locale_radix_character);
5368 if (backend.double_literal_suffix)
5369 res += "lf";
5370 }
5371
5372 return res;
5373}
5374
5375#ifdef _MSC_VER
5376#pragma warning(pop)
5377#endif
5378
5379string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
5380{
5381 auto type = get<SPIRType>(id: c.constant_type);
5382 type.columns = 1;
5383
5384 auto scalar_type = type;
5385 scalar_type.vecsize = 1;
5386
5387 string res;
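 // splat collapses identical components into a single scalar inside a constructor (e.g. vec4(1.0)),
 // while swizzle_splat swizzles the scalar directly (e.g. 1.0f.xxxx) on backends which can swizzle scalars.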
5388 bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
5389 bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
5390
5391 if (!type_is_floating_point(type))
5392 {
5393 // As a special case, we cannot swizzle literal integers.
5394 swizzle_splat = false;
5395 }
5396
5397 if (splat || swizzle_splat)
5398 {
5399 // Cannot use constant splatting if we have specialization constants somewhere in the vector.
5400 for (uint32_t i = 0; i < c.vector_size(); i++)
5401 {
5402 if (c.specialization_constant_id(col: vector, row: i) != 0)
5403 {
5404 splat = false;
5405 swizzle_splat = false;
5406 break;
5407 }
5408 }
5409 }
5410
5411 if (splat || swizzle_splat)
5412 {
5413 if (type.width == 64)
5414 {
5415 uint64_t ident = c.scalar_u64(col: vector, row: 0);
5416 for (uint32_t i = 1; i < c.vector_size(); i++)
5417 {
5418 if (ident != c.scalar_u64(col: vector, row: i))
5419 {
5420 splat = false;
5421 swizzle_splat = false;
5422 break;
5423 }
5424 }
5425 }
5426 else
5427 {
5428 uint32_t ident = c.scalar(col: vector, row: 0);
5429 for (uint32_t i = 1; i < c.vector_size(); i++)
5430 {
5431 if (ident != c.scalar(col: vector, row: i))
5432 {
5433 splat = false;
5434 swizzle_splat = false;
5435 }
5436 }
5437 }
5438 }
5439
5440 if (c.vector_size() > 1 && !swizzle_splat)
5441 res += type_to_glsl(type) + "(";
5442
5443 switch (type.basetype)
5444 {
5445 case SPIRType::Half:
5446 if (splat || swizzle_splat)
5447 {
5448 res += convert_half_to_string(c, col: vector, row: 0);
5449 if (swizzle_splat)
5450 res = remap_swizzle(out_type: get<SPIRType>(id: c.constant_type), input_components: 1, expr: res);
5451 }
5452 else
5453 {
5454 for (uint32_t i = 0; i < c.vector_size(); i++)
5455 {
5456 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
5457 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
5458 else
5459 res += convert_half_to_string(c, col: vector, row: i);
5460
5461 if (i + 1 < c.vector_size())
5462 res += ", ";
5463 }
5464 }
5465 break;
5466
5467 case SPIRType::Float:
5468 if (splat || swizzle_splat)
5469 {
5470 res += convert_float_to_string(c, col: vector, row: 0);
5471 if (swizzle_splat)
5472 res = remap_swizzle(out_type: get<SPIRType>(id: c.constant_type), input_components: 1, expr: res);
5473 }
5474 else
5475 {
5476 for (uint32_t i = 0; i < c.vector_size(); i++)
5477 {
5478 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
5479 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
5480 else
5481 res += convert_float_to_string(c, col: vector, row: i);
5482
5483 if (i + 1 < c.vector_size())
5484 res += ", ";
5485 }
5486 }
5487 break;
5488
5489 case SPIRType::Double:
5490 if (splat || swizzle_splat)
5491 {
5492 res += convert_double_to_string(c, col: vector, row: 0);
5493 if (swizzle_splat)
5494 res = remap_swizzle(out_type: get<SPIRType>(id: c.constant_type), input_components: 1, expr: res);
5495 }
5496 else
5497 {
5498 for (uint32_t i = 0; i < c.vector_size(); i++)
5499 {
5500 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
5501 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
5502 else
5503 res += convert_double_to_string(c, col: vector, row: i);
5504
5505 if (i + 1 < c.vector_size())
5506 res += ", ";
5507 }
5508 }
5509 break;
5510
5511 case SPIRType::Int64:
5512 {
5513 auto tmp = type;
5514 tmp.vecsize = 1;
5515 tmp.columns = 1;
5516 auto int64_type = type_to_glsl(type: tmp);
5517
5518 if (splat)
5519 {
5520 res += convert_to_string(value: c.scalar_i64(col: vector, row: 0), int64_type, long_long_literal_suffix: backend.long_long_literal_suffix);
5521 }
5522 else
5523 {
5524 for (uint32_t i = 0; i < c.vector_size(); i++)
5525 {
5526 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
5527 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
5528 else
5529 res += convert_to_string(value: c.scalar_i64(col: vector, row: i), int64_type, long_long_literal_suffix: backend.long_long_literal_suffix);
5530
5531 if (i + 1 < c.vector_size())
5532 res += ", ";
5533 }
5534 }
5535 break;
5536 }
5537
5538 case SPIRType::UInt64:
5539 if (splat)
5540 {
5541 res += convert_to_string(t: c.scalar_u64(col: vector, row: 0));
5542 if (backend.long_long_literal_suffix)
5543 res += "ull";
5544 else
5545 res += "ul";
5546 }
5547 else
5548 {
5549 for (uint32_t i = 0; i < c.vector_size(); i++)
5550 {
5551 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
5552 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
5553 else
5554 {
5555 res += convert_to_string(t: c.scalar_u64(col: vector, row: i));
5556 if (backend.long_long_literal_suffix)
5557 res += "ull";
5558 else
5559 res += "ul";
5560 }
5561
5562 if (i + 1 < c.vector_size())
5563 res += ", ";
5564 }
5565 }
5566 break;
5567
5568 case SPIRType::UInt:
5569 if (splat)
5570 {
5571 res += convert_to_string(t: c.scalar(col: vector, row: 0));
5572 if (is_legacy())
5573 {
5574 // Fake unsigned constant literals with signed ones if possible.
5575 // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
5576 if (c.scalar_i32(col: vector, row: 0) < 0)
5577 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
5578 }
5579 else if (backend.uint32_t_literal_suffix)
5580 res += "u";
5581 }
5582 else
5583 {
5584 for (uint32_t i = 0; i < c.vector_size(); i++)
5585 {
5586 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
5587 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
5588 else
5589 {
5590 res += convert_to_string(t: c.scalar(col: vector, row: i));
5591 if (is_legacy())
5592 {
5593 // Fake unsigned constant literals with signed ones if possible.
5594 // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
5595 if (c.scalar_i32(col: vector, row: i) < 0)
5596 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
5597 "the literal negative.");
5598 }
5599 else if (backend.uint32_t_literal_suffix)
5600 res += "u";
5601 }
5602
5603 if (i + 1 < c.vector_size())
5604 res += ", ";
5605 }
5606 }
5607 break;
5608
5609 case SPIRType::Int:
5610 if (splat)
5611 res += convert_to_string(value: c.scalar_i32(col: vector, row: 0));
5612 else
5613 {
5614 for (uint32_t i = 0; i < c.vector_size(); i++)
5615 {
5616 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
5617 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
5618 else
5619 res += convert_to_string(value: c.scalar_i32(col: vector, row: i));
5620 if (i + 1 < c.vector_size())
5621 res += ", ";
5622 }
5623 }
5624 break;
5625
5626 case SPIRType::UShort:
5627 if (splat)
5628 {
5629 res += convert_to_string(t: c.scalar(col: vector, row: 0));
5630 }
5631 else
5632 {
5633 for (uint32_t i = 0; i < c.vector_size(); i++)
5634 {
5635 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
5636 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
5637 else
5638 {
5639 if (*backend.uint16_t_literal_suffix)
5640 {
5641 res += convert_to_string(t: c.scalar_u16(col: vector, row: i));
5642 res += backend.uint16_t_literal_suffix;
5643 }
5644 else
5645 {
5646 // If the backend doesn't have a literal suffix, we need to value-cast.
5647 res += type_to_glsl(type: scalar_type);
5648 res += "(";
5649 res += convert_to_string(t: c.scalar_u16(col: vector, row: i));
5650 res += ")";
5651 }
5652 }
5653
5654 if (i + 1 < c.vector_size())
5655 res += ", ";
5656 }
5657 }
5658 break;
5659
5660 case SPIRType::Short:
5661 if (splat)
5662 {
5663 res += convert_to_string(t: c.scalar_i16(col: vector, row: 0));
5664 }
5665 else
5666 {
5667 for (uint32_t i = 0; i < c.vector_size(); i++)
5668 {
5669 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
5670 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
5671 else
5672 {
5673 if (*backend.int16_t_literal_suffix)
5674 {
5675 res += convert_to_string(t: c.scalar_i16(col: vector, row: i));
5676 res += backend.int16_t_literal_suffix;
5677 }
5678 else
5679 {
5680 // If the backend doesn't have a literal suffix, we need to value-cast.
5681 res += type_to_glsl(type: scalar_type);
5682 res += "(";
5683 res += convert_to_string(t: c.scalar_i16(col: vector, row: i));
5684 res += ")";
5685 }
5686 }
5687
5688 if (i + 1 < c.vector_size())
5689 res += ", ";
5690 }
5691 }
5692 break;
5693
5694 case SPIRType::UByte:
5695 if (splat)
5696 {
5697 res += convert_to_string(t: c.scalar_u8(col: vector, row: 0));
5698 }
5699 else
5700 {
5701 for (uint32_t i = 0; i < c.vector_size(); i++)
5702 {
5703 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
5704 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
5705 else
5706 {
5707 res += type_to_glsl(type: scalar_type);
5708 res += "(";
5709 res += convert_to_string(t: c.scalar_u8(col: vector, row: i));
5710 res += ")";
5711 }
5712
5713 if (i + 1 < c.vector_size())
5714 res += ", ";
5715 }
5716 }
5717 break;
5718
5719 case SPIRType::SByte:
5720 if (splat)
5721 {
5722 res += convert_to_string(t: c.scalar_i8(col: vector, row: 0));
5723 }
5724 else
5725 {
5726 for (uint32_t i = 0; i < c.vector_size(); i++)
5727 {
5728 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
5729 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
5730 else
5731 {
5732 res += type_to_glsl(type: scalar_type);
5733 res += "(";
5734 res += convert_to_string(t: c.scalar_i8(col: vector, row: i));
5735 res += ")";
5736 }
5737
5738 if (i + 1 < c.vector_size())
5739 res += ", ";
5740 }
5741 }
5742 break;
5743
5744 case SPIRType::Boolean:
5745 if (splat)
5746 res += c.scalar(col: vector, row: 0) ? "true" : "false";
5747 else
5748 {
5749 for (uint32_t i = 0; i < c.vector_size(); i++)
5750 {
5751 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
5752 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
5753 else
5754 res += c.scalar(col: vector, row: i) ? "true" : "false";
5755
5756 if (i + 1 < c.vector_size())
5757 res += ", ";
5758 }
5759 }
5760 break;
5761
5762 default:
5763 SPIRV_CROSS_THROW("Invalid constant expression basetype.");
5764 }
5765
5766 if (c.vector_size() > 1 && !swizzle_splat)
5767 res += ")";
5768
5769 return res;
5770}
5771
5772SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
5773{
5774 forced_temporaries.insert(x: id);
5775 emit_uninitialized_temporary(type, id);
5776 return set<SPIRExpression>(id, args: to_name(id), args&: type, args: true);
5777}
5778
5779void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
5780{
5781 // If we're declaring temporaries inside continue blocks,
5782 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
5783 if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(x: result_id))
5784 {
5785 auto &header = get<SPIRBlock>(id: current_continue_block->loop_dominator);
5786 if (find_if(first: begin(cont&: header.declare_temporary), last: end(cont&: header.declare_temporary),
5787 pred: [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5788 return tmp.first == result_type && tmp.second == result_id;
5789 }) == end(cont&: header.declare_temporary))
5790 {
5791 header.declare_temporary.emplace_back(ts&: result_type, ts&: result_id);
5792 hoisted_temporaries.insert(x: result_id);
5793 force_recompile();
5794 }
5795 }
5796 else if (hoisted_temporaries.count(x: result_id) == 0)
5797 {
5798 auto &type = get<SPIRType>(id: result_type);
5799 auto &flags = get_decoration_bitset(id: result_id);
5800
5801 // The result_id has not been made into an expression yet, so use flags interface.
5802 add_local_variable_name(id: result_id);
5803
5804 string initializer;
5805 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
5806 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: result_type));
5807
5808 statement(ts: flags_to_qualifiers_glsl(type, flags), ts: variable_decl(type, name: to_name(id: result_id)), ts&: initializer, ts: ";");
5809 }
5810}
5811
5812string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
5813{
5814 auto &type = get<SPIRType>(id: result_type);
5815
5816 // If we're declaring temporaries inside continue blocks,
5817 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
5818 if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(x: result_id))
5819 {
5820 auto &header = get<SPIRBlock>(id: current_continue_block->loop_dominator);
5821 if (find_if(first: begin(cont&: header.declare_temporary), last: end(cont&: header.declare_temporary),
5822 pred: [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5823 return tmp.first == result_type && tmp.second == result_id;
5824 }) == end(cont&: header.declare_temporary))
5825 {
5826 header.declare_temporary.emplace_back(ts&: result_type, ts&: result_id);
5827 hoisted_temporaries.insert(x: result_id);
5828 force_recompile_guarantee_forward_progress();
5829 }
5830
5831 return join(ts: to_name(id: result_id), ts: " = ");
5832 }
5833 else if (hoisted_temporaries.count(x: result_id))
5834 {
5835 // The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
5836 return join(ts: to_name(id: result_id), ts: " = ");
5837 }
5838 else
5839 {
5840 // The result_id has not been made into an expression yet, so use flags interface.
5841 add_local_variable_name(id: result_id);
5842 auto &flags = get_decoration_bitset(id: result_id);
5843 return join(ts: flags_to_qualifiers_glsl(type, flags), ts: variable_decl(type, name: to_name(id: result_id)), ts: " = ");
5844 }
5845}
5846
5847bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
5848{
5849 return forwarded_temporaries.count(x: id) != 0;
5850}
5851
5852bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
5853{
5854 return suppressed_usage_tracking.count(x: id) != 0;
5855}
5856
5857bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
5858{
5859 auto *expr = maybe_get<SPIRExpression>(id);
5860 if (!expr)
5861 return false;
5862
5863 // If we're emitting code at a deeper loop level than when we emitted the expression,
5864 // we're probably reading the same expression over and over.
5865 return current_loop_level > expr->emitted_loop_level;
5866}
5867
5868SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
5869 bool suppress_usage_tracking)
5870{
5871 if (forwarding && (forced_temporaries.find(x: result_id) == end(cont&: forced_temporaries)))
5872 {
5873 // Just forward it without temporary.
5874 // If the forward is trivial, we do not force flushing to temporary for this expression.
5875 forwarded_temporaries.insert(x: result_id);
5876 if (suppress_usage_tracking)
5877 suppressed_usage_tracking.insert(x: result_id);
5878
5879 return set<SPIRExpression>(id: result_id, args: rhs, args&: result_type, args: true);
5880 }
5881 else
5882 {
5883 // If the expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
5884 statement(ts: declare_temporary(result_type, result_id), ts: rhs, ts: ";");
5885 return set<SPIRExpression>(id: result_id, args: to_name(id: result_id), args&: result_type, args: true);
5886 }
5887}
5888
5889void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
5890{
5891 bool forward = should_forward(id: op0);
5892 emit_op(result_type, result_id, rhs: join(ts&: op, ts: to_enclosed_unpacked_expression(id: op0)), forwarding: forward);
5893 inherit_expression_dependencies(dst: result_id, source: op0);
5894}
5895
5896void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
5897{
5898 // Various FP arithmetic opcodes such as add, sub, mul will hit this.
5899 bool force_temporary_precise = backend.support_precise_qualifier &&
5900 has_decoration(id: result_id, decoration: DecorationNoContraction) &&
5901 type_is_floating_point(type: get<SPIRType>(id: result_type));
5902 bool forward = should_forward(id: op0) && should_forward(id: op1) && !force_temporary_precise;
5903
5904 emit_op(result_type, result_id,
5905 rhs: join(ts: to_enclosed_unpacked_expression(id: op0), ts: " ", ts&: op, ts: " ", ts: to_enclosed_unpacked_expression(id: op1)), forwarding: forward);
5906
5907 inherit_expression_dependencies(dst: result_id, source: op0);
5908 inherit_expression_dependencies(dst: result_id, source: op1);
5909}
5910
5911void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
5912{
5913 auto &type = get<SPIRType>(id: result_type);
5914 auto expr = type_to_glsl_constructor(type);
5915 expr += '(';
5916 for (uint32_t i = 0; i < type.vecsize; i++)
5917 {
5918 // Make sure to call to_expression multiple times to ensure
5919 // that these expressions are properly flushed to temporaries if needed.
5920 expr += op;
5921 expr += to_extract_component_expression(id: operand, index: i);
5922
5923 if (i + 1 < type.vecsize)
5924 expr += ", ";
5925 }
5926 expr += ')';
5927 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: operand));
5928
5929 inherit_expression_dependencies(dst: result_id, source: operand);
5930}
5931
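// Emits a binary operation component by component and constructs the result vector explicitly,
// e.g. bvec2(a.x < b.x, a.y < b.y), typically for cases where the vectorized form is not available on the target.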
5932void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5933 const char *op, bool negate, SPIRType::BaseType expected_type)
5934{
5935 auto &type0 = expression_type(id: op0);
5936 auto &type1 = expression_type(id: op1);
5937
5938 SPIRType target_type0 = type0;
5939 SPIRType target_type1 = type1;
5940 target_type0.basetype = expected_type;
5941 target_type1.basetype = expected_type;
5942 target_type0.vecsize = 1;
5943 target_type1.vecsize = 1;
5944
5945 auto &type = get<SPIRType>(id: result_type);
5946 auto expr = type_to_glsl_constructor(type);
5947 expr += '(';
5948 for (uint32_t i = 0; i < type.vecsize; i++)
5949 {
5950 // Make sure to call to_expression multiple times to ensure
5951 // that these expressions are properly flushed to temporaries if needed.
5952 if (negate)
5953 expr += "!(";
5954
5955 if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
5956 expr += bitcast_expression(target_type: target_type0, expr_type: type0.basetype, expr: to_extract_component_expression(id: op0, index: i));
5957 else
5958 expr += to_extract_component_expression(id: op0, index: i);
5959
5960 expr += ' ';
5961 expr += op;
5962 expr += ' ';
5963
5964 if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
5965 expr += bitcast_expression(target_type: target_type1, expr_type: type1.basetype, expr: to_extract_component_expression(id: op1, index: i));
5966 else
5967 expr += to_extract_component_expression(id: op1, index: i);
5968
5969 if (negate)
5970 expr += ")";
5971
5972 if (i + 1 < type.vecsize)
5973 expr += ", ";
5974 }
5975 expr += ')';
5976 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1));
5977
5978 inherit_expression_dependencies(dst: result_id, source: op0);
5979 inherit_expression_dependencies(dst: result_id, source: op1);
5980}
5981
5982SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
5983 uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
5984{
5985 auto &type0 = expression_type(id: op0);
5986 auto &type1 = expression_type(id: op1);
5987
5988 // We have to bitcast if our inputs are of different types, or if our types are not equal to the expected inputs.
5989 // For some functions like OpIEqual and OpINotEqual, we don't care if inputs are of different types than expected
5990 // since the equality test is exactly the same.
5991 bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
5992
5993 // Create a fake type so we can bitcast to it.
5994 // We only deal with regular arithmetic types here, like ints, uints and so on.
5995 SPIRType expected_type;
5996 expected_type.basetype = input_type;
5997 expected_type.vecsize = type0.vecsize;
5998 expected_type.columns = type0.columns;
5999 expected_type.width = type0.width;
6000
6001 if (cast)
6002 {
6003 cast_op0 = bitcast_glsl(result_type: expected_type, arg: op0);
6004 cast_op1 = bitcast_glsl(result_type: expected_type, arg: op1);
6005 }
6006 else
6007 {
6008 // If we don't cast, our actual input type is that of the first (or second) argument.
6009 cast_op0 = to_enclosed_unpacked_expression(id: op0);
6010 cast_op1 = to_enclosed_unpacked_expression(id: op1);
6011 input_type = type0.basetype;
6012 }
6013
6014 return expected_type;
6015}
6016
6017bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
6018{
6019 // Some bitcasts may require complex casting sequences, and those are implemented here.
6020 // Otherwise, a simple unary function call via bitcast_glsl_op will do.
6021
6022 auto &output_type = get<SPIRType>(id: result_type);
6023 auto &input_type = expression_type(id: op0);
6024 string expr;
6025
6026 if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
6027 expr = join(ts: "unpackFloat2x16(floatBitsToUint(", ts: to_unpacked_expression(id: op0), ts: "))");
6028 else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
6029 input_type.vecsize == 2)
6030 expr = join(ts: "uintBitsToFloat(packFloat2x16(", ts: to_unpacked_expression(id: op0), ts: "))");
6031 else
6032 return false;
6033
6034 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: op0));
6035 return true;
6036}
6037
6038void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6039 const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
6040{
6041 string cast_op0, cast_op1;
6042 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
6043 auto &out_type = get<SPIRType>(id: result_type);
6044
6045 // We might have cast away from the result type, so bitcast again.
6046 // For example, arithmetic right shift with uint inputs.
6047 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
6048 string expr;
6049 if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
6050 {
6051 expected_type.basetype = input_type;
6052 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
6053 expr += '(';
6054 expr += join(ts&: cast_op0, ts: " ", ts&: op, ts: " ", ts&: cast_op1);
6055 expr += ')';
6056 }
6057 else
6058 expr += join(ts&: cast_op0, ts: " ", ts&: op, ts: " ", ts&: cast_op1);
6059
6060 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1));
6061 inherit_expression_dependencies(dst: result_id, source: op0);
6062 inherit_expression_dependencies(dst: result_id, source: op1);
6063}
6064
6065void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
6066{
6067 bool forward = should_forward(id: op0);
6068 emit_op(result_type, result_id, rhs: join(ts&: op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ")"), forwarding: forward);
6069 inherit_expression_dependencies(dst: result_id, source: op0);
6070}
6071
6072void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6073 const char *op)
6074{
6075 bool forward = should_forward(id: op0) && should_forward(id: op1);
6076 emit_op(result_type, result_id, rhs: join(ts&: op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ", ", ts: to_unpacked_expression(id: op1), ts: ")"),
6077 forwarding: forward);
6078 inherit_expression_dependencies(dst: result_id, source: op0);
6079 inherit_expression_dependencies(dst: result_id, source: op1);
6080}
6081
6082void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6083 const char *op)
6084{
6085 auto &type = get<SPIRType>(id: result_type);
6086 if (type_is_floating_point(type))
6087 {
6088 if (!options.vulkan_semantics)
6089 SPIRV_CROSS_THROW("Floating point atomics requires Vulkan semantics.");
6090 if (options.es)
6091 SPIRV_CROSS_THROW("Floating point atomics requires desktop GLSL.");
6092 require_extension_internal(ext: "GL_EXT_shader_atomic_float");
6093 }
6094
6095 forced_temporaries.insert(x: result_id);
6096 emit_op(result_type, result_id,
6097 rhs: join(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: op0), ts: ", ",
6098 ts: to_unpacked_expression(id: op1), ts: ")"), forwarding: false);
6099 flush_all_atomic_capable_variables();
6100}
6101
6102void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
6103 uint32_t op0, uint32_t op1, uint32_t op2,
6104 const char *op)
6105{
6106 forced_temporaries.insert(x: result_id);
6107 emit_op(result_type, result_id,
6108 rhs: join(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: op0), ts: ", ",
6109 ts: to_unpacked_expression(id: op1), ts: ", ", ts: to_unpacked_expression(id: op2), ts: ")"), forwarding: false);
6110 flush_all_atomic_capable_variables();
6111}
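
// Illustrative note (sketch, identifiers hypothetical): the three-operand overload above is
// the one used for compare-exchange style atomics, producing something along the lines of
//   uint _17 = atomicCompSwap(ssbo.counter, comparator, new_value);
// The result is always forced into a temporary and atomic-capable variables are flushed,
// since forwarding an atomic expression could reorder or duplicate the side effect.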
6112
6113void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
6114 SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
6115{
6116 auto &out_type = get<SPIRType>(id: result_type);
6117 auto &expr_type = expression_type(id: op0);
6118 auto expected_type = out_type;
6119
6120 // Bit-widths might be different in unary cases because we use this path for SConvert/UConvert and friends.
6121 expected_type.basetype = input_type;
6122 expected_type.width = expr_type.width;
6123
6124 string cast_op;
6125 if (expr_type.basetype != input_type)
6126 {
6127 if (expr_type.basetype == SPIRType::Boolean)
6128 cast_op = join(ts: type_to_glsl(type: expected_type), ts: "(", ts: to_unpacked_expression(id: op0), ts: ")");
6129 else
6130 cast_op = bitcast_glsl(result_type: expected_type, arg: op0);
6131 }
6132 else
6133 cast_op = to_unpacked_expression(id: op0);
6134
6135 string expr;
6136 if (out_type.basetype != expected_result_type)
6137 {
6138 expected_type.basetype = expected_result_type;
6139 expected_type.width = out_type.width;
6140 if (out_type.basetype == SPIRType::Boolean)
6141 expr = type_to_glsl(type: out_type);
6142 else
6143 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
6144 expr += '(';
6145 expr += join(ts&: op, ts: "(", ts&: cast_op, ts: ")");
6146 expr += ')';
6147 }
6148 else
6149 {
6150 expr += join(ts&: op, ts: "(", ts&: cast_op, ts: ")");
6151 }
6152
6153 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0));
6154 inherit_expression_dependencies(dst: result_id, source: op0);
6155}
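
// Illustrative example (identifiers hypothetical): GLSLstd450SAbs applied to a uint input
// with a uint result would be emitted roughly as
//   uint(abs(int(x)))
// The input is bitcast to the expected signed type and the result is bitcast back to the
// declared result type; boolean conversions use value casts instead of bitcasts.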
6156
6157// Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
6158// and different vector sizes all at once. Need a special purpose method here.
6159void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6160 uint32_t op2, const char *op,
6161 SPIRType::BaseType expected_result_type,
6162 SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
6163 SPIRType::BaseType input_type2)
6164{
6165 auto &out_type = get<SPIRType>(id: result_type);
6166 auto expected_type = out_type;
6167 expected_type.basetype = input_type0;
6168
6169 string cast_op0 =
6170 expression_type(id: op0).basetype != input_type0 ? bitcast_glsl(result_type: expected_type, arg: op0) : to_unpacked_expression(id: op0);
6171
6172 auto op1_expr = to_unpacked_expression(id: op1);
6173 auto op2_expr = to_unpacked_expression(id: op2);
6174
6175 // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
6176 expected_type.basetype = input_type1;
6177 expected_type.vecsize = 1;
6178 string cast_op1 = expression_type(id: op1).basetype != input_type1 ?
6179 join(ts: type_to_glsl_constructor(type: expected_type), ts: "(", ts&: op1_expr, ts: ")") :
6180 op1_expr;
6181
6182 expected_type.basetype = input_type2;
6183 expected_type.vecsize = 1;
6184 string cast_op2 = expression_type(id: op2).basetype != input_type2 ?
6185 join(ts: type_to_glsl_constructor(type: expected_type), ts: "(", ts&: op2_expr, ts: ")") :
6186 op2_expr;
6187
6188 string expr;
6189 if (out_type.basetype != expected_result_type)
6190 {
6191 expected_type.vecsize = out_type.vecsize;
6192 expected_type.basetype = expected_result_type;
6193 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
6194 expr += '(';
6195 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ", ", ts&: cast_op2, ts: ")");
6196 expr += ')';
6197 }
6198 else
6199 {
6200 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ", ", ts&: cast_op2, ts: ")");
6201 }
6202
6203 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2));
6204 inherit_expression_dependencies(dst: result_id, source: op0);
6205 inherit_expression_dependencies(dst: result_id, source: op1);
6206 inherit_expression_dependencies(dst: result_id, source: op2);
6207}
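
// Illustrative example (identifiers hypothetical): OpBitFieldSExtract on a uvec4 base with
// 16-bit offset/count operands could be emitted roughly as
//   uvec4(bitfieldExtract(ivec4(base), int(offset), int(count)))
// The base is bitcast to the signed input type, offset/count are value-cast to plain int as
// GLSL requires, and the result is bitcast back to the declared type.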
6208
6209void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6210 uint32_t op2, const char *op, SPIRType::BaseType input_type)
6211{
6212 auto &out_type = get<SPIRType>(id: result_type);
6213 auto expected_type = out_type;
6214 expected_type.basetype = input_type;
6215 string cast_op0 =
6216 expression_type(id: op0).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op0) : to_unpacked_expression(id: op0);
6217 string cast_op1 =
6218 expression_type(id: op1).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op1) : to_unpacked_expression(id: op1);
6219 string cast_op2 =
6220 expression_type(id: op2).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op2) : to_unpacked_expression(id: op2);
6221
6222 string expr;
6223 if (out_type.basetype != input_type)
6224 {
6225 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
6226 expr += '(';
6227 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ", ", ts&: cast_op2, ts: ")");
6228 expr += ')';
6229 }
6230 else
6231 {
6232 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ", ", ts&: cast_op2, ts: ")");
6233 }
6234
6235 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2));
6236 inherit_expression_dependencies(dst: result_id, source: op0);
6237 inherit_expression_dependencies(dst: result_id, source: op1);
6238 inherit_expression_dependencies(dst: result_id, source: op2);
6239}
6240
6241void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
6242 uint32_t op1, const char *op, SPIRType::BaseType input_type)
6243{
6244 // Special purpose method for implementing clustered subgroup opcodes.
6245 // Main difference is that op1 does not participate in any casting; it needs to be a literal.
6246 auto &out_type = get<SPIRType>(id: result_type);
6247 auto expected_type = out_type;
6248 expected_type.basetype = input_type;
6249 string cast_op0 =
6250 expression_type(id: op0).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op0) : to_unpacked_expression(id: op0);
6251
6252 string expr;
6253 if (out_type.basetype != input_type)
6254 {
6255 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
6256 expr += '(';
6257 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts: to_expression(id: op1), ts: ")");
6258 expr += ')';
6259 }
6260 else
6261 {
6262 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts: to_expression(id: op1), ts: ")");
6263 }
6264
6265 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0));
6266 inherit_expression_dependencies(dst: result_id, source: op0);
6267}
6268
6269void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6270 const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
6271{
6272 string cast_op0, cast_op1;
6273 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
6274 auto &out_type = get<SPIRType>(id: result_type);
6275
6276 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
6277 string expr;
6278 if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
6279 {
6280 expected_type.basetype = input_type;
6281 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
6282 expr += '(';
6283 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ")");
6284 expr += ')';
6285 }
6286 else
6287 {
6288 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ")");
6289 }
6290
6291 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1));
6292 inherit_expression_dependencies(dst: result_id, source: op0);
6293 inherit_expression_dependencies(dst: result_id, source: op1);
6294}
6295
6296void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6297 uint32_t op2, const char *op)
6298{
6299 bool forward = should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2);
6300 emit_op(result_type, result_id,
6301 rhs: join(ts&: op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ", ", ts: to_unpacked_expression(id: op1), ts: ", ",
6302 ts: to_unpacked_expression(id: op2), ts: ")"),
6303 forwarding: forward);
6304
6305 inherit_expression_dependencies(dst: result_id, source: op0);
6306 inherit_expression_dependencies(dst: result_id, source: op1);
6307 inherit_expression_dependencies(dst: result_id, source: op2);
6308}
6309
6310void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6311 uint32_t op2, uint32_t op3, const char *op)
6312{
6313 bool forward = should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2) && should_forward(id: op3);
6314 emit_op(result_type, result_id,
6315 rhs: join(ts&: op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ", ", ts: to_unpacked_expression(id: op1), ts: ", ",
6316 ts: to_unpacked_expression(id: op2), ts: ", ", ts: to_unpacked_expression(id: op3), ts: ")"),
6317 forwarding: forward);
6318
6319 inherit_expression_dependencies(dst: result_id, source: op0);
6320 inherit_expression_dependencies(dst: result_id, source: op1);
6321 inherit_expression_dependencies(dst: result_id, source: op2);
6322 inherit_expression_dependencies(dst: result_id, source: op3);
6323}
6324
6325void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6326 uint32_t op2, uint32_t op3, const char *op,
6327 SPIRType::BaseType offset_count_type)
6328{
6329 // Only need to cast offset/count arguments. Types of base/insert must be the same as the result type,
6330 // and bitfieldInsert is sign invariant.
6331 bool forward = should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2) && should_forward(id: op3);
6332
6333 auto op0_expr = to_unpacked_expression(id: op0);
6334 auto op1_expr = to_unpacked_expression(id: op1);
6335 auto op2_expr = to_unpacked_expression(id: op2);
6336 auto op3_expr = to_unpacked_expression(id: op3);
6337
6338 SPIRType target_type;
6339 target_type.vecsize = 1;
6340 target_type.basetype = offset_count_type;
6341
6342 if (expression_type(id: op2).basetype != offset_count_type)
6343 {
6344 // Value-cast here. Input might be 16-bit. GLSL requires int.
6345 op2_expr = join(ts: type_to_glsl_constructor(type: target_type), ts: "(", ts&: op2_expr, ts: ")");
6346 }
6347
6348 if (expression_type(id: op3).basetype != offset_count_type)
6349 {
6350 // Value-cast here. Input might be 16-bit. GLSL requires int.
6351 op3_expr = join(ts: type_to_glsl_constructor(type: target_type), ts: "(", ts&: op3_expr, ts: ")");
6352 }
6353
6354 emit_op(result_type, result_id, rhs: join(ts&: op, ts: "(", ts&: op0_expr, ts: ", ", ts&: op1_expr, ts: ", ", ts&: op2_expr, ts: ", ", ts&: op3_expr, ts: ")"),
6355 forwarding: forward);
6356
6357 inherit_expression_dependencies(dst: result_id, source: op0);
6358 inherit_expression_dependencies(dst: result_id, source: op1);
6359 inherit_expression_dependencies(dst: result_id, source: op2);
6360 inherit_expression_dependencies(dst: result_id, source: op3);
6361}
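
// Illustrative example (identifiers hypothetical): with 16-bit offset/count operands this
// produces roughly
//   bitfieldInsert(base, insert, int(offset), int(count))
// Only offset/count get value casts; base/insert already match the result type and
// bitfieldInsert itself is sign-invariant.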
6362
6363string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
6364{
6365 const char *type;
6366 switch (imgtype.image.dim)
6367 {
6368 case spv::Dim1D:
6369 // Force 2D path for ES.
6370 if (options.es)
6371 type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
6372 else
6373 type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
6374 break;
6375 case spv::Dim2D:
6376 type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
6377 break;
6378 case spv::Dim3D:
6379 type = "3D";
6380 break;
6381 case spv::DimCube:
6382 type = "Cube";
6383 break;
6384 case spv::DimRect:
6385 type = "2DRect";
6386 break;
6387 case spv::DimBuffer:
6388 type = "Buffer";
6389 break;
6390 case spv::DimSubpassData:
6391 type = "2D";
6392 break;
6393 default:
6394 type = "";
6395 break;
6396 }
6397
6398 // In legacy GLSL, an extension is required for textureLod in the fragment
6399 // shader or textureGrad anywhere.
6400 bool legacy_lod_ext = false;
6401 auto &execution = get_entry_point();
6402 if (op == "textureGrad" || op == "textureProjGrad" ||
6403 ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
6404 {
6405 if (is_legacy_es())
6406 {
6407 legacy_lod_ext = true;
6408 require_extension_internal(ext: "GL_EXT_shader_texture_lod");
6409 }
6410 else if (is_legacy_desktop())
6411 require_extension_internal(ext: "GL_ARB_shader_texture_lod");
6412 }
6413
6414 if (op == "textureLodOffset" || op == "textureProjLodOffset")
6415 {
6416 if (is_legacy_es())
6417 SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
6418
6419 require_extension_internal(ext: "GL_EXT_gpu_shader4");
6420 }
6421
6422 // GLES has very limited support for shadow samplers.
6423 // Basically, shadow2D and shadow2DProj work through EXT_shadow_samplers;
6424 // everything else just throws.
6425 bool is_comparison = is_depth_image(type: imgtype, id: tex);
6426 if (is_comparison && is_legacy_es())
6427 {
6428 if (op == "texture" || op == "textureProj")
6429 require_extension_internal(ext: "GL_EXT_shadow_samplers");
6430 else
6431 SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
6432 }
6433
6434 if (op == "textureSize")
6435 {
6436 if (is_legacy_es())
6437 SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
6438 if (is_comparison)
6439 SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
6440 require_extension_internal(ext: "GL_EXT_gpu_shader4");
6441 }
6442
6443 if (op == "texelFetch" && is_legacy_es())
6444 SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");
6445
6446 bool is_es_and_depth = is_legacy_es() && is_comparison;
6447 std::string type_prefix = is_comparison ? "shadow" : "texture";
6448
6449 if (op == "texture")
6450 return is_es_and_depth ? join(ts&: type_prefix, ts&: type, ts: "EXT") : join(ts&: type_prefix, ts&: type);
6451 else if (op == "textureLod")
6452 return join(ts&: type_prefix, ts&: type, ts: legacy_lod_ext ? "LodEXT" : "Lod");
6453 else if (op == "textureProj")
6454 return join(ts&: type_prefix, ts&: type, ts: is_es_and_depth ? "ProjEXT" : "Proj");
6455 else if (op == "textureGrad")
6456 return join(ts&: type_prefix, ts&: type, ts: is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
6457 else if (op == "textureProjLod")
6458 return join(ts&: type_prefix, ts&: type, ts: legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
6459 else if (op == "textureLodOffset")
6460 return join(ts&: type_prefix, ts&: type, ts: "LodOffset");
6461 else if (op == "textureProjGrad")
6462 return join(ts&: type_prefix, ts&: type,
6463 ts: is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
6464 else if (op == "textureProjLodOffset")
6465 return join(ts&: type_prefix, ts&: type, ts: "ProjLodOffset");
6466 else if (op == "textureSize")
6467 return join(ts: "textureSize", ts&: type);
6468 else if (op == "texelFetch")
6469 return join(ts: "texelFetch", ts&: type);
6470 else
6471 {
6472 SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
6473 }
6474}
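
// Illustrative examples of the legacy name mapping above (assuming a legacy ES fragment shader):
//   "texture"    on a 2D depth sampler   -> "shadow2DEXT"      (via GL_EXT_shadow_samplers)
//   "textureLod" on a plain 2D sampler   -> "texture2DLodEXT"  (via GL_EXT_shader_texture_lod)
//   "texture"    on a cube sampler       -> "textureCube"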
6475
6476bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
6477{
6478 auto *cleft = maybe_get<SPIRConstant>(id: left);
6479 auto *cright = maybe_get<SPIRConstant>(id: right);
6480 auto &lerptype = expression_type(id: lerp);
6481
6482 // If our targets aren't constants, we cannot use construction.
6483 if (!cleft || !cright)
6484 return false;
6485
6486 // If our targets are spec constants, we cannot use construction.
6487 if (cleft->specialization || cright->specialization)
6488 return false;
6489
6490 auto &value_type = get<SPIRType>(id: cleft->constant_type);
6491
6492 if (lerptype.basetype != SPIRType::Boolean)
6493 return false;
6494 if (value_type.basetype == SPIRType::Struct || is_array(type: value_type))
6495 return false;
6496 if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize)
6497 return false;
6498
6499 // The only valid way in SPIR-V 1.4 to use matrices in OpSelect is a scalar select.
6500 // A matrix(scalar) constructor fills in the diagonal, so this gets messy very quickly.
6501 // Just avoid this case.
6502 if (value_type.columns > 1)
6503 return false;
6504
6505 // If our bool selects between 0 and 1, we can cast from bool instead, making this a trivial constructor cast.
6506 bool ret = true;
6507 for (uint32_t row = 0; ret && row < value_type.vecsize; row++)
6508 {
6509 switch (type.basetype)
6510 {
6511 case SPIRType::Short:
6512 case SPIRType::UShort:
6513 ret = cleft->scalar_u16(col: 0, row) == 0 && cright->scalar_u16(col: 0, row) == 1;
6514 break;
6515
6516 case SPIRType::Int:
6517 case SPIRType::UInt:
6518 ret = cleft->scalar(col: 0, row) == 0 && cright->scalar(col: 0, row) == 1;
6519 break;
6520
6521 case SPIRType::Half:
6522 ret = cleft->scalar_f16(col: 0, row) == 0.0f && cright->scalar_f16(col: 0, row) == 1.0f;
6523 break;
6524
6525 case SPIRType::Float:
6526 ret = cleft->scalar_f32(col: 0, row) == 0.0f && cright->scalar_f32(col: 0, row) == 1.0f;
6527 break;
6528
6529 case SPIRType::Double:
6530 ret = cleft->scalar_f64(col: 0, row) == 0.0 && cright->scalar_f64(col: 0, row) == 1.0;
6531 break;
6532
6533 case SPIRType::Int64:
6534 case SPIRType::UInt64:
6535 ret = cleft->scalar_u64(col: 0, row) == 0 && cright->scalar_u64(col: 0, row) == 1;
6536 break;
6537
6538 default:
6539 ret = false;
6540 break;
6541 }
6542 }
6543
6544 if (ret)
6545 op = type_to_glsl_constructor(type);
6546 return ret;
6547}
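
// Illustrative note (identifiers hypothetical): this is what lets an OpSelect between the
// constants 0 and 1 collapse into a plain constructor cast, e.g. selecting ivec2(1) over
// ivec2(0) on a bvec2 condition becomes simply
//   ivec2(cond)
// instead of a mix() call or a ternary chain.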
6548
6549string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
6550 uint32_t false_value)
6551{
6552 string expr;
6553 auto &lerptype = expression_type(id: select);
6554
6555 if (lerptype.vecsize == 1)
6556 expr = join(ts: to_enclosed_expression(id: select), ts: " ? ", ts: to_enclosed_pointer_expression(id: true_value), ts: " : ",
6557 ts: to_enclosed_pointer_expression(id: false_value));
6558 else
6559 {
6560 auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(id: expression, index: i); };
6561
6562 expr = type_to_glsl_constructor(type: restype);
6563 expr += "(";
6564 for (uint32_t i = 0; i < restype.vecsize; i++)
6565 {
6566 expr += swiz(select, i);
6567 expr += " ? ";
6568 expr += swiz(true_value, i);
6569 expr += " : ";
6570 expr += swiz(false_value, i);
6571 if (i + 1 < restype.vecsize)
6572 expr += ", ";
6573 }
6574 expr += ")";
6575 }
6576
6577 return expr;
6578}
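
// Illustrative example (identifiers hypothetical): for a bvec2 selector this builds a
// component-wise ternary chain such as
//   vec2(s.x ? a.x : b.x, s.y ? a.y : b.y)
// while a scalar selector degenerates to a single "s ? a : b".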
6579
6580void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
6581{
6582 auto &lerptype = expression_type(id: lerp);
6583 auto &restype = get<SPIRType>(id: result_type);
6584
6585 // If this results in a variable pointer, assume it may be written through.
6586 if (restype.pointer)
6587 {
6588 register_write(chain: left);
6589 register_write(chain: right);
6590 }
6591
6592 string mix_op;
6593 bool has_boolean_mix = *backend.boolean_mix_function &&
6594 ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
6595 bool trivial_mix = to_trivial_mix_op(type: restype, op&: mix_op, left, right, lerp);
6596
6597 // Cannot use boolean mix when the lerp argument is just a single boolean;
6598 // fall back to regular ternary expressions.
6599 if (lerptype.vecsize == 1)
6600 has_boolean_mix = false;
6601
6602 // If we can reduce the mix to a simple cast, do so.
6603 // This helps for cases like int(bool), uint(bool) which is implemented with
6604 // OpSelect bool 1 0.
6605 if (trivial_mix)
6606 {
6607 emit_unary_func_op(result_type, result_id: id, op0: lerp, op: mix_op.c_str());
6608 }
6609 else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
6610 {
6611 // Boolean mix not supported on desktop without extension.
6612 // Was added in OpenGL 4.5 with ES 3.1 compat.
6613 //
6614 // Could use GL_EXT_shader_integer_mix on desktop at least,
6615 // but Apple doesn't support it. :(
6616 // Just implement it as ternary expressions.
6617 auto expr = to_ternary_expression(restype: get<SPIRType>(id: result_type), select: lerp, true_value: right, false_value: left);
6618 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: left) && should_forward(id: right) && should_forward(id: lerp));
6619 inherit_expression_dependencies(dst: id, source: left);
6620 inherit_expression_dependencies(dst: id, source: right);
6621 inherit_expression_dependencies(dst: id, source: lerp);
6622 }
6623 else if (lerptype.basetype == SPIRType::Boolean)
6624 emit_trinary_func_op(result_type, result_id: id, op0: left, op1: right, op2: lerp, op: backend.boolean_mix_function);
6625 else
6626 emit_trinary_func_op(result_type, result_id: id, op0: left, op1: right, op2: lerp, op: "mix");
6627}
6628
6629string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
6630{
6631 // Keep track of the array indices we have used to load the image.
6632 // We'll need to use the same array index into the combined image sampler array.
6633 auto image_expr = to_non_uniform_aware_expression(id: image_id);
6634 string array_expr;
6635 auto array_index = image_expr.find_first_of(c: '[');
6636 if (array_index != string::npos)
6637 array_expr = image_expr.substr(pos: array_index, n: string::npos);
6638
6639 auto &args = current_function->arguments;
6640
6641 // For GLSL and ESSL targets, we must enumerate all possible combinations of sampler2D(texture2D, sampler)
6642 // and redirect each combination to a new sampler2D uniform.
6643 auto *image = maybe_get_backing_variable(chain: image_id);
6644 auto *samp = maybe_get_backing_variable(chain: samp_id);
6645 if (image)
6646 image_id = image->self;
6647 if (samp)
6648 samp_id = samp->self;
6649
6650 auto image_itr = find_if(first: begin(cont&: args), last: end(cont&: args),
6651 pred: [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
6652
6653 auto sampler_itr = find_if(first: begin(cont&: args), last: end(cont&: args),
6654 pred: [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
6655
6656 if (image_itr != end(cont&: args) || sampler_itr != end(cont&: args))
6657 {
6658 // If either the image or the sampler originates from a function parameter, we will find it in our argument list.
6659 bool global_image = image_itr == end(cont&: args);
6660 bool global_sampler = sampler_itr == end(cont&: args);
6661 VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(cont&: args)));
6662 VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(cont&: args)));
6663
6664 auto &combined = current_function->combined_parameters;
6665 auto itr = find_if(first: begin(cont&: combined), last: end(cont&: combined), pred: [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
6666 return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
6667 p.sampler_id == sid;
6668 });
6669
6670 if (itr != end(cont&: combined))
6671 return to_expression(id: itr->id) + array_expr;
6672 else
6673 {
6674 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
6675 "build_combined_image_samplers() used "
6676 "before compile() was called?");
6677 }
6678 }
6679 else
6680 {
6681 // For global sampler2D, look directly at the global remapping table.
6682 auto &mapping = combined_image_samplers;
6683 auto itr = find_if(first: begin(cont&: mapping), last: end(cont&: mapping), pred: [image_id, samp_id](const CombinedImageSampler &combined) {
6684 return combined.image_id == image_id && combined.sampler_id == samp_id;
6685 });
6686
6687 if (itr != end(cont&: combined_image_samplers))
6688 return to_expression(id: itr->combined_id) + array_expr;
6689 else
6690 {
6691 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
6692 "before compile() was called?");
6693 }
6694 }
6695}
6696
6697bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
6698{
6699 switch (op)
6700 {
6701 case OpGroupNonUniformElect:
6702 case OpGroupNonUniformBallot:
6703 case OpGroupNonUniformBallotFindLSB:
6704 case OpGroupNonUniformBallotFindMSB:
6705 case OpGroupNonUniformBroadcast:
6706 case OpGroupNonUniformBroadcastFirst:
6707 case OpGroupNonUniformAll:
6708 case OpGroupNonUniformAny:
6709 case OpGroupNonUniformAllEqual:
6710 case OpControlBarrier:
6711 case OpMemoryBarrier:
6712 case OpGroupNonUniformBallotBitCount:
6713 case OpGroupNonUniformBallotBitExtract:
6714 case OpGroupNonUniformInverseBallot:
6715 return true;
6716 default:
6717 return false;
6718 }
6719}
6720
6721void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
6722{
6723 if (options.vulkan_semantics && combined_image_samplers.empty())
6724 {
6725 emit_binary_func_op(result_type, result_id, op0: image_id, op1: samp_id,
6726 op: type_to_glsl(type: get<SPIRType>(id: result_type), id: result_id).c_str());
6727 }
6728 else
6729 {
6730 // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
6731 emit_op(result_type, result_id, rhs: to_combined_image_sampler(image_id, samp_id), forwarding: true, suppress_usage_tracking: true);
6732 }
6733
6734 // Make sure to suppress usage tracking and any expression invalidation.
6735 // It is illegal to create temporaries of opaque types.
6736 forwarded_temporaries.erase(x: result_id);
6737}
6738
6739static inline bool image_opcode_is_sample_no_dref(Op op)
6740{
6741 switch (op)
6742 {
6743 case OpImageSampleExplicitLod:
6744 case OpImageSampleImplicitLod:
6745 case OpImageSampleProjExplicitLod:
6746 case OpImageSampleProjImplicitLod:
6747 case OpImageFetch:
6748 case OpImageRead:
6749 case OpImageSparseSampleExplicitLod:
6750 case OpImageSparseSampleImplicitLod:
6751 case OpImageSparseSampleProjExplicitLod:
6752 case OpImageSparseSampleProjImplicitLod:
6753 case OpImageSparseFetch:
6754 case OpImageSparseRead:
6755 return true;
6756
6757 default:
6758 return false;
6759 }
6760}
6761
6762void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
6763 uint32_t &texel_id)
6764{
6765 // Need to allocate two temporaries.
6766 if (options.es)
6767 SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
6768 require_extension_internal(ext: "GL_ARB_sparse_texture2");
6769
6770 auto &temps = extra_sub_expressions[id];
6771 if (temps == 0)
6772 temps = ir.increase_bound_by(count: 2);
6773
6774 feedback_id = temps + 0;
6775 texel_id = temps + 1;
6776
6777 auto &return_type = get<SPIRType>(id: result_type_id);
6778 if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
6779 SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
6780 emit_uninitialized_temporary(result_type: return_type.member_types[0], result_id: feedback_id);
6781 emit_uninitialized_temporary(result_type: return_type.member_types[1], result_id: texel_id);
6782}
6783
6784uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
6785{
6786 auto itr = extra_sub_expressions.find(x: id);
6787 if (itr == extra_sub_expressions.end())
6788 return 0;
6789 else
6790 return itr->second + 1;
6791}
6792
6793void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
6794{
6795 auto *ops = stream(instr: i);
6796 auto op = static_cast<Op>(i.op);
6797
6798 SmallVector<uint32_t> inherited_expressions;
6799
6800 uint32_t result_type_id = ops[0];
6801 uint32_t id = ops[1];
6802 auto &return_type = get<SPIRType>(id: result_type_id);
6803
6804 uint32_t sparse_code_id = 0;
6805 uint32_t sparse_texel_id = 0;
6806 if (sparse)
6807 emit_sparse_feedback_temporaries(result_type_id, id, feedback_id&: sparse_code_id, texel_id&: sparse_texel_id);
6808
6809 bool forward = false;
6810 string expr = to_texture_op(i, sparse, forward: &forward, inherited_expressions);
6811
6812 if (sparse)
6813 {
6814 statement(ts: to_expression(id: sparse_code_id), ts: " = ", ts&: expr, ts: ";");
6815 expr = join(ts: type_to_glsl(type: return_type), ts: "(", ts: to_expression(id: sparse_code_id), ts: ", ", ts: to_expression(id: sparse_texel_id),
6816 ts: ")");
6817 forward = true;
6818 inherited_expressions.clear();
6819 }
6820
6821 emit_op(result_type: result_type_id, result_id: id, rhs: expr, forwarding: forward);
6822 for (auto &inherit : inherited_expressions)
6823 inherit_expression_dependencies(dst: id, source: inherit);
6824
6825 // Do not register sparse ops as control dependent as they are always lowered to a temporary.
6826 switch (op)
6827 {
6828 case OpImageSampleDrefImplicitLod:
6829 case OpImageSampleImplicitLod:
6830 case OpImageSampleProjImplicitLod:
6831 case OpImageSampleProjDrefImplicitLod:
6832 register_control_dependent_expression(expr: id);
6833 break;
6834
6835 default:
6836 break;
6837 }
6838}
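
// Illustrative sketch of the sparse path above (identifiers and struct name hypothetical):
//   _code = sparseTextureARB(uTex, vUV, _texel);
//   ResidencyResult _19 = ResidencyResult(_code, _texel);
// The sparse call itself is emitted as a statement into a pre-declared temporary, and the
// SPIR-V result struct is then reassembled from the feedback code and texel temporaries.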
6839
6840std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
6841 SmallVector<uint32_t> &inherited_expressions)
6842{
6843 auto *ops = stream(instr: i);
6844 auto op = static_cast<Op>(i.op);
6845 uint32_t length = i.length;
6846
6847 uint32_t result_type_id = ops[0];
6848 VariableID img = ops[2];
6849 uint32_t coord = ops[3];
6850 uint32_t dref = 0;
6851 uint32_t comp = 0;
6852 bool gather = false;
6853 bool proj = false;
6854 bool fetch = false;
6855 bool nonuniform_expression = false;
6856 const uint32_t *opt = nullptr;
6857
6858 auto &result_type = get<SPIRType>(id: result_type_id);
6859
6860 inherited_expressions.push_back(t: coord);
6861 if (has_decoration(id: img, decoration: DecorationNonUniform) && !maybe_get_backing_variable(chain: img))
6862 nonuniform_expression = true;
6863
6864 switch (op)
6865 {
6866 case OpImageSampleDrefImplicitLod:
6867 case OpImageSampleDrefExplicitLod:
6868 case OpImageSparseSampleDrefImplicitLod:
6869 case OpImageSparseSampleDrefExplicitLod:
6870 dref = ops[4];
6871 opt = &ops[5];
6872 length -= 5;
6873 break;
6874
6875 case OpImageSampleProjDrefImplicitLod:
6876 case OpImageSampleProjDrefExplicitLod:
6877 case OpImageSparseSampleProjDrefImplicitLod:
6878 case OpImageSparseSampleProjDrefExplicitLod:
6879 dref = ops[4];
6880 opt = &ops[5];
6881 length -= 5;
6882 proj = true;
6883 break;
6884
6885 case OpImageDrefGather:
6886 case OpImageSparseDrefGather:
6887 dref = ops[4];
6888 opt = &ops[5];
6889 length -= 5;
6890 gather = true;
6891 if (options.es && options.version < 310)
6892 SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
6893 else if (!options.es && options.version < 400)
6894 SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
6895 break;
6896
6897 case OpImageGather:
6898 case OpImageSparseGather:
6899 comp = ops[4];
6900 opt = &ops[5];
6901 length -= 5;
6902 gather = true;
6903 if (options.es && options.version < 310)
6904 SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
6905 else if (!options.es && options.version < 400)
6906 {
6907 if (!expression_is_constant_null(id: comp))
6908 SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
6909 require_extension_internal(ext: "GL_ARB_texture_gather");
6910 }
6911 break;
6912
6913 case OpImageFetch:
6914 case OpImageSparseFetch:
6915 case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
6916 opt = &ops[4];
6917 length -= 4;
6918 fetch = true;
6919 break;
6920
6921 case OpImageSampleProjImplicitLod:
6922 case OpImageSampleProjExplicitLod:
6923 case OpImageSparseSampleProjImplicitLod:
6924 case OpImageSparseSampleProjExplicitLod:
6925 opt = &ops[4];
6926 length -= 4;
6927 proj = true;
6928 break;
6929
6930 default:
6931 opt = &ops[4];
6932 length -= 4;
6933 break;
6934 }
6935
6936 // Bypass pointers because we need the real image struct
6937 auto &type = expression_type(id: img);
6938 auto &imgtype = get<SPIRType>(id: type.self);
6939
6940 uint32_t coord_components = 0;
6941 switch (imgtype.image.dim)
6942 {
6943 case spv::Dim1D:
6944 coord_components = 1;
6945 break;
6946 case spv::Dim2D:
6947 coord_components = 2;
6948 break;
6949 case spv::Dim3D:
6950 coord_components = 3;
6951 break;
6952 case spv::DimCube:
6953 coord_components = 3;
6954 break;
6955 case spv::DimBuffer:
6956 coord_components = 1;
6957 break;
6958 default:
6959 coord_components = 2;
6960 break;
6961 }
6962
6963 if (dref)
6964 inherited_expressions.push_back(t: dref);
6965
6966 if (proj)
6967 coord_components++;
6968 if (imgtype.image.arrayed)
6969 coord_components++;
6970
6971 uint32_t bias = 0;
6972 uint32_t lod = 0;
6973 uint32_t grad_x = 0;
6974 uint32_t grad_y = 0;
6975 uint32_t coffset = 0;
6976 uint32_t offset = 0;
6977 uint32_t coffsets = 0;
6978 uint32_t sample = 0;
6979 uint32_t minlod = 0;
6980 uint32_t flags = 0;
6981
6982 if (length)
6983 {
6984 flags = *opt++;
6985 length--;
6986 }
6987
6988 auto test = [&](uint32_t &v, uint32_t flag) {
6989 if (length && (flags & flag))
6990 {
6991 v = *opt++;
6992 inherited_expressions.push_back(t: v);
6993 length--;
6994 }
6995 };
6996
6997 test(bias, ImageOperandsBiasMask);
6998 test(lod, ImageOperandsLodMask);
6999 test(grad_x, ImageOperandsGradMask);
7000 test(grad_y, ImageOperandsGradMask);
7001 test(coffset, ImageOperandsConstOffsetMask);
7002 test(offset, ImageOperandsOffsetMask);
7003 test(coffsets, ImageOperandsConstOffsetsMask);
7004 test(sample, ImageOperandsSampleMask);
7005 test(minlod, ImageOperandsMinLodMask);
7006
7007 TextureFunctionBaseArguments base_args = {};
7008 base_args.img = img;
7009 base_args.imgtype = &imgtype;
7010 base_args.is_fetch = fetch != 0;
7011 base_args.is_gather = gather != 0;
7012 base_args.is_proj = proj != 0;
7013
7014 string expr;
7015 TextureFunctionNameArguments name_args = {};
7016
7017 name_args.base = base_args;
7018 name_args.has_array_offsets = coffsets != 0;
7019 name_args.has_offset = coffset != 0 || offset != 0;
7020 name_args.has_grad = grad_x != 0 || grad_y != 0;
7021 name_args.has_dref = dref != 0;
7022 name_args.is_sparse_feedback = sparse;
7023 name_args.has_min_lod = minlod != 0;
7024 name_args.lod = lod;
7025 expr += to_function_name(args: name_args);
7026 expr += "(";
7027
7028 uint32_t sparse_texel_id = 0;
7029 if (sparse)
7030 sparse_texel_id = get_sparse_feedback_texel_id(id: ops[1]);
7031
7032 TextureFunctionArguments args = {};
7033 args.base = base_args;
7034 args.coord = coord;
7035 args.coord_components = coord_components;
7036 args.dref = dref;
7037 args.grad_x = grad_x;
7038 args.grad_y = grad_y;
7039 args.lod = lod;
7040 args.coffset = coffset;
7041 args.offset = offset;
7042 args.bias = bias;
7043 args.component = comp;
7044 args.sample = sample;
7045 args.sparse_texel = sparse_texel_id;
7046 args.min_lod = minlod;
7047 args.nonuniform_expression = nonuniform_expression;
7048 expr += to_function_args(args, p_forward: forward);
7049 expr += ")";
7050
7051 // texture(samplerXShadow) returns float. shadowX() returns vec4, but only in desktop GLSL. Swizzle here.
7052 if (is_legacy() && !options.es && is_depth_image(type: imgtype, id: img))
7053 expr += ".r";
7054
7055 // Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
7056 // Remap back to 4 components as sampling opcodes expect.
7057 if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
7058 {
7059 bool image_is_depth = false;
7060 const auto *combined = maybe_get<SPIRCombinedImageSampler>(id: img);
7061 VariableID image_id = combined ? combined->image : img;
7062
7063 if (combined && is_depth_image(type: imgtype, id: combined->image))
7064 image_is_depth = true;
7065 else if (is_depth_image(type: imgtype, id: img))
7066 image_is_depth = true;
7067
7068 // We must also check the backing variable for the image.
7069 // We might have loaded an OpImage, and used that handle for two different purposes.
7070 // Once with comparison, once without.
7071 auto *image_variable = maybe_get_backing_variable(chain: image_id);
7072 if (image_variable && is_depth_image(type: get<SPIRType>(id: image_variable->basetype), id: image_variable->self))
7073 image_is_depth = true;
7074
7075 if (image_is_depth)
7076 expr = remap_swizzle(out_type: result_type, input_components: 1, expr);
7077 }
7078
7079 if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
7080 {
7081 // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
7082 // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision.
7083 expr = join(ts: type_to_glsl_constructor(type: result_type), ts: "(", ts&: expr, ts: ")");
7084 }
7085
7086 // Deals with reads from MSL. We might need to downconvert to fewer components.
7087 if (op == OpImageRead)
7088 expr = remap_swizzle(out_type: result_type, input_components: 4, expr);
7089
7090 return expr;
7091}
7092
7093bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
7094{
7095 auto *c = maybe_get<SPIRConstant>(id);
7096 if (!c)
7097 return false;
7098 return c->constant_is_null();
7099}
7100
7101bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
7102{
7103 auto &type = expression_type(id: ptr);
7104 if (type.array.empty())
7105 return false;
7106
7107 if (!backend.array_is_value_type)
7108 return true;
7109
7110 auto *var = maybe_get_backing_variable(chain: ptr);
7111 if (!var)
7112 return false;
7113
7114 auto &backed_type = get<SPIRType>(id: var->basetype);
7115 return !backend.array_is_value_type_in_buffer_blocks && backed_type.basetype == SPIRType::Struct &&
7116 has_member_decoration(id: backed_type.self, index: 0, decoration: DecorationOffset);
7117}
7118
7119// Returns the function name for a texture sampling function for the specified image and sampling characteristics.
7120// For some subclasses, the function is a method on the specified image.
7121string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
7122{
7123 if (args.has_min_lod)
7124 {
7125 if (options.es)
7126 SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
7127 require_extension_internal(ext: "GL_ARB_sparse_texture_clamp");
7128 }
7129
7130 string fname;
7131 auto &imgtype = *args.base.imgtype;
7132 VariableID tex = args.base.img;
7133
7134 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
7135 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
7136 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
7137 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
7138 bool workaround_lod_array_shadow_as_grad = false;
7139 if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
7140 is_depth_image(type: imgtype, id: tex) && args.lod && !args.base.is_fetch)
7141 {
7142 if (!expression_is_constant_null(id: args.lod))
7143 {
7144 SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
7145 "expressed in GLSL.");
7146 }
7147 workaround_lod_array_shadow_as_grad = true;
7148 }
7149
7150 if (args.is_sparse_feedback)
7151 fname += "sparse";
7152
7153 if (args.base.is_fetch)
7154 fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
7155 else
7156 {
7157 fname += args.is_sparse_feedback ? "Texture" : "texture";
7158
7159 if (args.base.is_gather)
7160 fname += "Gather";
7161 if (args.has_array_offsets)
7162 fname += "Offsets";
7163 if (args.base.is_proj)
7164 fname += "Proj";
7165 if (args.has_grad || workaround_lod_array_shadow_as_grad)
7166 fname += "Grad";
7167 if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
7168 fname += "Lod";
7169 }
7170
7171 if (args.has_offset)
7172 fname += "Offset";
7173
7174 if (args.has_min_lod)
7175 fname += "Clamp";
7176
7177 if (args.is_sparse_feedback || args.has_min_lod)
7178 fname += "ARB";
7179
7180 return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(op: fname, imgtype, tex) : fname;
7181}
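
// Illustrative examples of the name composition above (hedged; exact names depend on the target):
//   fetch                            -> "texelFetch"
//   gather + constant offsets        -> "textureGatherOffsets"
//   sparse feedback + grad + offset  -> "sparseTextureGradOffsetARB"
//   min-lod clamp                    -> "textureClampARB" (GL_ARB_sparse_texture_clamp)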
7182
7183std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
7184{
7185 auto *var = maybe_get_backing_variable(chain: id);
7186
7187 // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
7188 // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
7189 if (var)
7190 {
7191 auto &type = get<SPIRType>(id: var->basetype);
7192 if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
7193 {
7194 if (options.vulkan_semantics)
7195 {
7196 if (dummy_sampler_id)
7197 {
7198 // Don't need to consider Shadow state since the dummy sampler is always non-shadow.
7199 auto sampled_type = type;
7200 sampled_type.basetype = SPIRType::SampledImage;
7201 return join(ts: type_to_glsl(type: sampled_type), ts: "(", ts: to_non_uniform_aware_expression(id), ts: ", ",
7202 ts: to_expression(id: dummy_sampler_id), ts: ")");
7203 }
7204 else
7205 {
7206 // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
7207 require_extension_internal(ext: "GL_EXT_samplerless_texture_functions");
7208 }
7209 }
7210 else
7211 {
7212 if (!dummy_sampler_id)
7213 SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
7214 "build_dummy_sampler_for_combined_images() called?");
7215
7216 return to_combined_image_sampler(image_id: id, samp_id: dummy_sampler_id);
7217 }
7218 }
7219 }
7220
7221 return to_non_uniform_aware_expression(id);
7222}
7223
7224// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
7225string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
7226{
7227 VariableID img = args.base.img;
7228 auto &imgtype = *args.base.imgtype;
7229
7230 string farg_str;
7231 if (args.base.is_fetch)
7232 farg_str = convert_separate_image_to_expression(id: img);
7233 else
7234 farg_str = to_non_uniform_aware_expression(id: img);
7235
7236 if (args.nonuniform_expression && farg_str.find_first_of(c: '[') != string::npos)
7237 {
7238 // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
7239 farg_str = join(ts&: backend.nonuniform_qualifier, ts: "(", ts&: farg_str, ts: ")");
7240 }
7241
7242 bool swizz_func = backend.swizzle_is_function;
7243 auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
7244 if (comps == in_comps)
7245 return "";
7246
7247 switch (comps)
7248 {
7249 case 1:
7250 return ".x";
7251 case 2:
7252 return swizz_func ? ".xy()" : ".xy";
7253 case 3:
7254 return swizz_func ? ".xyz()" : ".xyz";
7255 default:
7256 return "";
7257 }
7258 };
7259
7260 bool forward = should_forward(id: args.coord);
7261
7262 // The IR can give us more components than we need, so chop them off as needed.
7263 auto swizzle_expr = swizzle(args.coord_components, expression_type(id: args.coord).vecsize);
7264 // Only enclose the UV expression if needed.
7265 auto coord_expr =
7266 (*swizzle_expr == '\0') ? to_expression(id: args.coord) : (to_enclosed_expression(id: args.coord) + swizzle_expr);
7267
7268 // texelFetch only takes int, not uint.
7269 auto &coord_type = expression_type(id: args.coord);
7270 if (coord_type.basetype == SPIRType::UInt)
7271 {
7272 auto expected_type = coord_type;
7273 expected_type.vecsize = args.coord_components;
7274 expected_type.basetype = SPIRType::Int;
7275 coord_expr = bitcast_expression(target_type: expected_type, expr_type: coord_type.basetype, expr: coord_expr);
7276 }
7277
7278 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
7279 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
7280 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
7281 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
7282 bool workaround_lod_array_shadow_as_grad =
7283 ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
7284 is_depth_image(type: imgtype, id: img) && args.lod != 0 && !args.base.is_fetch;
7285
7286 if (args.dref)
7287 {
7288 forward = forward && should_forward(id: args.dref);
7289
7290 // SPIR-V splits dref and coordinate.
7291 if (args.base.is_gather ||
7292 args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
7293 {
7294 farg_str += ", ";
7295 farg_str += to_expression(id: args.coord);
7296 farg_str += ", ";
7297 farg_str += to_expression(id: args.dref);
7298 }
7299 else if (args.base.is_proj)
7300 {
7301 // Have to reshuffle so we get vec4(coord, dref, proj); special case.
7302 // Other shading languages split up the coordinate and compare value arguments like SPIR-V does.
7303 // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
7304 farg_str += ", vec4(";
7305
7306 if (imgtype.image.dim == Dim1D)
7307 {
7308 // Could reuse coord_expr, but we will mess up the temporary usage checking.
7309 farg_str += to_enclosed_expression(id: args.coord) + ".x";
7310 farg_str += ", ";
7311 farg_str += "0.0, ";
7312 farg_str += to_expression(id: args.dref);
7313 farg_str += ", ";
7314 farg_str += to_enclosed_expression(id: args.coord) + ".y)";
7315 }
7316 else if (imgtype.image.dim == Dim2D)
7317 {
7318 // Could reuse coord_expr, but we will mess up the temporary usage checking.
7319 farg_str += to_enclosed_expression(id: args.coord) + (swizz_func ? ".xy()" : ".xy");
7320 farg_str += ", ";
7321 farg_str += to_expression(id: args.dref);
7322 farg_str += ", ";
7323 farg_str += to_enclosed_expression(id: args.coord) + ".z)";
7324 }
7325 else
7326 SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
7327 }
7328 else
7329 {
7330 // Create a composite which merges coord/dref into a single vector.
7331 auto type = expression_type(id: args.coord);
7332 type.vecsize = args.coord_components + 1;
7333 if (imgtype.image.dim == Dim1D && options.es)
7334 type.vecsize++;
7335 farg_str += ", ";
7336 farg_str += type_to_glsl_constructor(type);
7337 farg_str += "(";
7338
7339 if (imgtype.image.dim == Dim1D && options.es)
7340 {
7341 if (imgtype.image.arrayed)
7342 {
7343 farg_str += enclose_expression(expr: coord_expr) + ".x";
7344 farg_str += ", 0.0, ";
7345 farg_str += enclose_expression(expr: coord_expr) + ".y";
7346 }
7347 else
7348 {
7349 farg_str += coord_expr;
7350 farg_str += ", 0.0";
7351 }
7352 }
7353 else
7354 farg_str += coord_expr;
7355
7356 farg_str += ", ";
7357 farg_str += to_expression(id: args.dref);
7358 farg_str += ")";
7359 }
7360 }
7361 else
7362 {
7363 if (imgtype.image.dim == Dim1D && options.es)
7364 {
7365 // Have to fake a second coordinate.
7366 if (type_is_floating_point(type: coord_type))
7367 {
7368 // Cannot mix proj and array.
7369 if (imgtype.image.arrayed || args.base.is_proj)
7370 {
7371 coord_expr = join(ts: "vec3(", ts: enclose_expression(expr: coord_expr), ts: ".x, 0.0, ",
7372 ts: enclose_expression(expr: coord_expr), ts: ".y)");
7373 }
7374 else
7375 coord_expr = join(ts: "vec2(", ts&: coord_expr, ts: ", 0.0)");
7376 }
7377 else
7378 {
7379 if (imgtype.image.arrayed)
7380 {
7381 coord_expr = join(ts: "ivec3(", ts: enclose_expression(expr: coord_expr),
7382 ts: ".x, 0, ",
7383 ts: enclose_expression(expr: coord_expr), ts: ".y)");
7384 }
7385 else
7386 coord_expr = join(ts: "ivec2(", ts&: coord_expr, ts: ", 0)");
7387 }
7388 }
7389
7390 farg_str += ", ";
7391 farg_str += coord_expr;
7392 }
7393
7394 if (args.grad_x || args.grad_y)
7395 {
7396 forward = forward && should_forward(id: args.grad_x);
7397 forward = forward && should_forward(id: args.grad_y);
7398 farg_str += ", ";
7399 farg_str += to_expression(id: args.grad_x);
7400 farg_str += ", ";
7401 farg_str += to_expression(id: args.grad_y);
7402 }
7403
7404 if (args.lod)
7405 {
7406 if (workaround_lod_array_shadow_as_grad)
7407 {
7408 // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
7409 // Implementing this as plain texture() is not safe on some implementations.
7410 if (imgtype.image.dim == Dim2D)
7411 farg_str += ", vec2(0.0), vec2(0.0)";
7412 else if (imgtype.image.dim == DimCube)
7413 farg_str += ", vec3(0.0), vec3(0.0)";
7414 }
7415 else
7416 {
7417 forward = forward && should_forward(id: args.lod);
7418 farg_str += ", ";
7419
7420 // Lod expression for TexelFetch in GLSL must be int, and only int.
7421 if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
7422 farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.lod);
7423 else
7424 farg_str += to_expression(id: args.lod);
7425 }
7426 }
7427 else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
7428 {
7429 // The Lod argument is optional in OpImageFetch, but GLSL's texelFetch requires one, so pick 0 as the default.
7430 farg_str += ", 0";
7431 }
7432
7433 if (args.coffset)
7434 {
7435 forward = forward && should_forward(id: args.coffset);
7436 farg_str += ", ";
7437 farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.coffset);
7438 }
7439 else if (args.offset)
7440 {
7441 forward = forward && should_forward(id: args.offset);
7442 farg_str += ", ";
7443 farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.offset);
7444 }
7445
7446 if (args.sample)
7447 {
7448 farg_str += ", ";
7449 farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.sample);
7450 }
7451
7452 if (args.min_lod)
7453 {
7454 farg_str += ", ";
7455 farg_str += to_expression(id: args.min_lod);
7456 }
7457
7458 if (args.sparse_texel)
7459 {
7460 // The sparse texel output parameter comes after everything else, except that it goes before the optional component/bias arguments.
7461 farg_str += ", ";
7462 farg_str += to_expression(id: args.sparse_texel);
7463 }
7464
7465 if (args.bias)
7466 {
7467 forward = forward && should_forward(id: args.bias);
7468 farg_str += ", ";
7469 farg_str += to_expression(id: args.bias);
7470 }
7471
7472 if (args.component && !expression_is_constant_null(id: args.component))
7473 {
7474 forward = forward && should_forward(id: args.component);
7475 farg_str += ", ";
7476 farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.component);
7477 }
7478
7479 *p_forward = forward;
7480
7481 return farg_str;
7482}
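
// Illustrative example (identifiers hypothetical): a depth-compare sample from a
// sampler2DShadow with a vec2 coordinate is assembled as
//   texture(uShadow, vec3(vUV, dref))
// because GLSL folds the compare value into the coordinate vector, whereas gather and
// 4-component coordinates keep the dref as a separate trailing argument.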
7483
7484Op CompilerGLSL::get_remapped_spirv_op(Op op) const
7485{
7486 if (options.relax_nan_checks)
7487 {
7488 switch (op)
7489 {
7490 case OpFUnordLessThan:
7491 op = OpFOrdLessThan;
7492 break;
7493 case OpFUnordLessThanEqual:
7494 op = OpFOrdLessThanEqual;
7495 break;
7496 case OpFUnordGreaterThan:
7497 op = OpFOrdGreaterThan;
7498 break;
7499 case OpFUnordGreaterThanEqual:
7500 op = OpFOrdGreaterThanEqual;
7501 break;
7502 case OpFUnordEqual:
7503 op = OpFOrdEqual;
7504 break;
7505 case OpFOrdNotEqual:
7506 op = OpFUnordNotEqual;
7507 break;
7508
7509 default:
7510 break;
7511 }
7512 }
7513
7514 return op;
7515}
7516
7517GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const
7518{
7519 // Relax to non-NaN aware opcodes.
7520 if (options.relax_nan_checks)
7521 {
7522 switch (std450_op)
7523 {
7524 case GLSLstd450NClamp:
7525 std450_op = GLSLstd450FClamp;
7526 break;
7527 case GLSLstd450NMin:
7528 std450_op = GLSLstd450FMin;
7529 break;
7530 case GLSLstd450NMax:
7531 std450_op = GLSLstd450FMax;
7532 break;
7533 default:
7534 break;
7535 }
7536 }
7537
7538 return std450_op;
7539}
7540
7541void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
7542{
7543 auto op = static_cast<GLSLstd450>(eop);
7544
7545 if (is_legacy() && is_unsigned_glsl_opcode(op))
7546 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
7547
7548 // If we need to do implicit bitcasts, make sure we do it with the correct type.
7549 uint32_t integer_width = get_integer_width_for_glsl_instruction(op, arguments: args, length);
7550 auto int_type = to_signed_basetype(width: integer_width);
7551 auto uint_type = to_unsigned_basetype(width: integer_width);
7552
7553 op = get_remapped_glsl_op(std450_op: op);
7554
7555 switch (op)
7556 {
7557 // FP fiddling
7558 case GLSLstd450Round:
7559 if (!is_legacy())
7560 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "round");
7561 else
7562 {
			auto op0 = to_enclosed_expression(args[0]);
			auto &op0_type = expression_type(args[0]);
			auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))");
			bool forward = should_forward(args[0]);
			emit_op(result_type, id, expr, forward);
			inherit_expression_dependencies(id, args[0]);
		}
		break;

	case GLSLstd450RoundEven:
		if (!is_legacy())
			emit_unary_func_op(result_type, id, args[0], "roundEven");
		else if (!options.es)
		{
			// This extension provides round() with round-to-even semantics.
			require_extension_internal("GL_EXT_gpu_shader4");
			emit_unary_func_op(result_type, id, args[0], "round");
		}
		else
			SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
		break;

	case GLSLstd450Trunc:
		emit_unary_func_op(result_type, id, args[0], "trunc");
		break;
	case GLSLstd450SAbs:
		emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
		break;
	case GLSLstd450FAbs:
		emit_unary_func_op(result_type, id, args[0], "abs");
		break;
	case GLSLstd450SSign:
		emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
		break;
	case GLSLstd450FSign:
		emit_unary_func_op(result_type, id, args[0], "sign");
		break;
	case GLSLstd450Floor:
		emit_unary_func_op(result_type, id, args[0], "floor");
		break;
	case GLSLstd450Ceil:
		emit_unary_func_op(result_type, id, args[0], "ceil");
		break;
	case GLSLstd450Fract:
		emit_unary_func_op(result_type, id, args[0], "fract");
		break;
	case GLSLstd450Radians:
		emit_unary_func_op(result_type, id, args[0], "radians");
		break;
	case GLSLstd450Degrees:
		emit_unary_func_op(result_type, id, args[0], "degrees");
		break;
	case GLSLstd450Fma:
		if ((!options.es && options.version < 400) || (options.es && options.version < 320))
		{
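			// fma() needs GLSL 400 / ESSL 320; fall back to the equivalent a * b + c expression.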
			auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
			                 to_enclosed_expression(args[2]));

			emit_op(result_type, id, expr,
			        should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
			for (uint32_t i = 0; i < 3; i++)
				inherit_expression_dependencies(id, args[i]);
		}
		else
			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
		break;
	case GLSLstd450Modf:
		register_call_out_argument(args[1]);
		forced_temporaries.insert(id);
		emit_binary_func_op(result_type, id, args[0], args[1], "modf");
		break;

	case GLSLstd450ModfStruct:
	{
		auto &type = get<SPIRType>(result_type);
		emit_uninitialized_temporary_expression(result_type, id);
		statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
		          to_expression(id), ".", to_member_name(type, 1), ");");
		break;
	}

	// Minmax
	case GLSLstd450UMin:
		emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
		break;

	case GLSLstd450SMin:
		emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
		break;

	case GLSLstd450FMin:
		emit_binary_func_op(result_type, id, args[0], args[1], "min");
		break;

	case GLSLstd450FMax:
		emit_binary_func_op(result_type, id, args[0], args[1], "max");
		break;

	case GLSLstd450UMax:
		emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
		break;

	case GLSLstd450SMax:
		emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
		break;

	case GLSLstd450FClamp:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
		break;

	case GLSLstd450UClamp:
		emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
		break;

	case GLSLstd450SClamp:
		emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
		break;

	// Trig
	case GLSLstd450Sin:
		emit_unary_func_op(result_type, id, args[0], "sin");
		break;
	case GLSLstd450Cos:
		emit_unary_func_op(result_type, id, args[0], "cos");
		break;
	case GLSLstd450Tan:
		emit_unary_func_op(result_type, id, args[0], "tan");
		break;
	case GLSLstd450Asin:
		emit_unary_func_op(result_type, id, args[0], "asin");
		break;
	case GLSLstd450Acos:
		emit_unary_func_op(result_type, id, args[0], "acos");
		break;
	case GLSLstd450Atan:
		emit_unary_func_op(result_type, id, args[0], "atan");
		break;
	case GLSLstd450Sinh:
		emit_unary_func_op(result_type, id, args[0], "sinh");
		break;
	case GLSLstd450Cosh:
		emit_unary_func_op(result_type, id, args[0], "cosh");
		break;
	case GLSLstd450Tanh:
		emit_unary_func_op(result_type, id, args[0], "tanh");
		break;
	case GLSLstd450Asinh:
		emit_unary_func_op(result_type, id, args[0], "asinh");
		break;
	case GLSLstd450Acosh:
		emit_unary_func_op(result_type, id, args[0], "acosh");
		break;
	case GLSLstd450Atanh:
		emit_unary_func_op(result_type, id, args[0], "atanh");
		break;
	case GLSLstd450Atan2:
		emit_binary_func_op(result_type, id, args[0], args[1], "atan");
		break;

	// Exponentials
	case GLSLstd450Pow:
		emit_binary_func_op(result_type, id, args[0], args[1], "pow");
		break;
	case GLSLstd450Exp:
		emit_unary_func_op(result_type, id, args[0], "exp");
		break;
	case GLSLstd450Log:
		emit_unary_func_op(result_type, id, args[0], "log");
		break;
	case GLSLstd450Exp2:
		emit_unary_func_op(result_type, id, args[0], "exp2");
		break;
	case GLSLstd450Log2:
		emit_unary_func_op(result_type, id, args[0], "log2");
		break;
	case GLSLstd450Sqrt:
		emit_unary_func_op(result_type, id, args[0], "sqrt");
		break;
	case GLSLstd450InverseSqrt:
		emit_unary_func_op(result_type, id, args[0], "inversesqrt");
		break;

	// Matrix math
	case GLSLstd450Determinant:
		emit_unary_func_op(result_type, id, args[0], "determinant");
		break;
	case GLSLstd450MatrixInverse:
		emit_unary_func_op(result_type, id, args[0], "inverse");
		break;

	// Lerping
	case GLSLstd450FMix:
	case GLSLstd450IMix:
	{
		emit_mix_op(result_type, id, args[0], args[1], args[2]);
		break;
	}
	case GLSLstd450Step:
		emit_binary_func_op(result_type, id, args[0], args[1], "step");
		break;
	case GLSLstd450SmoothStep:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
		break;

	// Packing
	case GLSLstd450Frexp:
		register_call_out_argument(args[1]);
		forced_temporaries.insert(id);
		emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
		break;

	case GLSLstd450FrexpStruct:
	{
		auto &type = get<SPIRType>(result_type);
		emit_uninitialized_temporary_expression(result_type, id);
		statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
		          to_expression(id), ".", to_member_name(type, 1), ");");
		break;
	}

	case GLSLstd450Ldexp:
	{
		bool forward = should_forward(args[0]) && should_forward(args[1]);

		auto op0 = to_unpacked_expression(args[0]);
		auto op1 = to_unpacked_expression(args[1]);
		auto &op1_type = expression_type(args[1]);
		if (op1_type.basetype != SPIRType::Int)
		{
			// Need a value cast here.
			auto target_type = op1_type;
			target_type.basetype = SPIRType::Int;
			op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
		}

		auto expr = join("ldexp(", op0, ", ", op1, ")");

		emit_op(result_type, id, expr, forward);
		inherit_expression_dependencies(id, args[0]);
		inherit_expression_dependencies(id, args[1]);
		break;
	}

	case GLSLstd450PackSnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
		break;
	case GLSLstd450PackUnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
		break;
	case GLSLstd450PackSnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
		break;
	case GLSLstd450PackUnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
		break;
	case GLSLstd450PackHalf2x16:
		emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
		break;
	case GLSLstd450UnpackSnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
		break;
	case GLSLstd450UnpackUnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
		break;
	case GLSLstd450UnpackSnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
		break;
	case GLSLstd450UnpackUnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
		break;
	case GLSLstd450UnpackHalf2x16:
		emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
		break;

	case GLSLstd450PackDouble2x32:
		emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
		break;
	case GLSLstd450UnpackDouble2x32:
		emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
		break;

	// Vector math
	case GLSLstd450Length:
		emit_unary_func_op(result_type, id, args[0], "length");
		break;
	case GLSLstd450Distance:
		emit_binary_func_op(result_type, id, args[0], args[1], "distance");
		break;
	case GLSLstd450Cross:
		emit_binary_func_op(result_type, id, args[0], args[1], "cross");
		break;
	case GLSLstd450Normalize:
		emit_unary_func_op(result_type, id, args[0], "normalize");
		break;
	case GLSLstd450FaceForward:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
		break;
	case GLSLstd450Reflect:
		emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
		break;
	case GLSLstd450Refract:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
		break;

	// Bit-fiddling
	case GLSLstd450FindILsb:
		// findLSB always returns int.
		emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
		break;

	case GLSLstd450FindSMsb:
		emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
		break;

	case GLSLstd450FindUMsb:
		emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
		                        int_type); // findMSB always returns int.
		break;

	// Multisampled varying
	case GLSLstd450InterpolateAtCentroid:
		emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
		break;
	case GLSLstd450InterpolateAtSample:
		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
		break;
	case GLSLstd450InterpolateAtOffset:
		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
		break;

	case GLSLstd450NMin:
	case GLSLstd450NMax:
	{
		emit_nminmax_op(result_type, id, args[0], args[1], op);
		break;
	}

	case GLSLstd450NClamp:
	{
		// Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
		// IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
		uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX];
		if (!max_id)
			max_id = ir.increase_bound_by(1);

		// Inherit precision qualifiers.
		ir.meta[max_id] = ir.meta[id];

		emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
		emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
		break;
	}

	default:
		statement("// unimplemented GLSL op ", eop);
		break;
	}
}
7921
void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
{
	// Need to emulate this call.
	uint32_t &ids = extra_sub_expressions[id];
	if (!ids)
	{
		ids = ir.increase_bound_by(5);
		auto btype = get<SPIRType>(result_type);
		btype.basetype = SPIRType::Boolean;
		set<SPIRType>(ids, btype);
	}

	uint32_t btype_id = ids + 0;
	uint32_t left_nan_id = ids + 1;
	uint32_t right_nan_id = ids + 2;
	uint32_t tmp_id = ids + 3;
	uint32_t mixed_first_id = ids + 4;

	// Inherit precision qualifiers.
	ir.meta[tmp_id] = ir.meta[id];
	ir.meta[mixed_first_id] = ir.meta[id];

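	// Sketch of the emitted GLSL for NMin (names illustrative):
	//   bvecN l = isnan(a); bvecN r = isnan(b);
	//   T tmp = min(a, b);
	//   T m = mix(tmp, b, l);  // a was NaN -> take b
	//   result = mix(m, a, r); // b was NaN -> take a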
	emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
	emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
	emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
	emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
	emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
}
7950
void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
                                                 uint32_t)
{
	require_extension_internal("GL_AMD_shader_ballot");

	enum AMDShaderBallot
	{
		SwizzleInvocationsAMD = 1,
		SwizzleInvocationsMaskedAMD = 2,
		WriteInvocationAMD = 3,
		MbcntAMD = 4
	};

	auto op = static_cast<AMDShaderBallot>(eop);

	switch (op)
	{
	case SwizzleInvocationsAMD:
		emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
		register_control_dependent_expression(id);
		break;

	case SwizzleInvocationsMaskedAMD:
		emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
		register_control_dependent_expression(id);
		break;

	case WriteInvocationAMD:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
		register_control_dependent_expression(id);
		break;

	case MbcntAMD:
		emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
		register_control_dependent_expression(id);
		break;

	default:
		statement("// unimplemented SPV AMD shader ballot op ", eop);
		break;
	}
}
7993
void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
                                                                    const uint32_t *args, uint32_t)
{
	require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");

	enum AMDShaderExplicitVertexParameter
	{
		InterpolateAtVertexAMD = 1
	};

	auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);

	switch (op)
	{
	case InterpolateAtVertexAMD:
		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
		break;

	default:
		statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
		break;
	}
}
8017
void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
                                                         const uint32_t *args, uint32_t)
{
	require_extension_internal("GL_AMD_shader_trinary_minmax");

	enum AMDShaderTrinaryMinMax
	{
		FMin3AMD = 1,
		UMin3AMD = 2,
		SMin3AMD = 3,
		FMax3AMD = 4,
		UMax3AMD = 5,
		SMax3AMD = 6,
		FMid3AMD = 7,
		UMid3AMD = 8,
		SMid3AMD = 9
	};

	auto op = static_cast<AMDShaderTrinaryMinMax>(eop);

	switch (op)
	{
	case FMin3AMD:
	case UMin3AMD:
	case SMin3AMD:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
		break;

	case FMax3AMD:
	case UMax3AMD:
	case SMax3AMD:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
		break;

	case FMid3AMD:
	case UMid3AMD:
	case SMid3AMD:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
		break;

	default:
		statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
		break;
	}
}
8063
void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
                                              uint32_t)
{
	require_extension_internal("GL_AMD_gcn_shader");

	enum AMDGCNShader
	{
		CubeFaceIndexAMD = 1,
		CubeFaceCoordAMD = 2,
		TimeAMD = 3
	};

	auto op = static_cast<AMDGCNShader>(eop);

	switch (op)
	{
	case CubeFaceIndexAMD:
		emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
		break;
	case CubeFaceCoordAMD:
		emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
		break;
	case TimeAMD:
	{
		string expr = "timeAMD()";
		emit_op(result_type, id, expr, true);
		register_control_dependent_expression(id);
		break;
	}

	default:
		statement("// unimplemented SPV AMD gcn shader op ", eop);
		break;
	}
}
8099
void CompilerGLSL::emit_subgroup_op(const Instruction &i)
{
	const uint32_t *ops = stream(i);
	auto op = static_cast<Op>(i.op);

	if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op))
		SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");

	// If we need to do implicit bitcasts, make sure we do it with the correct type.
	uint32_t integer_width = get_integer_width_for_instruction(i);
	auto int_type = to_signed_basetype(integer_width);
	auto uint_type = to_unsigned_basetype(integer_width);

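	// First pass: only request the subgroup features / extensions this op needs.
	// The actual code is emitted in the second switch further down.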
	switch (op)
	{
	case OpGroupNonUniformElect:
		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect);
		break;

	case OpGroupNonUniformBallotBitCount:
	{
		const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
		if (operation == GroupOperationReduce)
			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
		else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
	}
	break;

	case OpGroupNonUniformBallotBitExtract:
		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
		break;

	case OpGroupNonUniformInverseBallot:
		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
		break;

	case OpGroupNonUniformBallot:
		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot);
		break;

	case OpGroupNonUniformBallotFindLSB:
	case OpGroupNonUniformBallotFindMSB:
		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
		break;

	case OpGroupNonUniformBroadcast:
	case OpGroupNonUniformBroadcastFirst:
		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
		break;

	case OpGroupNonUniformShuffle:
	case OpGroupNonUniformShuffleXor:
		require_extension_internal("GL_KHR_shader_subgroup_shuffle");
		break;

	case OpGroupNonUniformShuffleUp:
	case OpGroupNonUniformShuffleDown:
		require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
		break;

	case OpGroupNonUniformAll:
	case OpGroupNonUniformAny:
	case OpGroupNonUniformAllEqual:
	{
		const SPIRType &type = expression_type(ops[3]);
		if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
		else
			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT);
	}
	break;

	case OpGroupNonUniformFAdd:
	case OpGroupNonUniformFMul:
	case OpGroupNonUniformFMin:
	case OpGroupNonUniformFMax:
	case OpGroupNonUniformIAdd:
	case OpGroupNonUniformIMul:
	case OpGroupNonUniformSMin:
	case OpGroupNonUniformSMax:
	case OpGroupNonUniformUMin:
	case OpGroupNonUniformUMax:
	case OpGroupNonUniformBitwiseAnd:
	case OpGroupNonUniformBitwiseOr:
	case OpGroupNonUniformBitwiseXor:
	case OpGroupNonUniformLogicalAnd:
	case OpGroupNonUniformLogicalOr:
	case OpGroupNonUniformLogicalXor:
	{
		auto operation = static_cast<GroupOperation>(ops[3]);
		if (operation == GroupOperationClusteredReduce)
		{
			require_extension_internal("GL_KHR_shader_subgroup_clustered");
		}
		else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
		         operation == GroupOperationReduce)
		{
			require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
		}
		else
			SPIRV_CROSS_THROW("Invalid group operation.");
		break;
	}

	case OpGroupNonUniformQuadSwap:
	case OpGroupNonUniformQuadBroadcast:
		require_extension_internal("GL_KHR_shader_subgroup_quad");
		break;

	default:
		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
	}

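	// Second pass: emit the actual subgroup builtin call.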
	uint32_t result_type = ops[0];
	uint32_t id = ops[1];

	auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
	if (scope != ScopeSubgroup)
		SPIRV_CROSS_THROW("Only subgroup scope is supported.");

	switch (op)
	{
	case OpGroupNonUniformElect:
		emit_op(result_type, id, "subgroupElect()", true);
		break;

	case OpGroupNonUniformBroadcast:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
		break;

	case OpGroupNonUniformBroadcastFirst:
		emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
		break;

	case OpGroupNonUniformBallot:
		emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
		break;

	case OpGroupNonUniformInverseBallot:
		emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
		break;

	case OpGroupNonUniformBallotBitExtract:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
		break;

	case OpGroupNonUniformBallotFindLSB:
		emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
		break;

	case OpGroupNonUniformBallotFindMSB:
		emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
		break;

	case OpGroupNonUniformBallotBitCount:
	{
		auto operation = static_cast<GroupOperation>(ops[3]);
		if (operation == GroupOperationReduce)
			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
		else if (operation == GroupOperationInclusiveScan)
			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
		else if (operation == GroupOperationExclusiveScan)
			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
		else
			SPIRV_CROSS_THROW("Invalid BitCount operation.");
		break;
	}

	case OpGroupNonUniformShuffle:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
		break;

	case OpGroupNonUniformShuffleXor:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
		break;

	case OpGroupNonUniformShuffleUp:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
		break;

	case OpGroupNonUniformShuffleDown:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
		break;

	case OpGroupNonUniformAll:
		emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
		break;

	case OpGroupNonUniformAny:
		emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
		break;

	case OpGroupNonUniformAllEqual:
		emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
		break;

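	// The macros below expand each arithmetic/logical group op into its four GLSL variants, e.g.
	// GLSL_GROUP_OP(FAdd, Add) maps Reduce / InclusiveScan / ExclusiveScan / ClusteredReduce to
	// subgroupAdd / subgroupInclusiveAdd / subgroupExclusiveAdd / subgroupClusteredAdd.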
	// clang-format off
#define GLSL_GROUP_OP(op, glsl_op) \
case OpGroupNonUniform##op: \
	{ \
		auto operation = static_cast<GroupOperation>(ops[3]); \
		if (operation == GroupOperationReduce) \
			emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
		else if (operation == GroupOperationInclusiveScan) \
			emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
		else if (operation == GroupOperationExclusiveScan) \
			emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
		else if (operation == GroupOperationClusteredReduce) \
			emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
		else \
			SPIRV_CROSS_THROW("Invalid group operation."); \
		break; \
	}

#define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
case OpGroupNonUniform##op: \
	{ \
		auto operation = static_cast<GroupOperation>(ops[3]); \
		if (operation == GroupOperationReduce) \
			emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
		else if (operation == GroupOperationInclusiveScan) \
			emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
		else if (operation == GroupOperationExclusiveScan) \
			emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
		else if (operation == GroupOperationClusteredReduce) \
			emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
		else \
			SPIRV_CROSS_THROW("Invalid group operation."); \
		break; \
	}

	GLSL_GROUP_OP(FAdd, Add)
	GLSL_GROUP_OP(FMul, Mul)
	GLSL_GROUP_OP(FMin, Min)
	GLSL_GROUP_OP(FMax, Max)
	GLSL_GROUP_OP(IAdd, Add)
	GLSL_GROUP_OP(IMul, Mul)
	GLSL_GROUP_OP_CAST(SMin, Min, int_type)
	GLSL_GROUP_OP_CAST(SMax, Max, int_type)
	GLSL_GROUP_OP_CAST(UMin, Min, uint_type)
	GLSL_GROUP_OP_CAST(UMax, Max, uint_type)
	GLSL_GROUP_OP(BitwiseAnd, And)
	GLSL_GROUP_OP(BitwiseOr, Or)
	GLSL_GROUP_OP(BitwiseXor, Xor)
	GLSL_GROUP_OP(LogicalAnd, And)
	GLSL_GROUP_OP(LogicalOr, Or)
	GLSL_GROUP_OP(LogicalXor, Xor)
#undef GLSL_GROUP_OP
#undef GLSL_GROUP_OP_CAST
	// clang-format on

	case OpGroupNonUniformQuadSwap:
	{
		uint32_t direction = evaluate_constant_u32(ops[4]);
		if (direction == 0)
			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
		else if (direction == 1)
			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
		else if (direction == 2)
			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
		else
			SPIRV_CROSS_THROW("Invalid quad swap direction.");
		break;
	}

	case OpGroupNonUniformQuadBroadcast:
	{
		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
		break;
	}

	default:
		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
	}

	register_control_dependent_expression(id);
}
8378
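// Returns the GLSL helper function (or constructor) implementing OpBitcast from in_type to out_type.
// An empty string means no cast is required and the expression can be used directly.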
string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
{
	// OpBitcast can deal with pointers.
	if (out_type.pointer || in_type.pointer)
	{
		if (out_type.vecsize == 2 || in_type.vecsize == 2)
			require_extension_internal("GL_EXT_buffer_reference_uvec2");
		return type_to_glsl(out_type);
	}

	if (out_type.basetype == in_type.basetype)
		return "";

	assert(out_type.basetype != SPIRType::Boolean);
	assert(in_type.basetype != SPIRType::Boolean);

	bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
	bool same_size_cast = out_type.width == in_type.width;

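	// E.g. ivec4 <-> uvec4 simply returns the destination constructor ("uvec4"), since same-width
	// integer conversions in GLSL preserve the bit pattern.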
	// Trivial bitcast case, casts between integers.
	if (integral_cast && same_size_cast)
		return type_to_glsl(out_type);

	// Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
	if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
		return "unpack8";
	else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
		return "pack16";
	else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
		return "pack32";

	// Floating <-> Integer special casts. Just have to enumerate all cases. :(
	// 16-bit, 32-bit and 64-bit floats.
	if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
	{
		if (is_legacy_es())
			SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
		else if (!options.es && options.version < 330)
			require_extension_internal("GL_ARB_shader_bit_encoding");
		return "floatBitsToUint";
	}
	else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
	{
		if (is_legacy_es())
			SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
		else if (!options.es && options.version < 330)
			require_extension_internal("GL_ARB_shader_bit_encoding");
		return "floatBitsToInt";
	}
	else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
	{
		if (is_legacy_es())
			SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
		else if (!options.es && options.version < 330)
			require_extension_internal("GL_ARB_shader_bit_encoding");
		return "uintBitsToFloat";
	}
	else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
	{
		if (is_legacy_es())
			SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
		else if (!options.es && options.version < 330)
			require_extension_internal("GL_ARB_shader_bit_encoding");
		return "intBitsToFloat";
	}

	else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
		return "doubleBitsToInt64";
	else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
		return "doubleBitsToUint64";
	else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
		return "int64BitsToDouble";
	else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
		return "uint64BitsToDouble";
	else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
		return "float16BitsToInt16";
	else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
		return "float16BitsToUint16";
	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
		return "int16BitsToFloat16";
	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
		return "uint16BitsToFloat16";

	// And finally, some even more special purpose casts.
	if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
		return "packUint2x32";
	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
		return "unpackUint2x32";
	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
		return "unpackFloat2x16";
	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
		return "packFloat2x16";
	else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
		return "packInt2x16";
	else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
		return "unpackInt2x16";
	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
		return "packUint2x16";
	else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
		return "unpackUint2x16";
	else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
		return "packInt4x16";
	else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
		return "unpackInt4x16";
	else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
		return "packUint4x16";
	else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
		return "unpackUint4x16";

	return "";
}
8490
string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
{
	auto op = bitcast_glsl_op(result_type, expression_type(argument));
	if (op.empty())
		return to_enclosed_unpacked_expression(argument);
	else
		return join(op, "(", to_unpacked_expression(argument), ")");
}
8499
std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
{
	auto expr = to_expression(arg);
	auto &src_type = expression_type(arg);
	if (src_type.basetype != target_type)
	{
		auto target = src_type;
		target.basetype = target_type;
		expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
	}

	return expr;
}

std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
                                             const std::string &expr)
{
	if (target_type.basetype == expr_type)
		return expr;

	auto src_type = target_type;
	src_type.basetype = expr_type;
	return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
}
8524
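// Translates a SPIR-V BuiltIn (for the given storage class) into the matching GLSL built-in
// variable name, requesting any extensions or subgroup features the target needs along the way.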
8525string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
8526{
8527 switch (builtin)
8528 {
8529 case BuiltInPosition:
8530 return "gl_Position";
8531 case BuiltInPointSize:
8532 return "gl_PointSize";
8533 case BuiltInClipDistance:
8534 return "gl_ClipDistance";
8535 case BuiltInCullDistance:
8536 return "gl_CullDistance";
8537 case BuiltInVertexId:
8538 if (options.vulkan_semantics)
8539 SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
8540 "with GL semantics.");
8541 return "gl_VertexID";
8542 case BuiltInInstanceId:
8543 if (options.vulkan_semantics)
8544 {
8545 auto model = get_entry_point().model;
8546 switch (model)
8547 {
8548 case spv::ExecutionModelIntersectionKHR:
8549 case spv::ExecutionModelAnyHitKHR:
8550 case spv::ExecutionModelClosestHitKHR:
8551 // gl_InstanceID is allowed in these shaders.
8552 break;
8553
8554 default:
8555 SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
8556 "created with GL semantics.");
8557 }
8558 }
8559 if (!options.es && options.version < 140)
8560 {
8561 require_extension_internal(ext: "GL_ARB_draw_instanced");
8562 }
8563 return "gl_InstanceID";
8564 case BuiltInVertexIndex:
8565 if (options.vulkan_semantics)
8566 return "gl_VertexIndex";
8567 else
8568 return "gl_VertexID"; // gl_VertexID already has the base offset applied.
8569 case BuiltInInstanceIndex:
8570 if (options.vulkan_semantics)
8571 return "gl_InstanceIndex";
8572
8573 if (!options.es && options.version < 140)
8574 {
8575 require_extension_internal(ext: "GL_ARB_draw_instanced");
8576 }
8577
8578 if (options.vertex.support_nonzero_base_instance)
8579 {
8580 if (!options.vulkan_semantics)
8581 {
8582 // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
8583 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
8584 }
8585 return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
8586 }
8587 else
8588 return "gl_InstanceID";
8589 case BuiltInPrimitiveId:
8590 if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
8591 return "gl_PrimitiveIDIn";
8592 else
8593 return "gl_PrimitiveID";
8594 case BuiltInInvocationId:
8595 return "gl_InvocationID";
8596 case BuiltInLayer:
8597 return "gl_Layer";
8598 case BuiltInViewportIndex:
8599 return "gl_ViewportIndex";
8600 case BuiltInTessLevelOuter:
8601 return "gl_TessLevelOuter";
8602 case BuiltInTessLevelInner:
8603 return "gl_TessLevelInner";
8604 case BuiltInTessCoord:
8605 return "gl_TessCoord";
8606 case BuiltInFragCoord:
8607 return "gl_FragCoord";
8608 case BuiltInPointCoord:
8609 return "gl_PointCoord";
8610 case BuiltInFrontFacing:
8611 return "gl_FrontFacing";
8612 case BuiltInFragDepth:
8613 return "gl_FragDepth";
8614 case BuiltInNumWorkgroups:
8615 return "gl_NumWorkGroups";
8616 case BuiltInWorkgroupSize:
8617 return "gl_WorkGroupSize";
8618 case BuiltInWorkgroupId:
8619 return "gl_WorkGroupID";
8620 case BuiltInLocalInvocationId:
8621 return "gl_LocalInvocationID";
8622 case BuiltInGlobalInvocationId:
8623 return "gl_GlobalInvocationID";
8624 case BuiltInLocalInvocationIndex:
8625 return "gl_LocalInvocationIndex";
8626 case BuiltInHelperInvocation:
8627 return "gl_HelperInvocation";
8628
8629 case BuiltInBaseVertex:
8630 if (options.es)
8631 SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
8632
8633 if (options.vulkan_semantics)
8634 {
8635 if (options.version < 460)
8636 {
8637 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
8638 return "gl_BaseVertexARB";
8639 }
8640 return "gl_BaseVertex";
8641 }
8642 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8643 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
8644 return "SPIRV_Cross_BaseVertex";
8645
8646 case BuiltInBaseInstance:
8647 if (options.es)
8648 SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
8649
8650 if (options.vulkan_semantics)
8651 {
8652 if (options.version < 460)
8653 {
8654 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
8655 return "gl_BaseInstanceARB";
8656 }
8657 return "gl_BaseInstance";
8658 }
8659 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8660 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
8661 return "SPIRV_Cross_BaseInstance";
8662
8663 case BuiltInDrawIndex:
8664 if (options.es)
8665 SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
8666
8667 if (options.vulkan_semantics)
8668 {
8669 if (options.version < 460)
8670 {
8671 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
8672 return "gl_DrawIDARB";
8673 }
8674 return "gl_DrawID";
8675 }
8676 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8677 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
8678 return "gl_DrawIDARB";
8679
8680 case BuiltInSampleId:
8681 if (options.es && options.version < 320)
8682 require_extension_internal(ext: "GL_OES_sample_variables");
8683 if (!options.es && options.version < 400)
8684 SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400.");
8685 return "gl_SampleID";
8686
8687 case BuiltInSampleMask:
8688 if (options.es && options.version < 320)
8689 require_extension_internal(ext: "GL_OES_sample_variables");
8690 if (!options.es && options.version < 400)
8691 SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400.");
8692
8693 if (storage == StorageClassInput)
8694 return "gl_SampleMaskIn";
8695 else
8696 return "gl_SampleMask";
8697
8698 case BuiltInSamplePosition:
8699 if (options.es && options.version < 320)
8700 require_extension_internal(ext: "GL_OES_sample_variables");
8701 if (!options.es && options.version < 400)
8702 SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400.");
8703 return "gl_SamplePosition";
8704
8705 case BuiltInViewIndex:
8706 if (options.vulkan_semantics)
8707 return "gl_ViewIndex";
8708 else
8709 return "gl_ViewID_OVR";
8710
8711 case BuiltInNumSubgroups:
8712 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::NumSubgroups);
8713 return "gl_NumSubgroups";
8714
8715 case BuiltInSubgroupId:
8716 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupID);
8717 return "gl_SubgroupID";
8718
8719 case BuiltInSubgroupSize:
8720 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupSize);
8721 return "gl_SubgroupSize";
8722
8723 case BuiltInSubgroupLocalInvocationId:
8724 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupInvocationID);
8725 return "gl_SubgroupInvocationID";
8726
8727 case BuiltInSubgroupEqMask:
8728 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
8729 return "gl_SubgroupEqMask";
8730
8731 case BuiltInSubgroupGeMask:
8732 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
8733 return "gl_SubgroupGeMask";
8734
8735 case BuiltInSubgroupGtMask:
8736 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
8737 return "gl_SubgroupGtMask";
8738
8739 case BuiltInSubgroupLeMask:
8740 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
8741 return "gl_SubgroupLeMask";
8742
8743 case BuiltInSubgroupLtMask:
8744 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
8745 return "gl_SubgroupLtMask";
8746
8747 case BuiltInLaunchIdKHR:
8748 return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
8749 case BuiltInLaunchSizeKHR:
8750 return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
8751 case BuiltInWorldRayOriginKHR:
8752 return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
8753 case BuiltInWorldRayDirectionKHR:
8754 return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
8755 case BuiltInObjectRayOriginKHR:
8756 return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
8757 case BuiltInObjectRayDirectionKHR:
8758 return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
8759 case BuiltInRayTminKHR:
8760 return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
8761 case BuiltInRayTmaxKHR:
8762 return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
8763 case BuiltInInstanceCustomIndexKHR:
8764 return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
8765 case BuiltInObjectToWorldKHR:
8766 return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
8767 case BuiltInWorldToObjectKHR:
8768 return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
8769 case BuiltInHitTNV:
8770 // gl_HitTEXT is an alias of RayTMax in KHR.
8771 return "gl_HitTNV";
8772 case BuiltInHitKindKHR:
8773 return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
8774 case BuiltInIncomingRayFlagsKHR:
8775 return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";
8776
8777 case BuiltInBaryCoordKHR:
8778 {
8779 if (options.es && options.version < 320)
8780 SPIRV_CROSS_THROW("gl_BaryCoordEXT requires ESSL 320.");
8781 else if (!options.es && options.version < 450)
8782 SPIRV_CROSS_THROW("gl_BaryCoordEXT requires GLSL 450.");
8783
8784 if (barycentric_is_nv)
8785 {
8786 require_extension_internal(ext: "GL_NV_fragment_shader_barycentric");
8787 return "gl_BaryCoordNV";
8788 }
8789 else
8790 {
8791 require_extension_internal(ext: "GL_EXT_fragment_shader_barycentric");
8792 return "gl_BaryCoordEXT";
8793 }
8794 }
8795
8796 case BuiltInBaryCoordNoPerspNV:
8797 {
8798 if (options.es && options.version < 320)
8799 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires ESSL 320.");
8800 else if (!options.es && options.version < 450)
8801 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires GLSL 450.");
8802
8803 if (barycentric_is_nv)
8804 {
8805 require_extension_internal(ext: "GL_NV_fragment_shader_barycentric");
8806 return "gl_BaryCoordNoPerspNV";
8807 }
8808 else
8809 {
8810 require_extension_internal(ext: "GL_EXT_fragment_shader_barycentric");
8811 return "gl_BaryCoordNoPerspEXT";
8812 }
8813 }
8814
8815 case BuiltInFragStencilRefEXT:
8816 {
8817 if (!options.es)
8818 {
8819 require_extension_internal(ext: "GL_ARB_shader_stencil_export");
8820 return "gl_FragStencilRefARB";
8821 }
8822 else
8823 SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
8824 }
8825
8826 case BuiltInPrimitiveShadingRateKHR:
8827 {
8828 if (!options.vulkan_semantics)
8829 SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL.");
8830 require_extension_internal(ext: "GL_EXT_fragment_shading_rate");
8831 return "gl_PrimitiveShadingRateEXT";
8832 }
8833
8834 case BuiltInShadingRateKHR:
8835 {
8836 if (!options.vulkan_semantics)
8837 SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL.");
8838 require_extension_internal(ext: "GL_EXT_fragment_shading_rate");
8839 return "gl_ShadingRateEXT";
8840 }
8841
8842 case BuiltInDeviceIndex:
8843 if (!options.vulkan_semantics)
8844 SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
8845 require_extension_internal(ext: "GL_EXT_device_group");
8846 return "gl_DeviceIndex";
8847
8848 case BuiltInFullyCoveredEXT:
8849 if (!options.es)
8850 require_extension_internal(ext: "GL_NV_conservative_raster_underestimation");
8851 else
8852 SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation.");
8853 return "gl_FragFullyCoveredNV";
8854
8855 default:
8856 return join(ts: "gl_BuiltIn_", ts: convert_to_string(t: builtin));
8857 }
8858}
8859
8860const char *CompilerGLSL::index_to_swizzle(uint32_t index)
8861{
8862 switch (index)
8863 {
8864 case 0:
8865 return "x";
8866 case 1:
8867 return "y";
8868 case 2:
8869 return "z";
8870 case 3:
8871 return "w";
8872 default:
8873 return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in spec.
8874 }
8875}
8876
8877void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
8878 AccessChainFlags flags, bool & /*access_chain_is_arrayed*/,
8879 uint32_t index)
8880{
8881 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
8882 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
8883
8884 expr += "[";
8885
8886 if (index_is_literal)
8887 expr += convert_to_string(t: index);
8888 else
8889 expr += to_unpacked_expression(id: index, register_expression_read);
8890
8891 expr += "]";
8892}
8893
8894bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
8895{
8896 return true;
8897}
8898
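// Builds the textual access-chain expression for 'base' indexed by 'indices', walking arrays,
// structs, matrices and vectors in turn; 'meta' (when provided) receives packing/transpose info for the caller.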
8899string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
8900 AccessChainFlags flags, AccessChainMeta *meta)
8901{
8902 string expr;
8903
8904 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
8905 bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
8906 bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
8907 bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
8908 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
8909 bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;
8910
8911 if (!chain_only)
8912 {
8913 // We handle transpose explicitly, so don't resolve that here.
8914 auto *e = maybe_get<SPIRExpression>(id: base);
8915 bool old_transpose = e && e->need_transpose;
8916 if (e)
8917 e->need_transpose = false;
8918 expr = to_enclosed_expression(id: base, register_expression_read);
8919 if (e)
8920 e->need_transpose = old_transpose;
8921 }
8922
8923 // Start traversing type hierarchy at the proper non-pointer types,
8924 // but keep type_id referencing the original pointer for use below.
8925 uint32_t type_id = expression_type_id(id: base);
8926
8927 if (!backend.native_pointers)
8928 {
8929 if (ptr_chain)
8930 SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
8931
8932 // Wrapped buffer reference pointer types will need to poke into the internal "value" member before
8933 // continuing the access chain.
8934 if (should_dereference(id: base))
8935 {
8936 auto &type = get<SPIRType>(id: type_id);
8937 expr = dereference_expression(expr_type: type, expr);
8938 }
8939 }
8940
8941 const auto *type = &get_pointee_type(type_id);
8942
8943 bool access_chain_is_arrayed = expr.find_first_of(c: '[') != string::npos;
8944 bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(id: base);
8945 bool is_packed = has_extended_decoration(id: base, decoration: SPIRVCrossDecorationPhysicalTypePacked);
8946 uint32_t physical_type = get_extended_decoration(id: base, decoration: SPIRVCrossDecorationPhysicalTypeID);
8947 bool is_invariant = has_decoration(id: base, decoration: DecorationInvariant);
8948 bool relaxed_precision = has_decoration(id: base, decoration: DecorationRelaxedPrecision);
8949 bool pending_array_enclose = false;
8950 bool dimension_flatten = false;
8951
8952 const auto append_index = [&](uint32_t index, bool is_literal) {
8953 AccessChainFlags mod_flags = flags;
8954 if (!is_literal)
8955 mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
8956 access_chain_internal_append_index(expr, base, type, flags: mod_flags, access_chain_is_arrayed, index);
8957 };
8958
8959 for (uint32_t i = 0; i < count; i++)
8960 {
8961 uint32_t index = indices[i];
8962
8963 bool is_literal = index_is_literal;
8964 if (is_literal && msb_is_id && (index >> 31u) != 0u)
8965 {
8966 is_literal = false;
8967 index &= 0x7fffffffu;
8968 }
8969
8970 // Pointer chains
8971 if (ptr_chain && i == 0)
8972 {
8973 // If we are flattening multidimensional arrays, only create opening bracket on first
8974 // array index.
8975 if (options.flatten_multidimensional_arrays)
8976 {
8977 dimension_flatten = type->array.size() >= 1;
8978 pending_array_enclose = dimension_flatten;
8979 if (pending_array_enclose)
8980 expr += "[";
8981 }
8982
8983 if (options.flatten_multidimensional_arrays && dimension_flatten)
8984 {
8985 // If we are flattening multidimensional arrays, do manual stride computation.
8986 if (is_literal)
8987 expr += convert_to_string(t: index);
8988 else
8989 expr += to_enclosed_expression(id: index, register_expression_read);
8990
8991 for (auto j = uint32_t(type->array.size()); j; j--)
8992 {
8993 expr += " * ";
8994 expr += enclose_expression(expr: to_array_size(type: *type, index: j - 1));
8995 }
8996
8997 if (type->array.empty())
8998 pending_array_enclose = false;
8999 else
9000 expr += " + ";
9001
9002 if (!pending_array_enclose)
9003 expr += "]";
9004 }
9005 else
9006 {
9007 append_index(index, is_literal);
9008 }
9009
9010 if (type->basetype == SPIRType::ControlPointArray)
9011 {
9012 type_id = type->parent_type;
9013 type = &get<SPIRType>(id: type_id);
9014 }
9015
9016 access_chain_is_arrayed = true;
9017 }
9018 // Arrays
9019 else if (!type->array.empty())
9020 {
9021 // If we are flattening multidimensional arrays, only create opening bracket on first
9022 // array index.
9023 if (options.flatten_multidimensional_arrays && !pending_array_enclose)
9024 {
9025 dimension_flatten = type->array.size() > 1;
9026 pending_array_enclose = dimension_flatten;
9027 if (pending_array_enclose)
9028 expr += "[";
9029 }
9030
9031 assert(type->parent_type);
9032
9033 auto *var = maybe_get<SPIRVariable>(id: base);
9034 if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(var: *var) &&
9035 !has_decoration(id: type->self, decoration: DecorationBlock))
9036 {
9037 // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
9038 // Normally, these variables live in blocks when compiled from GLSL,
9039 // but HLSL seems to just emit straight arrays here.
9040 // We must pretend this access goes through gl_in/gl_out arrays
9041 // to be able to access certain builtins as arrays.
9042 auto builtin = ir.meta[base].decoration.builtin_type;
9043 switch (builtin)
9044 {
9045 // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
9046 // case BuiltInClipDistance:
9047 case BuiltInPosition:
9048 case BuiltInPointSize:
9049 if (var->storage == StorageClassInput)
9050 expr = join(ts: "gl_in[", ts: to_expression(id: index, register_expression_read), ts: "].", ts&: expr);
9051 else if (var->storage == StorageClassOutput)
9052 expr = join(ts: "gl_out[", ts: to_expression(id: index, register_expression_read), ts: "].", ts&: expr);
9053 else
9054 append_index(index, is_literal);
9055 break;
9056
9057 default:
9058 append_index(index, is_literal);
9059 break;
9060 }
9061 }
9062 else if (options.flatten_multidimensional_arrays && dimension_flatten)
9063 {
9064 // If we are flattening multidimensional arrays, do manual stride computation.
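				// E.g. for float a[A][B], the chain a[i][j] flattens to a[i * B + j] (illustrative).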
9065 auto &parent_type = get<SPIRType>(id: type->parent_type);
9066
9067 if (is_literal)
9068 expr += convert_to_string(t: index);
9069 else
9070 expr += to_enclosed_expression(id: index, register_expression_read);
9071
9072 for (auto j = uint32_t(parent_type.array.size()); j; j--)
9073 {
9074 expr += " * ";
9075 expr += enclose_expression(expr: to_array_size(type: parent_type, index: j - 1));
9076 }
9077
9078 if (parent_type.array.empty())
9079 pending_array_enclose = false;
9080 else
9081 expr += " + ";
9082
9083 if (!pending_array_enclose)
9084 expr += "]";
9085 }
9086 // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
9087 // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
9088 else if (!builtin_translates_to_nonarray(builtin: BuiltIn(get_decoration(id: base, decoration: DecorationBuiltIn))))
9089 {
9090 append_index(index, is_literal);
9091 }
9092
9093 type_id = type->parent_type;
9094 type = &get<SPIRType>(id: type_id);
9095
9096 access_chain_is_arrayed = true;
9097 }
9098 // For structs, the index refers to a constant, which indexes into the members, possibly through a redirection mapping.
9099 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
9100 else if (type->basetype == SPIRType::Struct)
9101 {
9102 if (!is_literal)
9103 index = evaluate_constant_u32(id: index);
9104
9105 if (index < uint32_t(type->member_type_index_redirection.size()))
9106 index = type->member_type_index_redirection[index];
9107
9108 if (index >= type->member_types.size())
9109 SPIRV_CROSS_THROW("Member index is out of bounds!");
9110
9111 BuiltIn builtin;
9112 if (is_member_builtin(type: *type, index, builtin: &builtin) && access_chain_needs_stage_io_builtin_translation(base))
9113 {
9114 if (access_chain_is_arrayed)
9115 {
9116 expr += ".";
9117 expr += builtin_to_glsl(builtin, storage: type->storage);
9118 }
9119 else
9120 expr = builtin_to_glsl(builtin, storage: type->storage);
9121 }
9122 else
9123 {
9124 // If the member has a qualified name, use it as the entire chain
9125 string qual_mbr_name = get_member_qualified_name(type_id, index);
9126 if (!qual_mbr_name.empty())
9127 expr = qual_mbr_name;
9128 else if (flatten_member_reference)
9129 expr += join(ts: "_", ts: to_member_name(type: *type, index));
9130 else
9131 expr += to_member_reference(base, type: *type, index, ptr_chain);
9132 }
9133
9134 if (has_member_decoration(id: type->self, index, decoration: DecorationInvariant))
9135 is_invariant = true;
9136 if (has_member_decoration(id: type->self, index, decoration: DecorationRelaxedPrecision))
9137 relaxed_precision = true;
9138
9139 is_packed = member_is_packed_physical_type(type: *type, index);
9140 if (member_is_remapped_physical_type(type: *type, index))
9141 physical_type = get_extended_member_decoration(type: type->self, index, decoration: SPIRVCrossDecorationPhysicalTypeID);
9142 else
9143 physical_type = 0;
9144
9145 row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(type: *type, index);
9146 type = &get<SPIRType>(id: type->member_types[index]);
9147 }
9148 // Matrix -> Vector
9149 else if (type->columns > 1)
9150 {
9151 // If we have a row-major matrix here, we need to defer any transpose in case this access chain
9152 // is used to store a column. We can resolve it right here and now if we access a scalar directly,
9153 // by flipping indexing order of the matrix.
9154
9155 expr += "[";
9156 if (is_literal)
9157 expr += convert_to_string(t: index);
9158 else
9159 expr += to_unpacked_expression(id: index, register_expression_read);
9160 expr += "]";
9161
9162 type_id = type->parent_type;
9163 type = &get<SPIRType>(id: type_id);
9164 }
9165 // Vector -> Scalar
9166 else if (type->vecsize > 1)
9167 {
9168 string deferred_index;
9169 if (row_major_matrix_needs_conversion)
9170 {
9171 // Flip indexing order.
9172 auto column_index = expr.find_last_of(c: '[');
9173 if (column_index != string::npos)
9174 {
9175 deferred_index = expr.substr(pos: column_index);
9176 expr.resize(n: column_index);
9177 }
9178 }
9179
9180 // Internally, access chain implementation can also be used on composites,
9181 // ignore scalar access workarounds in this case.
9182 StorageClass effective_storage = StorageClassGeneric;
9183 bool ignore_potential_sliced_writes = false;
9184 if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
9185 {
9186 if (expression_type(id: base).pointer)
9187 effective_storage = get_expression_effective_storage_class(ptr: base);
9188
9189 // Special consideration for control points.
9190 // Control points can only be written by InvocationID, so there is no need
9191 // to consider scalar access chains here.
9192 // Cleans up some cases where it's very painful to determine the accurate storage class
9193 // since blocks can be partially masked ...
9194 auto *var = maybe_get_backing_variable(chain: base);
9195 if (var && var->storage == StorageClassOutput &&
9196 get_execution_model() == ExecutionModelTessellationControl &&
9197 !has_decoration(id: var->self, decoration: DecorationPatch))
9198 {
9199 ignore_potential_sliced_writes = true;
9200 }
9201 }
9202 else
9203 ignore_potential_sliced_writes = true;
9204
9205 if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
9206 {
9207 // On some backends, we might not be able to safely access individual scalars in a vector.
9208 // To work around this, we might have to cast the access chain reference to something which can,
9209 // like a pointer to scalar, which we can then index into.
9210 prepare_access_chain_for_scalar_access(expr, type: get<SPIRType>(id: type->parent_type), storage: effective_storage,
9211 is_packed);
9212 }
9213
9214 if (is_literal)
9215 {
9216 bool out_of_bounds = (index >= type->vecsize);
9217
9218 if (!is_packed && !row_major_matrix_needs_conversion)
9219 {
9220 expr += ".";
9221 expr += index_to_swizzle(index: out_of_bounds ? 0 : index);
9222 }
9223 else
9224 {
9225 // For packed vectors, we can only access them as an array, not by swizzle.
9226 expr += join(ts: "[", ts: out_of_bounds ? 0 : index, ts: "]");
9227 }
9228 }
9229 else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
9230 {
9231 auto &c = get<SPIRConstant>(id: index);
9232 bool out_of_bounds = (c.scalar() >= type->vecsize);
9233
9234 if (c.specialization)
9235 {
9236 // If the index is a spec constant, we cannot turn extract into a swizzle.
9237 expr += join(ts: "[", ts: out_of_bounds ? "0" : to_expression(id: index), ts: "]");
9238 }
9239 else
9240 {
9241 expr += ".";
9242 expr += index_to_swizzle(index: out_of_bounds ? 0 : c.scalar());
9243 }
9244 }
9245 else
9246 {
9247 expr += "[";
9248 expr += to_unpacked_expression(id: index, register_expression_read);
9249 expr += "]";
9250 }
9251
9252 if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
9253 {
9254 prepare_access_chain_for_scalar_access(expr, type: get<SPIRType>(id: type->parent_type), storage: effective_storage,
9255 is_packed);
9256 }
9257
9258 expr += deferred_index;
9259 row_major_matrix_needs_conversion = false;
9260
9261 is_packed = false;
9262 physical_type = 0;
9263 type_id = type->parent_type;
9264 type = &get<SPIRType>(id: type_id);
9265 }
9266 else if (!backend.allow_truncated_access_chain)
9267 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
9268 }
9269
9270 if (pending_array_enclose)
9271 {
9272 SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, "
9273 "but the access chain was terminated in the middle of a multidimensional array. "
9274 "This is not supported.");
9275 }
9276
9277 if (meta)
9278 {
9279 meta->need_transpose = row_major_matrix_needs_conversion;
9280 meta->storage_is_packed = is_packed;
9281 meta->storage_is_invariant = is_invariant;
9282 meta->storage_physical_type = physical_type;
9283 meta->relaxed_precision = relaxed_precision;
9284 }
9285
9286 return expr;
9287}
9288
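// Default implementation is a no-op. Backends which cannot safely index individual scalars
// of a vector in certain storage classes (see the workaround comments at the call sites above)
// can override this hook to rewrite the access chain expression as needed.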
9289void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
9290{
9291}
9292
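// Builds the name of a flattened struct member by joining the base name and the member name
// with an underscore, e.g. "UBO" + "color" -> "UBO_color" (names illustrative), and sanitizes
// consecutive underscores which would otherwise be reserved identifiers in GLSL.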
9293string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
9294{
9295 auto ret = join(ts: basename, ts: "_", ts: to_member_name(type, index));
9296 ParsedIR::sanitize_underscores(str&: ret);
9297 return ret;
9298}
9299
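// Entry point for access chain code-gen. Three paths exist:
// buffer blocks which were flattened into a plain array (flattened_access_chain),
// structs which were flattened into individual variables (name-mangled member access),
// and the common case which defers to access_chain_internal().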
9300string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
9301 AccessChainMeta *meta, bool ptr_chain)
9302{
9303 if (flattened_buffer_blocks.count(x: base))
9304 {
9305 uint32_t matrix_stride = 0;
9306 uint32_t array_stride = 0;
9307 bool need_transpose = false;
9308 flattened_access_chain_offset(basetype: expression_type(id: base), indices, count, offset: 0, word_stride: 16, need_transpose: &need_transpose, matrix_stride: &matrix_stride,
9309 array_stride: &array_stride, ptr_chain);
9310
9311 if (meta)
9312 {
9313 meta->need_transpose = target_type.columns > 1 && need_transpose;
9314 meta->storage_is_packed = false;
9315 }
9316
9317 return flattened_access_chain(base, indices, count, target_type, offset: 0, matrix_stride, array_stride,
9318 need_transpose);
9319 }
9320 else if (flattened_structs.count(x: base) && count > 0)
9321 {
9322 AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
9323 if (ptr_chain)
9324 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
9325
9326 if (flattened_structs[base])
9327 {
9328 flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
9329 if (meta)
9330 meta->flattened_struct = target_type.basetype == SPIRType::Struct;
9331 }
9332
9333 auto chain = access_chain_internal(base, indices, count, flags, meta: nullptr).substr(pos: 1);
9334 if (meta)
9335 {
9336 meta->need_transpose = false;
9337 meta->storage_is_packed = false;
9338 }
9339
9340 auto basename = to_flattened_access_chain_expression(id: base);
9341 auto ret = join(ts&: basename, ts: "_", ts&: chain);
9342 ParsedIR::sanitize_underscores(str&: ret);
9343 return ret;
9344 }
9345 else
9346 {
9347 AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
9348 if (ptr_chain)
9349 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
9350 return access_chain_internal(base, indices, count, flags, meta);
9351 }
9352}
9353
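// Reconstructs a struct value from its flattened members by emitting a constructor expression,
// recursing into nested structs, e.g. something like Foo(foo_a, foo_b) (names illustrative).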
9354string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
9355{
9356 auto expr = type_to_glsl_constructor(type);
9357 expr += '(';
9358
9359 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
9360 {
9361 if (i)
9362 expr += ", ";
9363
9364 auto &member_type = get<SPIRType>(id: type.member_types[i]);
9365 if (member_type.basetype == SPIRType::Struct)
9366 expr += load_flattened_struct(basename: to_flattened_struct_member(basename, type, index: i), type: member_type);
9367 else
9368 expr += to_flattened_struct_member(basename, type, index: i);
9369 }
9370 expr += ')';
9371 return expr;
9372}
9373
9374std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
9375{
9376 // Do not use to_expression as that will unflatten access chains.
9377 string basename;
9378 if (const auto *var = maybe_get<SPIRVariable>(id))
9379 basename = to_name(id: var->self);
9380 else if (const auto *expr = maybe_get<SPIRExpression>(id))
9381 basename = expr->expression;
9382 else
9383 basename = to_expression(id);
9384
9385 return basename;
9386}
9387
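// Stores a struct value into a flattened struct by emitting one assignment per member,
// recursing into nested structs, e.g. foo_a = value.a; foo_b = value.b; (names illustrative).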
9388void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
9389 const SmallVector<uint32_t> &indices)
9390{
9391 SmallVector<uint32_t> sub_indices = indices;
9392 sub_indices.push_back(t: 0);
9393
9394 auto *member_type = &type;
9395 for (auto &index : indices)
9396 member_type = &get<SPIRType>(id: member_type->member_types[index]);
9397
9398 for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
9399 {
9400 sub_indices.back() = i;
9401 auto lhs = join(ts: basename, ts: "_", ts: to_member_name(type: *member_type, index: i));
9402 ParsedIR::sanitize_underscores(str&: lhs);
9403
9404 if (get<SPIRType>(id: member_type->member_types[i]).basetype == SPIRType::Struct)
9405 {
9406 store_flattened_struct(basename: lhs, rhs_id, type, indices: sub_indices);
9407 }
9408 else
9409 {
9410 auto rhs = to_expression(id: rhs_id) + to_multi_member_reference(type, indices: sub_indices);
9411 statement(ts&: lhs, ts: " = ", ts&: rhs, ts: ";");
9412 }
9413 }
9414}
9415
9416void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
9417{
9418 auto &type = expression_type(id: lhs_id);
9419 auto basename = to_flattened_access_chain_expression(id: lhs_id);
9420 store_flattened_struct(basename, rhs_id: value, type, indices: {});
9421}
9422
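// Dispatches flattened buffer loads based on the resulting type: structs, matrices and
// vectors/scalars each have a dedicated emit path below. Arrays cannot be flattened.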
9423std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
9424 const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
9425 uint32_t /* array_stride */, bool need_transpose)
9426{
9427 if (!target_type.array.empty())
9428 SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
9429 else if (target_type.basetype == SPIRType::Struct)
9430 return flattened_access_chain_struct(base, indices, count, target_type, offset);
9431 else if (target_type.columns > 1)
9432 return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
9433 else
9434 return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
9435}
9436
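// Emits a struct constructor (or a brace initializer for backends which cannot declare structs
// inline) where each member is loaded recursively at its declared byte offset.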
9437std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
9438 const SPIRType &target_type, uint32_t offset)
9439{
9440 std::string expr;
9441
9442 if (backend.can_declare_struct_inline)
9443 {
9444 expr += type_to_glsl_constructor(type: target_type);
9445 expr += "(";
9446 }
9447 else
9448 expr += "{";
9449
9450 for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
9451 {
9452 if (i != 0)
9453 expr += ", ";
9454
9455 const SPIRType &member_type = get<SPIRType>(id: target_type.member_types[i]);
9456 uint32_t member_offset = type_struct_member_offset(type: target_type, index: i);
9457
9458 // The access chain terminates at the struct, so we need to find matrix strides and row-major information
9459 // ahead of time.
9460 bool need_transpose = false;
9461 uint32_t matrix_stride = 0;
9462 if (member_type.columns > 1)
9463 {
9464 need_transpose = combined_decoration_for_member(type: target_type, index: i).get(bit: DecorationRowMajor);
9465 matrix_stride = type_struct_member_matrix_stride(type: target_type, index: i);
9466 }
9467
9468 auto tmp = flattened_access_chain(base, indices, count, target_type: member_type, offset: offset + member_offset, matrix_stride,
9469 0 /* array_stride */, need_transpose);
9470
9471 // Cannot forward transpositions, so resolve them here.
9472 if (need_transpose)
9473 expr += convert_row_major_matrix(exp_str: tmp, exp_type: member_type, physical_type_id: 0, is_packed: false);
9474 else
9475 expr += tmp;
9476 }
9477
9478 expr += backend.can_declare_struct_inline ? ")" : "}";
9479
9480 return expr;
9481}
9482
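// Emits a matrix constructor with one flattened vector load per column, each offset by matrix_stride.
// For row-major matrices, vecsize and columns are swapped here; the actual transpose is resolved
// by the caller (see convert_row_major_matrix in the struct path above).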
9483std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
9484 const SPIRType &target_type, uint32_t offset,
9485 uint32_t matrix_stride, bool need_transpose)
9486{
9487 assert(matrix_stride);
9488 SPIRType tmp_type = target_type;
9489 if (need_transpose)
9490 swap(a&: tmp_type.vecsize, b&: tmp_type.columns);
9491
9492 std::string expr;
9493
9494 expr += type_to_glsl_constructor(type: tmp_type);
9495 expr += "(";
9496
9497 for (uint32_t i = 0; i < tmp_type.columns; i++)
9498 {
9499 if (i != 0)
9500 expr += ", ";
9501
9502 expr += flattened_access_chain_vector(base, indices, count, target_type: tmp_type, offset: offset + i * matrix_stride, matrix_stride,
9503 /* need_transpose= */ false);
9504 }
9505
9506 expr += ")";
9507
9508 return expr;
9509}
9510
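// Loads a vector or scalar from a flattened buffer, which is exposed as an array of vec4s.
// The byte offset is converted into a vec4 index plus a swizzle, e.g. a 24 byte offset with
// 4 byte components and a vec2 result becomes roughly buffer[1].zw, assuming no dynamic index
// (buffer name illustrative). With need_transpose set, components are gathered one at a time,
// strided by matrix_stride.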
9511std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
9512 const SPIRType &target_type, uint32_t offset,
9513 uint32_t matrix_stride, bool need_transpose)
9514{
9515 auto result = flattened_access_chain_offset(basetype: expression_type(id: base), indices, count, offset, word_stride: 16);
9516
9517 auto buffer_name = to_name(id: expression_type(id: base).self);
9518
9519 if (need_transpose)
9520 {
9521 std::string expr;
9522
9523 if (target_type.vecsize > 1)
9524 {
9525 expr += type_to_glsl_constructor(type: target_type);
9526 expr += "(";
9527 }
9528
9529 for (uint32_t i = 0; i < target_type.vecsize; ++i)
9530 {
9531 if (i != 0)
9532 expr += ", ";
9533
9534 uint32_t component_offset = result.second + i * matrix_stride;
9535
9536 assert(component_offset % (target_type.width / 8) == 0);
9537 uint32_t index = component_offset / (target_type.width / 8);
9538
9539 expr += buffer_name;
9540 expr += "[";
9541 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
9542 expr += convert_to_string(t: index / 4);
9543 expr += "]";
9544
9545 expr += vector_swizzle(vecsize: 1, index: index % 4);
9546 }
9547
9548 if (target_type.vecsize > 1)
9549 {
9550 expr += ")";
9551 }
9552
9553 return expr;
9554 }
9555 else
9556 {
9557 assert(result.second % (target_type.width / 8) == 0);
9558 uint32_t index = result.second / (target_type.width / 8);
9559
9560 std::string expr;
9561
9562 expr += buffer_name;
9563 expr += "[";
9564 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
9565 expr += convert_to_string(t: index / 4);
9566 expr += "]";
9567
9568 expr += vector_swizzle(vecsize: target_type.vecsize, index: index % 4);
9569
9570 return expr;
9571 }
9572}
9573
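// Walks an access chain over a flattened buffer and returns a pair of
// (dynamic index expression, constant byte offset). The expression part is measured in units
// of word_stride and is either empty or ends with "+ ", ready to be prepended to the constant
// index computed by the callers above.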
9574std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
9575 const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
9576 bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
9577{
9578 // Start traversing type hierarchy at the proper non-pointer types.
9579 const auto *type = &get_pointee_type(type: basetype);
9580
9581 std::string expr;
9582
9583 // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
9584 bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
9585 uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
9586 uint32_t array_stride = out_array_stride ? *out_array_stride : 0;
9587
9588 for (uint32_t i = 0; i < count; i++)
9589 {
9590 uint32_t index = indices[i];
9591
9592 // Pointers
9593 if (ptr_chain && i == 0)
9594 {
9595 // Here, the pointer type will be decorated with an array stride.
9596 array_stride = get_decoration(id: basetype.self, decoration: DecorationArrayStride);
9597 if (!array_stride)
9598 SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
9599
9600 auto *constant = maybe_get<SPIRConstant>(id: index);
9601 if (constant)
9602 {
9603 // Constant array access.
9604 offset += constant->scalar() * array_stride;
9605 }
9606 else
9607 {
9608 // Dynamic array access.
9609 if (array_stride % word_stride)
9610 {
9611 SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
9612 "of a 4-component vector. "
9613 "Likely culprit here is a float or vec2 array inside a push "
9614 "constant block which is std430. "
9615 "This cannot be flattened. Try using std140 layout instead.");
9616 }
9617
9618 expr += to_enclosed_expression(id: index);
9619 expr += " * ";
9620 expr += convert_to_string(t: array_stride / word_stride);
9621 expr += " + ";
9622 }
9623 }
9624 // Arrays
9625 else if (!type->array.empty())
9626 {
9627 auto *constant = maybe_get<SPIRConstant>(id: index);
9628 if (constant)
9629 {
9630 // Constant array access.
9631 offset += constant->scalar() * array_stride;
9632 }
9633 else
9634 {
9635 // Dynamic array access.
9636 if (array_stride % word_stride)
9637 {
9638 SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
9639 "of a 4-component vector. "
9640 "Likely culprit here is a float or vec2 array inside a push "
9641 "constant block which is std430. "
9642 "This cannot be flattened. Try using std140 layout instead.");
9643 }
9644
9645 expr += to_enclosed_expression(id: index, register_expression_read: false);
9646 expr += " * ";
9647 expr += convert_to_string(t: array_stride / word_stride);
9648 expr += " + ";
9649 }
9650
9651 uint32_t parent_type = type->parent_type;
9652 type = &get<SPIRType>(id: parent_type);
9653
9654 if (!type->array.empty())
9655 array_stride = get_decoration(id: parent_type, decoration: DecorationArrayStride);
9656 }
9657 // For structs, the index refers to a constant, which indexes into the members.
9658 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
9659 else if (type->basetype == SPIRType::Struct)
9660 {
9661 index = evaluate_constant_u32(id: index);
9662
9663 if (index >= type->member_types.size())
9664 SPIRV_CROSS_THROW("Member index is out of bounds!");
9665
9666 offset += type_struct_member_offset(type: *type, index);
9667
9668 auto &struct_type = *type;
9669 type = &get<SPIRType>(id: type->member_types[index]);
9670
9671 if (type->columns > 1)
9672 {
9673 matrix_stride = type_struct_member_matrix_stride(type: struct_type, index);
9674 row_major_matrix_needs_conversion =
9675 combined_decoration_for_member(type: struct_type, index).get(bit: DecorationRowMajor);
9676 }
9677 else
9678 row_major_matrix_needs_conversion = false;
9679
9680 if (!type->array.empty())
9681 array_stride = type_struct_member_array_stride(type: struct_type, index);
9682 }
9683 // Matrix -> Vector
9684 else if (type->columns > 1)
9685 {
9686 auto *constant = maybe_get<SPIRConstant>(id: index);
9687 if (constant)
9688 {
9689 index = evaluate_constant_u32(id: index);
9690 offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
9691 }
9692 else
9693 {
9694 uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
9695 // Dynamic array access.
9696 if (indexing_stride % word_stride)
9697 {
9698 SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
9699 "4-component vector. "
9700 "Likely culprit here is a row-major matrix being accessed dynamically. "
9701 "This cannot be flattened. Try using std140 layout instead.");
9702 }
9703
9704 expr += to_enclosed_expression(id: index, register_expression_read: false);
9705 expr += " * ";
9706 expr += convert_to_string(t: indexing_stride / word_stride);
9707 expr += " + ";
9708 }
9709
9710 type = &get<SPIRType>(id: type->parent_type);
9711 }
9712 // Vector -> Scalar
9713 else if (type->vecsize > 1)
9714 {
9715 auto *constant = maybe_get<SPIRConstant>(id: index);
9716 if (constant)
9717 {
9718 index = evaluate_constant_u32(id: index);
9719 offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
9720 }
9721 else
9722 {
9723 uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
9724
9725 // Dynamic array access.
9726 if (indexing_stride % word_stride)
9727 {
9728 SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
9729 "size of a 4-component vector. "
9730 "This cannot be flattened in legacy targets.");
9731 }
9732
9733 expr += to_enclosed_expression(id: index, register_expression_read: false);
9734 expr += " * ";
9735 expr += convert_to_string(t: indexing_stride / word_stride);
9736 expr += " + ";
9737 }
9738
9739 type = &get<SPIRType>(id: type->parent_type);
9740 }
9741 else
9742 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
9743 }
9744
9745 if (need_transpose)
9746 *need_transpose = row_major_matrix_needs_conversion;
9747 if (out_matrix_stride)
9748 *out_matrix_stride = matrix_stride;
9749 if (out_array_stride)
9750 *out_array_stride = array_stride;
9751
9752 return std::make_pair(x&: expr, y&: offset);
9753}
9754
9755bool CompilerGLSL::should_dereference(uint32_t id)
9756{
9757 const auto &type = expression_type(id);
9758 // Non-pointer expressions don't need to be dereferenced.
9759 if (!type.pointer)
9760 return false;
9761
9762 // Handles shouldn't be dereferenced either.
9763 if (!expression_is_lvalue(id))
9764 return false;
9765
9766 // If id is a variable but not a phi variable, we should not dereference it.
9767 if (auto *var = maybe_get<SPIRVariable>(id))
9768 return var->phi_variable;
9769
9770 // If id is an access chain, we should not dereference it.
9771 if (auto *expr = maybe_get<SPIRExpression>(id))
9772 return !expr->access_chain;
9773
9774 // Otherwise, we should dereference this pointer expression.
9775 return true;
9776}
9777
9778bool CompilerGLSL::should_forward(uint32_t id) const
9779{
9780 // If id is a variable, we will try to forward it regardless of the force_temporary check below.
9781 // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL.
9782
9783 auto *var = maybe_get<SPIRVariable>(id);
9784 if (var)
9785 {
9786 // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation.
9787 return !(has_decoration(id, decoration: DecorationBuiltIn) && has_decoration(id, decoration: DecorationVolatile));
9788 }
9789
9790 // For debugging emit temporary variables for all expressions
9791 if (options.force_temporary)
9792 return false;
9793
9794 // If an expression carries enough dependencies we need to stop forwarding at some point,
9795 // or we explode compilers. There are usually limits to how much we can nest expressions.
9796 auto *expr = maybe_get<SPIRExpression>(id);
9797 const uint32_t max_expression_dependencies = 64;
9798 if (expr && expr->expression_dependencies.size() >= max_expression_dependencies)
9799 return false;
9800
9801 if (expr && expr->loaded_from
9802 && has_decoration(id: expr->loaded_from, decoration: DecorationBuiltIn)
9803 && has_decoration(id: expr->loaded_from, decoration: DecorationVolatile))
9804 {
9805 // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation.
9806 return false;
9807 }
9808
9809 // Immutable expression can always be forwarded.
9810 if (is_immutable(id))
9811 return true;
9812
9813 return false;
9814}
9815
9816bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
9817{
9818 // Used only by opcodes which don't do any real "work"; they just swizzle data in some fashion.
9819 return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
9820}
9821
9822void CompilerGLSL::track_expression_read(uint32_t id)
9823{
9824 switch (ir.ids[id].get_type())
9825 {
9826 case TypeExpression:
9827 {
9828 auto &e = get<SPIRExpression>(id);
9829 for (auto implied_read : e.implied_read_expressions)
9830 track_expression_read(id: implied_read);
9831 break;
9832 }
9833
9834 case TypeAccessChain:
9835 {
9836 auto &e = get<SPIRAccessChain>(id);
9837 for (auto implied_read : e.implied_read_expressions)
9838 track_expression_read(id: implied_read);
9839 break;
9840 }
9841
9842 default:
9843 break;
9844 }
9845
9846 // If we try to read a forwarded temporary more than once, we will stamp out possibly complex code twice.
9847 // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
9848 if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
9849 {
9850 auto &v = expression_usage_counts[id];
9851 v++;
9852
9853 // If we create an expression outside a loop,
9854 // but access it inside a loop, we're implicitly reading it multiple times.
9855 // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
9856 // working inside the backend compiler.
9857 if (expression_read_implies_multiple_reads(id))
9858 v++;
9859
9860 if (v >= 2)
9861 {
9862 //if (v == 2)
9863 // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
9864
9865 // Force a recompile after this pass to avoid forwarding this variable.
9866 force_temporary_and_recompile(id);
9867 }
9868 }
9869}
9870
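// Returns true if a function call result can be forwarded as a plain expression:
// the result must not already be a forced temporary, every argument must be forwardable,
// and for impure calls every global and aliased variable must be forwardable as well.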
9871bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
9872{
9873 if (forced_temporaries.find(x: id) != end(cont&: forced_temporaries))
9874 return false;
9875
9876 for (uint32_t i = 0; i < num_args; i++)
9877 if (!should_forward(id: args[i]))
9878 return false;
9879
9880 // We need to forward globals as well.
9881 if (!pure)
9882 {
9883 for (auto global : global_variables)
9884 if (!should_forward(id: global))
9885 return false;
9886 for (auto aliased : aliased_variables)
9887 if (!should_forward(id: aliased))
9888 return false;
9889 }
9890
9891 return true;
9892}
9893
9894void CompilerGLSL::register_impure_function_call()
9895{
9896 // Impure functions can modify globals and aliased variables, so invalidate them as well.
9897 for (auto global : global_variables)
9898 flush_dependees(var&: get<SPIRVariable>(id: global));
9899 for (auto aliased : aliased_variables)
9900 flush_dependees(var&: get<SPIRVariable>(id: aliased));
9901}
9902
9903void CompilerGLSL::register_call_out_argument(uint32_t id)
9904{
9905 register_write(chain: id);
9906
9907 auto *var = maybe_get<SPIRVariable>(id);
9908 if (var)
9909 flush_variable_declaration(id: var->self);
9910}
9911
9912string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
9913{
9914 // These variables are always function local,
9915 // so make sure we emit the variable without storage qualifiers.
9916 // Some backends will inject custom variables locally in a function
9917 // with a storage qualifier which is not function-local.
9918 auto old_storage = var.storage;
9919 var.storage = StorageClassFunction;
9920 auto expr = variable_decl(variable: var);
9921 var.storage = old_storage;
9922 return expr;
9923}
9924
9925void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
9926{
9927 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
9928 if (var.allocate_temporary_copy && !flushed_phi_variables.count(x: var.self))
9929 {
9930 auto &type = get<SPIRType>(id: var.basetype);
9931 auto &flags = get_decoration_bitset(id: var.self);
9932 statement(ts: flags_to_qualifiers_glsl(type, flags), ts: variable_decl(type, name: join(ts: "_", ts: var.self, ts: "_copy")), ts: ";");
9933 flushed_phi_variables.insert(x: var.self);
9934 }
9935}
9936
9937void CompilerGLSL::flush_variable_declaration(uint32_t id)
9938{
9939 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
9940 auto *var = maybe_get<SPIRVariable>(id);
9941 if (var && var->deferred_declaration)
9942 {
9943 string initializer;
9944 if (options.force_zero_initialized_variables &&
9945 (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
9946 var->storage == StorageClassPrivate) &&
9947 !var->initializer && type_can_zero_initialize(type: get_variable_data_type(var: *var)))
9948 {
9949 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var: *var)));
9950 }
9951
9952 statement(ts: variable_decl_function_local(var&: *var), ts&: initializer, ts: ";");
9953 var->deferred_declaration = false;
9954 }
9955 if (var)
9956 {
9957 emit_variable_temporary_copies(var: *var);
9958 }
9959}
9960
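// Collapses chained swizzles where the trailing swizzle is an identity prefix (.x, .xy, ...),
// e.g. foobar.wyx.xy turns into foobar.wy. Returns true if the duplicate-swizzle pattern
// was recognized.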
9961bool CompilerGLSL::remove_duplicate_swizzle(string &op)
9962{
9963 auto pos = op.find_last_of(c: '.');
9964 if (pos == string::npos || pos == 0)
9965 return false;
9966
9967 string final_swiz = op.substr(pos: pos + 1, n: string::npos);
9968
9969 if (backend.swizzle_is_function)
9970 {
9971 if (final_swiz.size() < 2)
9972 return false;
9973
9974 if (final_swiz.substr(pos: final_swiz.size() - 2, n: string::npos) == "()")
9975 final_swiz.erase(pos: final_swiz.size() - 2, n: string::npos);
9976 else
9977 return false;
9978 }
9979
9980 // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
9981 // If so, and previous swizzle is of same length,
9982 // we can drop the final swizzle altogether.
9983 for (uint32_t i = 0; i < final_swiz.size(); i++)
9984 {
9985 static const char expected[] = { 'x', 'y', 'z', 'w' };
9986 if (i >= 4 || final_swiz[i] != expected[i])
9987 return false;
9988 }
9989
9990 auto prevpos = op.find_last_of(c: '.', pos: pos - 1);
9991 if (prevpos == string::npos)
9992 return false;
9993
9994 prevpos++;
9995
9996 // Make sure there are only swizzles here ...
9997 for (auto i = prevpos; i < pos; i++)
9998 {
9999 if (op[i] < 'w' || op[i] > 'z')
10000 {
10001 // If swizzles are foo.xyz() like in C++ backend for example, check for that.
10002 if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
10003 break;
10004 return false;
10005 }
10006 }
10007
10008 // If original swizzle is large enough, just carve out the components we need.
10009 // E.g. foobar.wyx.xy will turn into foobar.wy.
10010 if (pos - prevpos >= final_swiz.size())
10011 {
10012 op.erase(pos: prevpos + final_swiz.size(), n: string::npos);
10013
10014 // Add back the function call ...
10015 if (backend.swizzle_is_function)
10016 op += "()";
10017 }
10018 return true;
10019}
10020
10021// Optimizes away vector swizzles where we have something like
10022// vec3 foo;
10023// foo.xyz <-- swizzle expression does nothing.
10024 // This is a very common pattern after OpCompositeConstruct.
10025bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
10026{
10027 auto pos = op.find_last_of(c: '.');
10028 if (pos == string::npos || pos == 0)
10029 return false;
10030
10031 string final_swiz = op.substr(pos: pos + 1, n: string::npos);
10032
10033 if (backend.swizzle_is_function)
10034 {
10035 if (final_swiz.size() < 2)
10036 return false;
10037
10038 if (final_swiz.substr(pos: final_swiz.size() - 2, n: string::npos) == "()")
10039 final_swiz.erase(pos: final_swiz.size() - 2, n: string::npos);
10040 else
10041 return false;
10042 }
10043
10044 // Check if the final swizzle is of the form .x, .xy, .xyz, .xyzw or similar.
10045 // If so, and it covers the full vector (same length as the base type's vecsize),
10046 // we can drop the swizzle altogether.
10047 for (uint32_t i = 0; i < final_swiz.size(); i++)
10048 {
10049 static const char expected[] = { 'x', 'y', 'z', 'w' };
10050 if (i >= 4 || final_swiz[i] != expected[i])
10051 return false;
10052 }
10053
10054 auto &type = expression_type(id: base);
10055
10056 // Sanity checking ...
10057 assert(type.columns == 1 && type.array.empty());
10058
10059 if (type.vecsize == final_swiz.size())
10060 op.erase(pos: pos, n: string::npos);
10061 return true;
10062}
10063
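// Builds the argument list for a composite constructor, merging consecutive scalar extracts
// from the same base vector back into a single swizzle, so that e.g. foo.x, foo.y, foo.z
// can collapse into foo.xyz (and further into plain foo when the swizzle is a no-op).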
10064string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
10065{
10066 ID base = 0;
10067 string op;
10068 string subop;
10069
10070 // Can only merge swizzles for vectors.
10071 auto &type = get<SPIRType>(id: return_type);
10072 bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
10073 bool swizzle_optimization = false;
10074
10075 for (uint32_t i = 0; i < length; i++)
10076 {
10077 auto *e = maybe_get<SPIRExpression>(id: elems[i]);
10078
10079 // If we're merging another scalar which belongs to the same base
10080 // object, just merge the swizzles to avoid triggering more than one expression read where possible.
10081 if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
10082 {
10083 // Only supposed to be used for vector swizzle -> scalar.
10084 assert(!e->expression.empty() && e->expression.front() == '.');
10085 subop += e->expression.substr(pos: 1, n: string::npos);
10086 swizzle_optimization = true;
10087 }
10088 else
10089 {
10090 // We'll likely end up with duplicated swizzles, e.g.
10091 // foobar.xyz.xyz from patterns like
10092 // OpVectorShuffle
10093 // OpCompositeExtract x 3
10094 // OpCompositeConstruct 3x + other scalar.
10095 // Just modify op in-place.
10096 if (swizzle_optimization)
10097 {
10098 if (backend.swizzle_is_function)
10099 subop += "()";
10100
10101 // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
10102 // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
10103 // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
10104 // Essentially, we can only remove one set of swizzles, since that's what we have control over ...
10105 // Case 1:
10106 // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
10107 // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
10108 // Case 2:
10109 // foo.xyz: Duplicate swizzle won't kick in.
10110 // If foo is vec3, we can remove xyz, giving just foo.
10111 if (!remove_duplicate_swizzle(op&: subop))
10112 remove_unity_swizzle(base, op&: subop);
10113
10114 // Strips away redundant parens if we created them during component extraction.
10115 strip_enclosed_expression(expr&: subop);
10116 swizzle_optimization = false;
10117 op += subop;
10118 }
10119 else
10120 op += subop;
10121
10122 if (i)
10123 op += ", ";
10124
10125 bool uses_buffer_offset =
10126 type.basetype == SPIRType::Struct && has_member_decoration(id: type.self, index: i, decoration: DecorationOffset);
10127 subop = to_composite_constructor_expression(id: elems[i], block_like_type: uses_buffer_offset);
10128 }
10129
10130 base = e ? e->base_expression : ID(0);
10131 }
10132
10133 if (swizzle_optimization)
10134 {
10135 if (backend.swizzle_is_function)
10136 subop += "()";
10137
10138 if (!remove_duplicate_swizzle(op&: subop))
10139 remove_unity_swizzle(base, op&: subop);
10140 // Strips away redundant parens if we created them during component extraction.
10141 strip_enclosed_expression(expr&: subop);
10142 }
10143
10144 op += subop;
10145 return op;
10146}
10147
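// When separate images and samplers have been combined (or we do not target Vulkan GLSL),
// the original separate sampler and sampled-image arguments must be dropped from function calls.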
10148bool CompilerGLSL::skip_argument(uint32_t id) const
10149{
10150 if (!combined_image_samplers.empty() || !options.vulkan_semantics)
10151 {
10152 auto &type = expression_type(id);
10153 if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
10154 return true;
10155 }
10156 return false;
10157}
10158
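// Detects stores of the form <lhs> = <lhs> op expr and rewrites them as <lhs> op= expr,
// or <lhs>++ / <lhs>-- for increments and decrements by one.
// Returns true if such a statement was emitted.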
10159bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
10160{
10161 // Do this with strings because we have a very clear pattern we can check for and it avoids
10162 // adding lots of special cases to the code emission.
10163 if (rhs.size() < lhs.size() + 3)
10164 return false;
10165
10166 // Do not optimize matrices. They are a bit awkward to reason about in general
10167 // (in which order does the operation happen?), and it does not work on MSL anyway.
10168 if (type.vecsize > 1 && type.columns > 1)
10169 return false;
10170
10171 auto index = rhs.find(str: lhs);
10172 if (index != 0)
10173 return false;
10174
10175 // TODO: Shift operators, but it's not important for now.
10176 auto op = rhs.find_first_of(s: "+-/*%|&^", pos: lhs.size() + 1);
10177 if (op != lhs.size() + 1)
10178 return false;
10179
10180 // Check that the op is followed by space. This excludes && and ||.
10181 if (rhs[op + 1] != ' ')
10182 return false;
10183
10184 char bop = rhs[op];
10185 auto expr = rhs.substr(pos: lhs.size() + 3);
10186 // Try to find increments and decrements. Makes it look neater, as += 1 and -= 1 are fairly rare to see in real code.
10187 // Find some common patterns which are equivalent.
10188 if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
10189 statement(ts: lhs, ts&: bop, ts&: bop, ts: ";");
10190 else
10191 statement(ts: lhs, ts: " ", ts&: bop, ts: "= ", ts&: expr, ts: ";");
10192 return true;
10193}
10194
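// Marks a forwarded temporary as control-flow dependent, so that it is invalidated
// once code generation leaves the block it was emitted in.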
10195void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
10196{
10197 if (forwarded_temporaries.find(x: expr) == end(cont&: forwarded_temporaries))
10198 return;
10199
10200 assert(current_emitting_block);
10201 current_emitting_block->invalidate_expressions.push_back(t: expr);
10202}
10203
10204void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
10205{
10206 current_emitting_block = &block;
10207
10208 if (backend.requires_relaxed_precision_analysis)
10209 {
10210 // If PHI variables are consumed in unexpected precision contexts, copy them here.
10211 for (auto &phi : block.phi_variables)
10212 {
10213 auto itr = temporary_to_mirror_precision_alias.find(x: phi.function_variable);
10214 if (itr != temporary_to_mirror_precision_alias.end())
10215 {
10216 // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
10217 // so it helps to have handle_instruction_precision() on the outside of emit_instruction().
10218 EmbeddedInstruction inst;
10219 inst.op = OpCopyObject;
10220 inst.length = 3;
10221 inst.ops.push_back(t: expression_type_id(id: itr->first));
10222 inst.ops.push_back(t: itr->second);
10223 inst.ops.push_back(t: itr->first);
10224 emit_instruction(instr: inst);
10225 }
10226 }
10227 }
10228
10229 for (auto &op : block.ops)
10230 {
10231 auto temporary_copy = handle_instruction_precision(instr: op);
10232 emit_instruction(instr: op);
10233 if (temporary_copy.dst_id)
10234 {
10235 // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
10236 // so it helps to have handle_instruction_precision() on the outside of emit_instruction().
10237 EmbeddedInstruction inst;
10238 inst.op = OpCopyObject;
10239 inst.length = 3;
10240 inst.ops.push_back(t: expression_type_id(id: temporary_copy.src_id));
10241 inst.ops.push_back(t: temporary_copy.dst_id);
10242 inst.ops.push_back(t: temporary_copy.src_id);
10243
10244 // Never attempt to hoist mirrored temporaries.
10245 // They are hoisted in lock-step with their parents.
10246 block_temporary_hoisting = true;
10247 emit_instruction(instr: inst);
10248 block_temporary_hoisting = false;
10249 }
10250 }
10251
10252 current_emitting_block = nullptr;
10253}
10254
10255void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
10256{
10257 // Allow trivially forwarded expressions like OpLoad or trivial shuffles,
10258 // these will be marked as having suppressed usage tracking.
10259 // Our only concern is to make sure arithmetic operations are done in similar ways.
10260 if (expression_is_forwarded(id: expr.self) && !expression_suppresses_usage_tracking(id: expr.self) &&
10261 forced_invariant_temporaries.count(x: expr.self) == 0)
10262 {
10263 force_temporary_and_recompile(id: expr.self);
10264 forced_invariant_temporaries.insert(x: expr.self);
10265
10266 for (auto &dependent : expr.expression_dependencies)
10267 disallow_forwarding_in_expression_chain(expr: get<SPIRExpression>(id: dependent));
10268 }
10269}
10270
10271void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
10272{
10273 // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
10274 // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
10275 // in one translation unit, but not another, e.g. due to multiple use of an expression.
10276 // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
10277 // expressions to be temporaries.
10278 // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
10279 // for all reasonable uses of invariant.
10280 if (!has_decoration(id: store_id, decoration: DecorationInvariant))
10281 return;
10282
10283 auto *expr = maybe_get<SPIRExpression>(id: value_id);
10284 if (!expr)
10285 return;
10286
10287 disallow_forwarding_in_expression_chain(expr: *expr);
10288}
10289
10290void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
10291{
10292 auto rhs = to_pointer_expression(id: rhs_expression);
10293
10294 // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
10295 if (!rhs.empty())
10296 {
10297 handle_store_to_invariant_variable(store_id: lhs_expression, value_id: rhs_expression);
10298
10299 if (!unroll_array_to_complex_store(target_id: lhs_expression, source_id: rhs_expression))
10300 {
10301 auto lhs = to_dereferenced_expression(id: lhs_expression);
10302 if (has_decoration(id: lhs_expression, decoration: DecorationNonUniform))
10303 convert_non_uniform_expression(expr&: lhs, ptr_id: lhs_expression);
10304
10305 // We might need to cast in order to store to a builtin.
10306 cast_to_variable_store(target_id: lhs_expression, expr&: rhs, expr_type: expression_type(id: rhs_expression));
10307
10308 // Tries to optimize assignments like "<lhs> = <lhs> op expr".
10309 // While this is purely cosmetic, this is important for legacy ESSL where loop
10310 // variable increments must be in either i++ or i += const-expr.
10311 // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
10312 if (!optimize_read_modify_write(type: expression_type(id: rhs_expression), lhs, rhs))
10313 statement(ts&: lhs, ts: " = ", ts&: rhs, ts: ";");
10314 }
10315 register_write(chain: lhs_expression);
10316 }
10317}
10318
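// Determines the integer width to use for implicit bitcasts around an instruction.
// For comparisons and conversions the width is taken from the first operand; otherwise the
// result type is used, falling back to 32 bits when it is not an integral type.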
10319uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
10320{
10321 if (instr.length < 3)
10322 return 32;
10323
10324 auto *ops = stream(instr);
10325
10326 switch (instr.op)
10327 {
10328 case OpSConvert:
10329 case OpConvertSToF:
10330 case OpUConvert:
10331 case OpConvertUToF:
10332 case OpIEqual:
10333 case OpINotEqual:
10334 case OpSLessThan:
10335 case OpSLessThanEqual:
10336 case OpSGreaterThan:
10337 case OpSGreaterThanEqual:
10338 case OpULessThan:
10339 case OpULessThanEqual:
10340 case OpUGreaterThan:
10341 case OpUGreaterThanEqual:
10342 return expression_type(id: ops[2]).width;
10343
10344 default:
10345 {
10346 // We can look at result type which is more robust.
10347 auto *type = maybe_get<SPIRType>(id: ops[0]);
10348 if (type && type_is_integral(type: *type))
10349 return type->width;
10350 else
10351 return 32;
10352 }
10353 }
10354}
10355
10356uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
10357{
10358 if (length < 1)
10359 return 32;
10360
10361 switch (op)
10362 {
10363 case GLSLstd450SAbs:
10364 case GLSLstd450SSign:
10365 case GLSLstd450UMin:
10366 case GLSLstd450SMin:
10367 case GLSLstd450UMax:
10368 case GLSLstd450SMax:
10369 case GLSLstd450UClamp:
10370 case GLSLstd450SClamp:
10371 case GLSLstd450FindSMsb:
10372 case GLSLstd450FindUMsb:
10373 return expression_type(id: ops[0]).width;
10374
10375 default:
10376 {
10377 // We don't need to care about other opcodes, just return 32.
10378 return 32;
10379 }
10380 }
10381}
10382
10383void CompilerGLSL::forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length)
10384{
10385 // Only GLSL supports RelaxedPrecision directly.
10386 // We cannot implement this in HLSL or MSL because it is tied to the type system.
10387 // In SPIR-V, everything must masquerade as 32-bit.
10388 if (!backend.requires_relaxed_precision_analysis)
10389 return;
10390
10391 auto input_precision = analyze_expression_precision(args, length);
10392
10393 // For expressions which are loaded or directly forwarded, we inherit mediump implicitly.
10394 // For dst_id to be analyzed properly, it must inherit any relaxed precision decoration from src_id.
10395 if (input_precision == Options::Mediump)
10396 set_decoration(id: dst_id, decoration: DecorationRelaxedPrecision);
10397}
10398
10399CompilerGLSL::Options::Precision CompilerGLSL::analyze_expression_precision(const uint32_t *args, uint32_t length) const
10400{
10401 // Now, analyze the precision at which the arguments would run.
10402 // GLSL rules are such that the precision used to evaluate an expression is equal to the highest precision
10403 // for the inputs. Constants do not have inherent precision and do not contribute to this decision.
10404 // If all inputs are constants, they inherit precision from outer expressions, including an l-value.
10405 // In this case, we'll have to force a temporary for dst_id so that we can bind the constant expression with
10406 // correct precision.
10407 bool expression_has_highp = false;
10408 bool expression_has_mediump = false;
10409
10410 for (uint32_t i = 0; i < length; i++)
10411 {
10412 uint32_t arg = args[i];
10413
10414 auto handle_type = ir.ids[arg].get_type();
10415 if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef)
10416 continue;
10417
10418 if (has_decoration(id: arg, decoration: DecorationRelaxedPrecision))
10419 expression_has_mediump = true;
10420 else
10421 expression_has_highp = true;
10422 }
10423
10424 if (expression_has_highp)
10425 return Options::Highp;
10426 else if (expression_has_mediump)
10427 return Options::Mediump;
10428 else
10429 return Options::DontCare;
10430}
10431
10432void CompilerGLSL::analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length)
10433{
10434 if (!backend.requires_relaxed_precision_analysis)
10435 return;
10436
10437 auto &type = get<SPIRType>(id: type_id);
10438
10439 // RelaxedPrecision only applies to 32-bit values.
10440 if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt)
10441 return;
10442
10443 bool operation_is_highp = !has_decoration(id: dst_id, decoration: DecorationRelaxedPrecision);
10444
10445 auto input_precision = analyze_expression_precision(args, length);
10446 if (input_precision == Options::DontCare)
10447 {
10448 consume_temporary_in_precision_context(type_id, id: dst_id, precision: input_precision);
10449 return;
10450 }
10451
10452 // In SPIR-V and GLSL, the semantics are flipped for how relaxed precision is determined.
10453 // In SPIR-V, the operation itself marks RelaxedPrecision, meaning that inputs can be truncated to 16-bit.
10454 // However, if the operation is not marked RelaxedPrecision, inputs must be expanded to 32-bit first,
10455 // since the operation must run at high precision.
10456 // This is the awkward part, because if we have mediump inputs, or expressions derived from mediump,
10457 // we might have to forcefully bind the source IDs to highp temporaries. This is done by clearing decorations
10458 // and forcing temporaries. Similarly for mediump operations. We bind highp expressions to mediump variables.
10459 if ((operation_is_highp && input_precision == Options::Mediump) ||
10460 (!operation_is_highp && input_precision == Options::Highp))
10461 {
10462 auto precision = operation_is_highp ? Options::Highp : Options::Mediump;
10463 for (uint32_t i = 0; i < length; i++)
10464 {
10465 // Rewrites the opcode so that we consume an ID in correct precision context.
10466 // This is pretty hacky, but it's the most straightforward way of implementing this without adding
10467 // lots of extra passes to rewrite all code blocks.
10468 args[i] = consume_temporary_in_precision_context(type_id: expression_type_id(id: args[i]), id: args[i], precision);
10469 }
10470 }
10471}
10472
10473// This is probably not exhaustive ...
10474static bool opcode_is_precision_sensitive_operation(Op op)
10475{
10476 switch (op)
10477 {
10478 case OpFAdd:
10479 case OpFSub:
10480 case OpFMul:
10481 case OpFNegate:
10482 case OpIAdd:
10483 case OpISub:
10484 case OpIMul:
10485 case OpSNegate:
10486 case OpFMod:
10487 case OpFDiv:
10488 case OpFRem:
10489 case OpSMod:
10490 case OpSDiv:
10491 case OpSRem:
10492 case OpUMod:
10493 case OpUDiv:
10494 case OpVectorTimesMatrix:
10495 case OpMatrixTimesVector:
10496 case OpMatrixTimesMatrix:
10497 case OpDPdx:
10498 case OpDPdy:
10499 case OpDPdxCoarse:
10500 case OpDPdyCoarse:
10501 case OpDPdxFine:
10502 case OpDPdyFine:
10503 case OpFwidth:
10504 case OpFwidthCoarse:
10505 case OpFwidthFine:
10506 case OpVectorTimesScalar:
10507 case OpMatrixTimesScalar:
10508 case OpOuterProduct:
10509 case OpFConvert:
10510 case OpSConvert:
10511 case OpUConvert:
10512 case OpConvertSToF:
10513 case OpConvertUToF:
10514 case OpConvertFToU:
10515 case OpConvertFToS:
10516 return true;
10517
10518 default:
10519 return false;
10520 }
10521}
10522
10523// Instructions which just load data but don't do any arithmetic operation should just inherit the decoration.
10524 // SPIR-V doesn't require this, but it's somewhat implied it has to work this way; relaxed precision is only
10525// relevant when operating on the IDs, not when shuffling things around.
10526static bool opcode_is_precision_forwarding_instruction(Op op, uint32_t &arg_count)
10527{
10528 switch (op)
10529 {
10530 case OpLoad:
10531 case OpAccessChain:
10532 case OpInBoundsAccessChain:
10533 case OpCompositeExtract:
10534 case OpVectorExtractDynamic:
10535 case OpSampledImage:
10536 case OpImage:
10537 case OpCopyObject:
10538
10539 case OpImageRead:
10540 case OpImageFetch:
10541 case OpImageSampleImplicitLod:
10542 case OpImageSampleProjImplicitLod:
10543 case OpImageSampleDrefImplicitLod:
10544 case OpImageSampleProjDrefImplicitLod:
10545 case OpImageSampleExplicitLod:
10546 case OpImageSampleProjExplicitLod:
10547 case OpImageSampleDrefExplicitLod:
10548 case OpImageSampleProjDrefExplicitLod:
10549 case OpImageGather:
10550 case OpImageDrefGather:
10551 case OpImageSparseRead:
10552 case OpImageSparseFetch:
10553 case OpImageSparseSampleImplicitLod:
10554 case OpImageSparseSampleProjImplicitLod:
10555 case OpImageSparseSampleDrefImplicitLod:
10556 case OpImageSparseSampleProjDrefImplicitLod:
10557 case OpImageSparseSampleExplicitLod:
10558 case OpImageSparseSampleProjExplicitLod:
10559 case OpImageSparseSampleDrefExplicitLod:
10560 case OpImageSparseSampleProjDrefExplicitLod:
10561 case OpImageSparseGather:
10562 case OpImageSparseDrefGather:
10563 arg_count = 1;
10564 return true;
10565
10566 case OpVectorShuffle:
10567 arg_count = 2;
10568 return true;
10569
10570 case OpCompositeConstruct:
10571 return true;
10572
10573 default:
10574 break;
10575 }
10576
10577 return false;
10578}
10579
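// Runs RelaxedPrecision analysis for a single instruction before it is emitted:
// precision-sensitive arithmetic has its operands re-bound in the correct precision context,
// pure data-movement opcodes simply forward the decoration, and if the result has a
// mirror-precision alias, the (dst, src) pair to copy is returned to the caller.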
10580CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Instruction &instruction)
10581{
10582 auto ops = stream_mutable(instr: instruction);
10583 auto opcode = static_cast<Op>(instruction.op);
10584 uint32_t length = instruction.length;
10585
10586 if (backend.requires_relaxed_precision_analysis)
10587 {
10588 if (length > 2)
10589 {
10590 uint32_t forwarding_length = length - 2;
10591
10592 if (opcode_is_precision_sensitive_operation(op: opcode))
10593 analyze_precision_requirements(type_id: ops[0], dst_id: ops[1], args: &ops[2], length: forwarding_length);
10594 else if (opcode == OpExtInst && length >= 5 && get<SPIRExtension>(id: ops[2]).ext == SPIRExtension::GLSL)
10595 analyze_precision_requirements(type_id: ops[0], dst_id: ops[1], args: &ops[4], length: forwarding_length - 2);
10596 else if (opcode_is_precision_forwarding_instruction(op: opcode, arg_count&: forwarding_length))
10597 forward_relaxed_precision(dst_id: ops[1], args: &ops[2], length: forwarding_length);
10598 }
10599
10600 uint32_t result_type = 0, result_id = 0;
10601 if (instruction_to_result_type(result_type, result_id, op: opcode, args: ops, length))
10602 {
10603 auto itr = temporary_to_mirror_precision_alias.find(x: ops[1]);
10604 if (itr != temporary_to_mirror_precision_alias.end())
10605 return { .dst_id: itr->second, .src_id: itr->first };
10606 }
10607 }
10608
10609 return {};
10610}
10611
10612void CompilerGLSL::emit_instruction(const Instruction &instruction)
10613{
10614 auto ops = stream(instr: instruction);
10615 auto opcode = static_cast<Op>(instruction.op);
10616 uint32_t length = instruction.length;
10617
10618#define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
10619#define GLSL_BOP_CAST(op, type) \
10620 emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
10621#define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
10622#define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
10623#define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
10624#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
10625#define GLSL_BFOP_CAST(op, type) \
10626 emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
10628#define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
10629
10630 // If we need to do implicit bitcasts, make sure we do it with the correct type.
10631 uint32_t integer_width = get_integer_width_for_instruction(instr: instruction);
10632 auto int_type = to_signed_basetype(width: integer_width);
10633 auto uint_type = to_unsigned_basetype(width: integer_width);
10634
10635 opcode = get_remapped_spirv_op(op: opcode);
10636
10637 switch (opcode)
10638 {
10639 // Dealing with memory
10640 case OpLoad:
10641 {
10642 uint32_t result_type = ops[0];
10643 uint32_t id = ops[1];
10644 uint32_t ptr = ops[2];
10645
10646 flush_variable_declaration(id: ptr);
10647
10648 // If we're loading from memory that cannot be changed by the shader,
10649 // just forward the expression directly to avoid needless temporaries.
10650 // If an expression is mutable and forwardable, we speculate that it is immutable.
10651 bool forward = should_forward(id: ptr) && forced_temporaries.find(x: id) == end(cont&: forced_temporaries);
10652
10653 // If loading a non-native row-major matrix, mark the expression as need_transpose.
10654 bool need_transpose = false;
10655 bool old_need_transpose = false;
10656
10657 auto *ptr_expression = maybe_get<SPIRExpression>(id: ptr);
10658
10659 if (forward)
10660 {
10661 // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
10662 // taking the expression.
10663 if (ptr_expression && ptr_expression->need_transpose)
10664 {
10665 old_need_transpose = true;
10666 ptr_expression->need_transpose = false;
10667 need_transpose = true;
10668 }
10669 else if (is_non_native_row_major_matrix(id: ptr))
10670 need_transpose = true;
10671 }
10672
10673 // If we are forwarding this load,
		// don't register the read against the access chain here; defer that to when we actually use the expression,
		// via the add_implied_read_expression mechanism.
10676 string expr;
10677
10678 bool is_packed = has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypePacked);
10679 bool is_remapped = has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID);
10680 if (forward || (!is_packed && !is_remapped))
10681 {
10682 // For the simple case, we do not need to deal with repacking.
10683 expr = to_dereferenced_expression(id: ptr, register_expression_read: false);
10684 }
10685 else
10686 {
10687 // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
10688 // storing the expression to a temporary.
10689 expr = to_unpacked_expression(id: ptr);
10690 }
10691
10692 auto &type = get<SPIRType>(id: result_type);
10693 auto &expr_type = expression_type(id: ptr);
10694
10695 // If the expression has more vector components than the result type, insert
10696 // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
10697 // happen with e.g. the MSL backend replacing the type of an input variable.
10698 if (expr_type.vecsize > type.vecsize)
10699 expr = enclose_expression(expr: expr + vector_swizzle(vecsize: type.vecsize, index: 0));
10700
10701 // We might need to cast in order to load from a builtin.
10702 cast_from_variable_load(source_id: ptr, expr, expr_type: type);
10703
10704 // We might be trying to load a gl_Position[N], where we should be
10705 // doing float4[](gl_in[i].gl_Position, ...) instead.
10706 // Similar workarounds are required for input arrays in tessellation.
10707 // Also, loading from gl_SampleMask array needs special unroll.
10708 unroll_array_from_complex_load(target_id: id, source_id: ptr, expr);
10709
10710 if (!type_is_opaque_value(type) && has_decoration(id: ptr, decoration: DecorationNonUniform))
10711 {
10712 // If we're loading something non-opaque, we need to handle non-uniform descriptor access.
10713 convert_non_uniform_expression(expr, ptr_id: ptr);
10714 }
10715
10716 if (forward && ptr_expression)
10717 ptr_expression->need_transpose = old_need_transpose;
10718
10719 bool flattened = ptr_expression && flattened_buffer_blocks.count(x: ptr_expression->loaded_from) != 0;
10720
10721 if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(id: ptr) && !flattened)
10722 rewrite_load_for_wrapped_row_major(expr, loaded_type: result_type, ptr);
10723
		// By default, suppress usage tracking since using the same expression multiple times does not imply any extra work.
10725 // However, if we try to load a complex, composite object from a flattened buffer,
10726 // we should avoid emitting the same code over and over and lower the result to a temporary.
10727 bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
10728
10729 SPIRExpression *e = nullptr;
10730 if (!forward && expression_is_non_value_type_array(ptr))
10731 {
10732 // Complicated load case where we need to make a copy of ptr, but we cannot, because
10733 // it is an array, and our backend does not support arrays as value types.
10734 // Emit the temporary, and copy it explicitly.
10735 e = &emit_uninitialized_temporary_expression(type: result_type, id);
10736 emit_array_copy(lhs: to_expression(id), lhs_id: id, rhs_id: ptr, lhs_storage: StorageClassFunction, rhs_storage: get_expression_effective_storage_class(ptr));
10737 }
10738 else
10739 e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: forward, suppress_usage_tracking: !usage_tracking);
10740
10741 e->need_transpose = need_transpose;
10742 register_read(expr: id, chain: ptr, forwarded: forward);
10743
10744 if (forward)
10745 {
10746 // Pass through whether the result is of a packed type and the physical type ID.
10747 if (has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypePacked))
10748 set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
10749 if (has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID))
10750 {
10751 set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID,
10752 value: get_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID));
10753 }
10754 }
10755 else
10756 {
10757 // This might have been set on an earlier compilation iteration, force it to be unset.
10758 unset_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
10759 unset_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID);
10760 }
10761
10762 inherit_expression_dependencies(dst: id, source: ptr);
10763 if (forward)
10764 add_implied_read_expression(e&: *e, source: ptr);
10765 break;
10766 }
10767
10768 case OpInBoundsAccessChain:
10769 case OpAccessChain:
10770 case OpPtrAccessChain:
10771 {
10772 auto *var = maybe_get<SPIRVariable>(id: ops[2]);
10773 if (var)
10774 flush_variable_declaration(id: var->self);
10775
10776 // If the base is immutable, the access chain pointer must also be.
10777 // If an expression is mutable and forwardable, we speculate that it is immutable.
10778 AccessChainMeta meta;
10779 bool ptr_chain = opcode == OpPtrAccessChain;
10780 auto &target_type = get<SPIRType>(id: ops[0]);
10781 auto e = access_chain(base: ops[2], indices: &ops[3], count: length - 3, target_type, meta: &meta, ptr_chain);
10782
10783 // If the base is flattened UBO of struct type, the expression has to be a composite.
10784 // In that case, backends which do not support inline syntax need it to be bound to a temporary.
10785 // Otherwise, invalid expressions like ({UBO[0].xyz, UBO[0].w, UBO[1]}).member are emitted.
10786 bool requires_temporary = false;
10787 if (flattened_buffer_blocks.count(x: ops[2]) && target_type.basetype == SPIRType::Struct)
10788 requires_temporary = !backend.can_declare_struct_inline;
10789
10790 auto &expr = requires_temporary ?
10791 emit_op(result_type: ops[0], result_id: ops[1], rhs: std::move(e), forwarding: false) :
10792 set<SPIRExpression>(id: ops[1], args: std::move(e), args: ops[0], args: should_forward(id: ops[2]));
10793
10794 auto *backing_variable = maybe_get_backing_variable(chain: ops[2]);
10795 expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
10796 expr.need_transpose = meta.need_transpose;
10797 expr.access_chain = true;
10798
		// Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones.
10800 if (meta.storage_is_packed)
10801 set_extended_decoration(id: ops[1], decoration: SPIRVCrossDecorationPhysicalTypePacked);
10802 if (meta.storage_physical_type != 0)
10803 set_extended_decoration(id: ops[1], decoration: SPIRVCrossDecorationPhysicalTypeID, value: meta.storage_physical_type);
10804 if (meta.storage_is_invariant)
10805 set_decoration(id: ops[1], decoration: DecorationInvariant);
10806 if (meta.flattened_struct)
10807 flattened_structs[ops[1]] = true;
10808 if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
10809 set_decoration(id: ops[1], decoration: DecorationRelaxedPrecision);
10810
10811 // If we have some expression dependencies in our access chain, this access chain is technically a forwarded
10812 // temporary which could be subject to invalidation.
		// Need to assume we're forwarded while calling inherit_expression_dependencies.
10814 forwarded_temporaries.insert(x: ops[1]);
10815 // The access chain itself is never forced to a temporary, but its dependencies might.
10816 suppressed_usage_tracking.insert(x: ops[1]);
10817
10818 for (uint32_t i = 2; i < length; i++)
10819 {
10820 inherit_expression_dependencies(dst: ops[1], source: ops[i]);
10821 add_implied_read_expression(e&: expr, source: ops[i]);
10822 }
10823
10824 // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
10825 // we're not forwarded after all.
10826 if (expr.expression_dependencies.empty())
10827 forwarded_temporaries.erase(x: ops[1]);
10828
10829 break;
10830 }
10831
10832 case OpStore:
10833 {
10834 auto *var = maybe_get<SPIRVariable>(id: ops[0]);
10835
10836 if (var && var->statically_assigned)
10837 var->static_expression = ops[1];
10838 else if (var && var->loop_variable && !var->loop_variable_enable)
10839 var->static_expression = ops[1];
10840 else if (var && var->remapped_variable && var->static_expression)
10841 {
10842 // Skip the write.
10843 }
10844 else if (flattened_structs.count(x: ops[0]))
10845 {
10846 store_flattened_struct(lhs_id: ops[0], value: ops[1]);
10847 register_write(chain: ops[0]);
10848 }
10849 else
10850 {
10851 emit_store_statement(lhs_expression: ops[0], rhs_expression: ops[1]);
10852 }
10853
10854 // Storing a pointer results in a variable pointer, so we must conservatively assume
10855 // we can write through it.
10856 if (expression_type(id: ops[1]).pointer)
10857 register_write(chain: ops[1]);
10858 break;
10859 }
10860
10861 case OpArrayLength:
10862 {
10863 uint32_t result_type = ops[0];
10864 uint32_t id = ops[1];
10865 auto e = access_chain_internal(base: ops[2], indices: &ops[3], count: length - 3, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr);
10866 if (has_decoration(id: ops[2], decoration: DecorationNonUniform))
10867 convert_non_uniform_expression(expr&: e, ptr_id: ops[2]);
10868 set<SPIRExpression>(id, args: join(ts: type_to_glsl(type: get<SPIRType>(id: result_type)), ts: "(", ts&: e, ts: ".length())"), args&: result_type,
10869 args: true);
10870 break;
10871 }
10872
10873 // Function calls
10874 case OpFunctionCall:
10875 {
10876 uint32_t result_type = ops[0];
10877 uint32_t id = ops[1];
10878 uint32_t func = ops[2];
10879 const auto *arg = &ops[3];
10880 length -= 3;
10881
10882 auto &callee = get<SPIRFunction>(id: func);
10883 auto &return_type = get<SPIRType>(id: callee.return_type);
10884 bool pure = function_is_pure(func: callee);
10885
10886 bool callee_has_out_variables = false;
10887 bool emit_return_value_as_argument = false;
10888
10889 // Invalidate out variables passed to functions since they can be OpStore'd to.
10890 for (uint32_t i = 0; i < length; i++)
10891 {
10892 if (callee.arguments[i].write_count)
10893 {
10894 register_call_out_argument(id: arg[i]);
10895 callee_has_out_variables = true;
10896 }
10897
10898 flush_variable_declaration(id: arg[i]);
10899 }
10900
10901 if (!return_type.array.empty() && !backend.can_return_array)
10902 {
10903 callee_has_out_variables = true;
10904 emit_return_value_as_argument = true;
10905 }
10906
10907 if (!pure)
10908 register_impure_function_call();
10909
10910 string funexpr;
10911 SmallVector<string> arglist;
10912 funexpr += to_name(id: func) + "(";
10913
10914 if (emit_return_value_as_argument)
10915 {
10916 statement(ts: type_to_glsl(type: return_type), ts: " ", ts: to_name(id), ts: type_to_array_glsl(type: return_type), ts: ";");
10917 arglist.push_back(t: to_name(id));
10918 }
10919
10920 for (uint32_t i = 0; i < length; i++)
10921 {
10922 // Do not pass in separate images or samplers if we're remapping
10923 // to combined image samplers.
10924 if (skip_argument(id: arg[i]))
10925 continue;
10926
10927 arglist.push_back(t: to_func_call_arg(callee.arguments[i], id: arg[i]));
10928 }
10929
10930 for (auto &combined : callee.combined_parameters)
10931 {
10932 auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
10933 auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
10934 arglist.push_back(t: to_combined_image_sampler(image_id, samp_id: sampler_id));
10935 }
10936
10937 append_global_func_args(func: callee, index: length, arglist);
10938
10939 funexpr += merge(list: arglist);
10940 funexpr += ")";
10941
10942 // Check for function call constraints.
10943 check_function_call_constraints(args: arg, length);
10944
10945 if (return_type.basetype != SPIRType::Void)
10946 {
10947 // If the function actually writes to an out variable,
10948 // take the conservative route and do not forward.
10949 // The problem is that we might not read the function
10950 // result (and emit the function) before an out variable
		// is read (a common case when the return value is ignored!).
		// In order to avoid having to start tracking invalid variables,
10953 // just avoid the forwarding problem altogether.
10954 bool forward = args_will_forward(id, args: arg, num_args: length, pure) && !callee_has_out_variables && pure &&
10955 (forced_temporaries.find(x: id) == end(cont&: forced_temporaries));
10956
10957 if (emit_return_value_as_argument)
10958 {
10959 statement(ts&: funexpr, ts: ";");
10960 set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true);
10961 }
10962 else
10963 emit_op(result_type, result_id: id, rhs: funexpr, forwarding: forward);
10964
10965 // Function calls are implicit loads from all variables in question.
10966 // Set dependencies for them.
10967 for (uint32_t i = 0; i < length; i++)
10968 register_read(expr: id, chain: arg[i], forwarded: forward);
10969
10970 // If we're going to forward the temporary result,
10971 // put dependencies on every variable that must not change.
10972 if (forward)
10973 register_global_read_dependencies(func: callee, id);
10974 }
10975 else
10976 statement(ts&: funexpr, ts: ";");
10977
10978 break;
10979 }
10980
10981 // Composite munging
10982 case OpCompositeConstruct:
10983 {
10984 uint32_t result_type = ops[0];
10985 uint32_t id = ops[1];
10986 const auto *const elems = &ops[2];
10987 length -= 2;
10988
10989 bool forward = true;
10990 for (uint32_t i = 0; i < length; i++)
10991 forward = forward && should_forward(id: elems[i]);
10992
10993 auto &out_type = get<SPIRType>(id: result_type);
10994 auto *in_type = length > 0 ? &expression_type(id: elems[0]) : nullptr;
10995
10996 // Only splat if we have vector constructors.
10997 // Arrays and structs must be initialized properly in full.
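		// For illustration: constructing a vec4 from four identical scalar IDs can be splatted to
		// roughly "vec4(x)" rather than "vec4(x, x, x, x)", whereas a struct or array constructor
		// must still spell out every element.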
10998 bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
10999
11000 bool splat = false;
11001 bool swizzle_splat = false;
11002
11003 if (in_type)
11004 {
11005 splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
11006 swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
11007
11008 if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(type: *in_type))
11009 {
11010 // Cannot swizzle literal integers as a special case.
11011 swizzle_splat = false;
11012 }
11013 }
11014
11015 if (splat || swizzle_splat)
11016 {
11017 uint32_t input = elems[0];
11018 for (uint32_t i = 0; i < length; i++)
11019 {
11020 if (input != elems[i])
11021 {
11022 splat = false;
11023 swizzle_splat = false;
11024 }
11025 }
11026 }
11027
11028 if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
11029 forward = false;
11030 if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
11031 forward = false;
11032 if (type_is_empty(type: out_type) && !backend.supports_empty_struct)
11033 forward = false;
11034
11035 string constructor_op;
11036 if (backend.use_initializer_list && composite)
11037 {
			bool needs_trailing_bracket = false;
11039 // Only use this path if we are building composites.
11040 // This path cannot be used for arithmetic.
11041 if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
11042 constructor_op += type_to_glsl_constructor(type: get<SPIRType>(id: result_type));
11043 else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
11044 {
11045 // MSL path. Array constructor is baked into type here, do not use _constructor variant.
11046 constructor_op += type_to_glsl_constructor(type: get<SPIRType>(id: result_type)) + "(";
				needs_trailing_bracket = true;
11048 }
11049 constructor_op += "{ ";
11050
11051 if (type_is_empty(type: out_type) && !backend.supports_empty_struct)
11052 constructor_op += "0";
11053 else if (splat)
11054 constructor_op += to_unpacked_expression(id: elems[0]);
11055 else
11056 constructor_op += build_composite_combiner(return_type: result_type, elems, length);
11057 constructor_op += " }";
			if (needs_trailing_bracket)
11059 constructor_op += ")";
11060 }
11061 else if (swizzle_splat && !composite)
11062 {
11063 constructor_op = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 1, expr: to_unpacked_expression(id: elems[0]));
11064 }
11065 else
11066 {
11067 constructor_op = type_to_glsl_constructor(type: get<SPIRType>(id: result_type)) + "(";
11068 if (type_is_empty(type: out_type) && !backend.supports_empty_struct)
11069 constructor_op += "0";
11070 else if (splat)
11071 constructor_op += to_unpacked_expression(id: elems[0]);
11072 else
11073 constructor_op += build_composite_combiner(return_type: result_type, elems, length);
11074 constructor_op += ")";
11075 }
11076
11077 if (!constructor_op.empty())
11078 {
11079 emit_op(result_type, result_id: id, rhs: constructor_op, forwarding: forward);
11080 for (uint32_t i = 0; i < length; i++)
11081 inherit_expression_dependencies(dst: id, source: elems[i]);
11082 }
11083 break;
11084 }
11085
11086 case OpVectorInsertDynamic:
11087 {
11088 uint32_t result_type = ops[0];
11089 uint32_t id = ops[1];
11090 uint32_t vec = ops[2];
11091 uint32_t comp = ops[3];
11092 uint32_t index = ops[4];
11093
11094 flush_variable_declaration(id: vec);
11095
11096 // Make a copy, then use access chain to store the variable.
11097 statement(ts: declare_temporary(result_type, result_id: id), ts: to_expression(id: vec), ts: ";");
11098 set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true);
11099 auto chain = access_chain_internal(base: id, indices: &index, count: 1, flags: 0, meta: nullptr);
11100 statement(ts&: chain, ts: " = ", ts: to_unpacked_expression(id: comp), ts: ";");
11101 break;
11102 }
11103
11104 case OpVectorExtractDynamic:
11105 {
11106 uint32_t result_type = ops[0];
11107 uint32_t id = ops[1];
11108
11109 auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: 1, flags: 0, meta: nullptr);
11110 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: ops[2]));
11111 inherit_expression_dependencies(dst: id, source: ops[2]);
11112 inherit_expression_dependencies(dst: id, source: ops[3]);
11113 break;
11114 }
11115
11116 case OpCompositeExtract:
11117 {
11118 uint32_t result_type = ops[0];
11119 uint32_t id = ops[1];
11120 length -= 3;
11121
11122 auto &type = get<SPIRType>(id: result_type);
11123
11124 // We can only split the expression here if our expression is forwarded as a temporary.
11125 bool allow_base_expression = forced_temporaries.find(x: id) == end(cont&: forced_temporaries);
11126
11127 // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
11128 auto &composite_type = expression_type(id: ops[2]);
11129 bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty();
11130 if (composite_type_is_complex)
11131 allow_base_expression = false;
11132
11133 // Packed expressions or physical ID mapped expressions cannot be split up.
11134 if (has_extended_decoration(id: ops[2], decoration: SPIRVCrossDecorationPhysicalTypePacked) ||
11135 has_extended_decoration(id: ops[2], decoration: SPIRVCrossDecorationPhysicalTypeID))
11136 allow_base_expression = false;
11137
11138 // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
11139 // into the base expression.
11140 if (is_non_native_row_major_matrix(id: ops[2]))
11141 allow_base_expression = false;
11142
11143 AccessChainMeta meta;
11144 SPIRExpression *e = nullptr;
11145 auto *c = maybe_get<SPIRConstant>(id: ops[2]);
11146
11147 if (c && !c->specialization && !composite_type_is_complex)
11148 {
11149 auto expr = to_extract_constant_composite_expression(result_type, c: *c, chain: ops + 3, length);
11150 e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: true, suppress_usage_tracking: true);
11151 }
11152 else if (allow_base_expression && should_forward(id: ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
11153 {
11154 // Only apply this optimization if result is scalar.
11155
11156 // We want to split the access chain from the base.
11157 // This is so we can later combine different CompositeExtract results
11158 // with CompositeConstruct without emitting code like
11159 //
11160 // vec3 temp = texture(...).xyz
11161 // vec4(temp.x, temp.y, temp.z, 1.0).
11162 //
11163 // when we actually wanted to emit this
11164 // vec4(texture(...).xyz, 1.0).
11165 //
11166 // Including the base will prevent this and would trigger multiple reads
11167 // from expression causing it to be forced to an actual temporary in GLSL.
11168 auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: length,
11169 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT |
11170 ACCESS_CHAIN_FORCE_COMPOSITE_BIT, meta: &meta);
11171 e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: true, suppress_usage_tracking: should_suppress_usage_tracking(id: ops[2]));
11172 inherit_expression_dependencies(dst: id, source: ops[2]);
11173 e->base_expression = ops[2];
11174
11175 if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
11176 set_decoration(id: ops[1], decoration: DecorationRelaxedPrecision);
11177 }
11178 else
11179 {
11180 auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: length,
11181 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, meta: &meta);
11182 e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: ops[2]), suppress_usage_tracking: should_suppress_usage_tracking(id: ops[2]));
11183 inherit_expression_dependencies(dst: id, source: ops[2]);
11184 }
11185
11186 // Pass through some meta information to the loaded expression.
11187 // We can still end up loading a buffer type to a variable, then CompositeExtract from it
11188 // instead of loading everything through an access chain.
11189 e->need_transpose = meta.need_transpose;
11190 if (meta.storage_is_packed)
11191 set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
11192 if (meta.storage_physical_type != 0)
11193 set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID, value: meta.storage_physical_type);
11194 if (meta.storage_is_invariant)
11195 set_decoration(id, decoration: DecorationInvariant);
11196
11197 break;
11198 }
11199
11200 case OpCompositeInsert:
11201 {
11202 uint32_t result_type = ops[0];
11203 uint32_t id = ops[1];
11204 uint32_t obj = ops[2];
11205 uint32_t composite = ops[3];
11206 const auto *elems = &ops[4];
11207 length -= 4;
11208
11209 flush_variable_declaration(id: composite);
11210
11211 // CompositeInsert requires a copy + modification, but this is very awkward code in HLL.
11212 // Speculate that the input composite is no longer used, and we can modify it in-place.
11213 // There are various scenarios where this is not possible to satisfy.
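		// Roughly speaking, the two code shapes below are:
		//   composite.y = obj;            // in-place RMW, the result aliases the existing temporary
		// versus
		//   vec4 _tmp = composite;        // explicit copy first (names here are purely illustrative)
		//   _tmp.y = obj;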
11214 bool can_modify_in_place = true;
11215 forced_temporaries.insert(x: id);
11216
		// Cannot safely RMW PHI variables since they have no way to be invalidated;
		// forcing temporaries is not going to help.
11219 // This is similar for Constant and Undef inputs.
11220 // The only safe thing to RMW is SPIRExpression.
11221 if (invalid_expressions.count(x: composite) ||
11222 block_composite_insert_overwrite.count(x: composite) ||
11223 maybe_get<SPIRExpression>(id: composite) == nullptr)
11224 {
11225 can_modify_in_place = false;
11226 }
11227 else if (backend.requires_relaxed_precision_analysis &&
11228 has_decoration(id: composite, decoration: DecorationRelaxedPrecision) !=
11229 has_decoration(id, decoration: DecorationRelaxedPrecision) &&
11230 get<SPIRType>(id: result_type).basetype != SPIRType::Struct)
11231 {
11232 // Similarly, if precision does not match for input and output,
11233 // we cannot alias them. If we write a composite into a relaxed precision
11234 // ID, we might get a false truncation.
11235 can_modify_in_place = false;
11236 }
11237
11238 if (can_modify_in_place)
11239 {
11240 // Have to make sure the modified SSA value is bound to a temporary so we can modify it in-place.
11241 if (!forced_temporaries.count(x: composite))
11242 force_temporary_and_recompile(id: composite);
11243
11244 auto chain = access_chain_internal(base: composite, indices: elems, count: length, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr);
11245 statement(ts&: chain, ts: " = ", ts: to_unpacked_expression(id: obj), ts: ";");
11246 set<SPIRExpression>(id, args: to_expression(id: composite), args&: result_type, args: true);
11247 invalid_expressions.insert(x: composite);
11248 composite_insert_overwritten.insert(x: composite);
11249 }
11250 else
11251 {
11252 if (maybe_get<SPIRUndef>(id: composite) != nullptr)
11253 {
11254 emit_uninitialized_temporary_expression(type: result_type, id);
11255 }
11256 else
11257 {
11258 // Make a copy, then use access chain to store the variable.
11259 statement(ts: declare_temporary(result_type, result_id: id), ts: to_expression(id: composite), ts: ";");
11260 set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true);
11261 }
11262
11263 auto chain = access_chain_internal(base: id, indices: elems, count: length, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr);
11264 statement(ts&: chain, ts: " = ", ts: to_unpacked_expression(id: obj), ts: ";");
11265 }
11266
11267 break;
11268 }
11269
11270 case OpCopyMemory:
11271 {
11272 uint32_t lhs = ops[0];
11273 uint32_t rhs = ops[1];
11274 if (lhs != rhs)
11275 {
11276 uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET];
11277 if (!tmp_id)
11278 tmp_id = ir.increase_bound_by(count: 1);
11279 uint32_t tmp_type_id = expression_type(id: rhs).parent_type;
11280
11281 EmbeddedInstruction fake_load, fake_store;
11282 fake_load.op = OpLoad;
11283 fake_load.length = 3;
11284 fake_load.ops.push_back(t: tmp_type_id);
11285 fake_load.ops.push_back(t: tmp_id);
11286 fake_load.ops.push_back(t: rhs);
11287
11288 fake_store.op = OpStore;
11289 fake_store.length = 2;
11290 fake_store.ops.push_back(t: lhs);
11291 fake_store.ops.push_back(t: tmp_id);
11292
11293 // Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible.
11294 // Synthesize a fake Load and Store pair for CopyMemory.
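		// The net effect is roughly as if the module had contained:
		//   %tmp = OpLoad %type %rhs
		//   OpStore %lhs %tmp
		// so all the Load/Store-specific workarounds (unrolling, row-major handling, etc.) apply here as well.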
11295 emit_instruction(instruction: fake_load);
11296 emit_instruction(instruction: fake_store);
11297 }
11298 break;
11299 }
11300
11301 case OpCopyLogical:
11302 {
		// This is used for copying objects of different (but logically matching) types, such as arrays and structs.
11304 // We need to unroll the copy, element-by-element.
11305 uint32_t result_type = ops[0];
11306 uint32_t id = ops[1];
11307 uint32_t rhs = ops[2];
11308
11309 emit_uninitialized_temporary_expression(type: result_type, id);
11310 emit_copy_logical_type(lhs_id: id, lhs_type_id: result_type, rhs_id: rhs, rhs_type_id: expression_type_id(id: rhs), chain: {});
11311 break;
11312 }
11313
11314 case OpCopyObject:
11315 {
11316 uint32_t result_type = ops[0];
11317 uint32_t id = ops[1];
11318 uint32_t rhs = ops[2];
11319 bool pointer = get<SPIRType>(id: result_type).pointer;
11320
11321 auto *chain = maybe_get<SPIRAccessChain>(id: rhs);
11322 auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(id: rhs);
11323 if (chain)
11324 {
11325 // Cannot lower to a SPIRExpression, just copy the object.
11326 auto &e = set<SPIRAccessChain>(id, args&: *chain);
11327 e.self = id;
11328 }
11329 else if (imgsamp)
11330 {
11331 // Cannot lower to a SPIRExpression, just copy the object.
11332 // GLSL does not currently use this type and will never get here, but MSL does.
11333 // Handled here instead of CompilerMSL for better integration and general handling,
11334 // and in case GLSL or other subclasses require it in the future.
11335 auto &e = set<SPIRCombinedImageSampler>(id, args&: *imgsamp);
11336 e.self = id;
11337 }
11338 else if (expression_is_lvalue(id: rhs) && !pointer)
11339 {
11340 // Need a copy.
11341 // For pointer types, we copy the pointer itself.
11342 emit_op(result_type, result_id: id, rhs: to_unpacked_expression(id: rhs), forwarding: false);
11343 }
11344 else
11345 {
11346 // RHS expression is immutable, so just forward it.
		// Copying these things really makes no sense, but
		// it seems to be allowed anyway.
11349 auto &e = set<SPIRExpression>(id, args: to_expression(id: rhs), args&: result_type, args: true);
11350 if (pointer)
11351 {
11352 auto *var = maybe_get_backing_variable(chain: rhs);
11353 e.loaded_from = var ? var->self : ID(0);
11354 }
11355
11356 // If we're copying an access chain, need to inherit the read expressions.
11357 auto *rhs_expr = maybe_get<SPIRExpression>(id: rhs);
11358 if (rhs_expr)
11359 {
11360 e.implied_read_expressions = rhs_expr->implied_read_expressions;
11361 e.expression_dependencies = rhs_expr->expression_dependencies;
11362 }
11363 }
11364 break;
11365 }
11366
11367 case OpVectorShuffle:
11368 {
11369 uint32_t result_type = ops[0];
11370 uint32_t id = ops[1];
11371 uint32_t vec0 = ops[2];
11372 uint32_t vec1 = ops[3];
11373 const auto *elems = &ops[4];
11374 length -= 4;
11375
11376 auto &type0 = expression_type(id: vec0);
11377
11378 // If we have the undefined swizzle index -1, we need to swizzle in undefined data,
11379 // or in our case, T(0).
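		// For example (purely illustrative): shuffling two vec4 inputs a and b with indices (0, 5, -1)
		// takes the constructor path below and emits roughly "vec3(a.x, b.y, 0.0)".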
11380 bool shuffle = false;
11381 for (uint32_t i = 0; i < length; i++)
11382 if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
11383 shuffle = true;
11384
11385 // Cannot use swizzles with packed expressions, force shuffle path.
11386 if (!shuffle && has_extended_decoration(id: vec0, decoration: SPIRVCrossDecorationPhysicalTypePacked))
11387 shuffle = true;
11388
11389 string expr;
11390 bool should_fwd, trivial_forward;
11391
11392 if (shuffle)
11393 {
11394 should_fwd = should_forward(id: vec0) && should_forward(id: vec1);
11395 trivial_forward = should_suppress_usage_tracking(id: vec0) && should_suppress_usage_tracking(id: vec1);
11396
11397 // Constructor style and shuffling from two different vectors.
11398 SmallVector<string> args;
11399 for (uint32_t i = 0; i < length; i++)
11400 {
11401 if (elems[i] == 0xffffffffu)
11402 {
11403 // Use a constant 0 here.
				// We could use the first component or similar, but then we risk propagating
				// a value we might not need, and bogging down codegen.
11406 SPIRConstant c;
11407 c.constant_type = type0.parent_type;
11408 assert(type0.parent_type != ID(0));
11409 args.push_back(t: constant_expression(c));
11410 }
11411 else if (elems[i] >= type0.vecsize)
11412 args.push_back(t: to_extract_component_expression(id: vec1, index: elems[i] - type0.vecsize));
11413 else
11414 args.push_back(t: to_extract_component_expression(id: vec0, index: elems[i]));
11415 }
11416 expr += join(ts: type_to_glsl_constructor(type: get<SPIRType>(id: result_type)), ts: "(", ts: merge(list: args), ts: ")");
11417 }
11418 else
11419 {
11420 should_fwd = should_forward(id: vec0);
11421 trivial_forward = should_suppress_usage_tracking(id: vec0);
11422
11423 // We only source from first vector, so can use swizzle.
11424 // If the vector is packed, unpack it before applying a swizzle (needed for MSL)
11425 expr += to_enclosed_unpacked_expression(id: vec0);
11426 expr += ".";
11427 for (uint32_t i = 0; i < length; i++)
11428 {
11429 assert(elems[i] != 0xffffffffu);
11430 expr += index_to_swizzle(index: elems[i]);
11431 }
11432
11433 if (backend.swizzle_is_function && length > 1)
11434 expr += "()";
11435 }
11436
11437 // A shuffle is trivial in that it doesn't actually *do* anything.
11438 // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
11439
11440 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_fwd, suppress_usage_tracking: trivial_forward);
11441
11442 inherit_expression_dependencies(dst: id, source: vec0);
11443 if (vec0 != vec1)
11444 inherit_expression_dependencies(dst: id, source: vec1);
11445 break;
11446 }
11447
11448 // ALU
11449 case OpIsNan:
11450 GLSL_UFOP(isnan);
11451 break;
11452
11453 case OpIsInf:
11454 GLSL_UFOP(isinf);
11455 break;
11456
11457 case OpSNegate:
11458 case OpFNegate:
11459 GLSL_UOP(-);
11460 break;
11461
11462 case OpIAdd:
11463 {
11464 // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
11465 auto type = get<SPIRType>(id: ops[0]).basetype;
11466 GLSL_BOP_CAST(+, type);
11467 break;
11468 }
11469
11470 case OpFAdd:
11471 GLSL_BOP(+);
11472 break;
11473
11474 case OpISub:
11475 {
11476 auto type = get<SPIRType>(id: ops[0]).basetype;
11477 GLSL_BOP_CAST(-, type);
11478 break;
11479 }
11480
11481 case OpFSub:
11482 GLSL_BOP(-);
11483 break;
11484
11485 case OpIMul:
11486 {
11487 auto type = get<SPIRType>(id: ops[0]).basetype;
11488 GLSL_BOP_CAST(*, type);
11489 break;
11490 }
11491
11492 case OpVectorTimesMatrix:
11493 case OpMatrixTimesVector:
11494 {
11495 // If the matrix needs transpose, just flip the multiply order.
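		// In GLSL, transpose(M) * v is equivalent to v * M (and v * transpose(M) to M * v),
		// which is exactly what the two join() cases below emit, so no explicit transpose() is needed.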
11496 auto *e = maybe_get<SPIRExpression>(id: ops[opcode == OpMatrixTimesVector ? 2 : 3]);
11497 if (e && e->need_transpose)
11498 {
11499 e->need_transpose = false;
11500 string expr;
11501
11502 if (opcode == OpMatrixTimesVector)
11503 expr = join(ts: to_enclosed_unpacked_expression(id: ops[3]), ts: " * ",
11504 ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[2])));
11505 else
11506 expr = join(ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[3])), ts: " * ",
11507 ts: to_enclosed_unpacked_expression(id: ops[2]));
11508
11509 bool forward = should_forward(id: ops[2]) && should_forward(id: ops[3]);
11510 emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: forward);
11511 e->need_transpose = true;
11512 inherit_expression_dependencies(dst: ops[1], source: ops[2]);
11513 inherit_expression_dependencies(dst: ops[1], source: ops[3]);
11514 }
11515 else
11516 GLSL_BOP(*);
11517 break;
11518 }
11519
11520 case OpMatrixTimesMatrix:
11521 {
11522 auto *a = maybe_get<SPIRExpression>(id: ops[2]);
11523 auto *b = maybe_get<SPIRExpression>(id: ops[3]);
11524
11525 // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
11526 // a^T * b^T = (b * a)^T.
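		// E.g. if both operands were loaded from row_major matrices, we emit roughly "B * A" and simply
		// tag the result as needing transpose, instead of emitting transpose(A) * transpose(B).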
11527 if (a && b && a->need_transpose && b->need_transpose)
11528 {
11529 a->need_transpose = false;
11530 b->need_transpose = false;
11531 auto expr = join(ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[3])), ts: " * ",
11532 ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[2])));
11533 bool forward = should_forward(id: ops[2]) && should_forward(id: ops[3]);
11534 auto &e = emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: forward);
11535 e.need_transpose = true;
11536 a->need_transpose = true;
11537 b->need_transpose = true;
11538 inherit_expression_dependencies(dst: ops[1], source: ops[2]);
11539 inherit_expression_dependencies(dst: ops[1], source: ops[3]);
11540 }
11541 else
11542 GLSL_BOP(*);
11543
11544 break;
11545 }
11546
11547 case OpFMul:
11548 case OpMatrixTimesScalar:
11549 case OpVectorTimesScalar:
11550 GLSL_BOP(*);
11551 break;
11552
11553 case OpOuterProduct:
11554 GLSL_BFOP(outerProduct);
11555 break;
11556
11557 case OpDot:
11558 GLSL_BFOP(dot);
11559 break;
11560
11561 case OpTranspose:
11562 if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
11563 {
11564 // transpose() is not available, so instead, flip need_transpose,
11565 // which can later be turned into an emulated transpose op by
11566 // convert_row_major_matrix(), if necessary.
11567 uint32_t result_type = ops[0];
11568 uint32_t result_id = ops[1];
11569 uint32_t input = ops[2];
11570
11571 // Force need_transpose to false temporarily to prevent
11572 // to_expression() from doing the transpose.
11573 bool need_transpose = false;
11574 auto *input_e = maybe_get<SPIRExpression>(id: input);
11575 if (input_e)
11576 swap(a&: need_transpose, b&: input_e->need_transpose);
11577
11578 bool forward = should_forward(id: input);
11579 auto &e = emit_op(result_type, result_id, rhs: to_expression(id: input), forwarding: forward);
11580 e.need_transpose = !need_transpose;
11581
11582 // Restore the old need_transpose flag.
11583 if (input_e)
11584 input_e->need_transpose = need_transpose;
11585 }
11586 else
11587 GLSL_UFOP(transpose);
11588 break;
11589
11590 case OpSRem:
11591 {
11592 uint32_t result_type = ops[0];
11593 uint32_t result_id = ops[1];
11594 uint32_t op0 = ops[2];
11595 uint32_t op1 = ops[3];
11596
11597 // Needs special handling.
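		// SRem takes the sign of the first operand; with truncating integer division,
		// a - b * (a / b) gives exactly that, and it avoids relying on %, whose behavior
		// with negative operands is not well specified in older GLSL.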
11598 bool forward = should_forward(id: op0) && should_forward(id: op1);
11599 auto expr = join(ts: to_enclosed_expression(id: op0), ts: " - ", ts: to_enclosed_expression(id: op1), ts: " * ", ts: "(",
11600 ts: to_enclosed_expression(id: op0), ts: " / ", ts: to_enclosed_expression(id: op1), ts: ")");
11601
11602 emit_op(result_type, result_id, rhs: expr, forwarding: forward);
11603 inherit_expression_dependencies(dst: result_id, source: op0);
11604 inherit_expression_dependencies(dst: result_id, source: op1);
11605 break;
11606 }
11607
11608 case OpSDiv:
11609 GLSL_BOP_CAST(/, int_type);
11610 break;
11611
11612 case OpUDiv:
11613 GLSL_BOP_CAST(/, uint_type);
11614 break;
11615
11616 case OpIAddCarry:
11617 case OpISubBorrow:
11618 {
11619 if (options.es && options.version < 310)
11620 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
11621 else if (!options.es && options.version < 400)
11622 SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
11623
11624 uint32_t result_type = ops[0];
11625 uint32_t result_id = ops[1];
11626 uint32_t op0 = ops[2];
11627 uint32_t op1 = ops[3];
11628 auto &type = get<SPIRType>(id: result_type);
11629 emit_uninitialized_temporary_expression(type: result_type, id: result_id);
11630 const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
11631
11632 statement(ts: to_expression(id: result_id), ts: ".", ts: to_member_name(type, index: 0), ts: " = ", ts&: op, ts: "(", ts: to_expression(id: op0), ts: ", ",
11633 ts: to_expression(id: op1), ts: ", ", ts: to_expression(id: result_id), ts: ".", ts: to_member_name(type, index: 1), ts: ");");
11634 break;
11635 }
11636
11637 case OpUMulExtended:
11638 case OpSMulExtended:
11639 {
11640 if (options.es && options.version < 310)
11641 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
11642 else if (!options.es && options.version < 400)
11643 SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 4000.");
11644
11645 uint32_t result_type = ops[0];
11646 uint32_t result_id = ops[1];
11647 uint32_t op0 = ops[2];
11648 uint32_t op1 = ops[3];
11649 auto &type = get<SPIRType>(id: result_type);
11650 emit_uninitialized_temporary_expression(type: result_type, id: result_id);
11651 const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
11652
11653 statement(ts&: op, ts: "(", ts: to_expression(id: op0), ts: ", ", ts: to_expression(id: op1), ts: ", ", ts: to_expression(id: result_id), ts: ".",
11654 ts: to_member_name(type, index: 1), ts: ", ", ts: to_expression(id: result_id), ts: ".", ts: to_member_name(type, index: 0), ts: ");");
11655 break;
11656 }
11657
11658 case OpFDiv:
11659 GLSL_BOP(/);
11660 break;
11661
11662 case OpShiftRightLogical:
11663 GLSL_BOP_CAST(>>, uint_type);
11664 break;
11665
11666 case OpShiftRightArithmetic:
11667 GLSL_BOP_CAST(>>, int_type);
11668 break;
11669
11670 case OpShiftLeftLogical:
11671 {
11672 auto type = get<SPIRType>(id: ops[0]).basetype;
11673 GLSL_BOP_CAST(<<, type);
11674 break;
11675 }
11676
11677 case OpBitwiseOr:
11678 {
11679 auto type = get<SPIRType>(id: ops[0]).basetype;
11680 GLSL_BOP_CAST(|, type);
11681 break;
11682 }
11683
11684 case OpBitwiseXor:
11685 {
11686 auto type = get<SPIRType>(id: ops[0]).basetype;
11687 GLSL_BOP_CAST(^, type);
11688 break;
11689 }
11690
11691 case OpBitwiseAnd:
11692 {
11693 auto type = get<SPIRType>(id: ops[0]).basetype;
11694 GLSL_BOP_CAST(&, type);
11695 break;
11696 }
11697
11698 case OpNot:
11699 GLSL_UOP(~);
11700 break;
11701
11702 case OpUMod:
11703 GLSL_BOP_CAST(%, uint_type);
11704 break;
11705
11706 case OpSMod:
11707 GLSL_BOP_CAST(%, int_type);
11708 break;
11709
11710 case OpFMod:
11711 GLSL_BFOP(mod);
11712 break;
11713
11714 case OpFRem:
11715 {
11716 if (is_legacy())
11717 SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is "
11718 "needed for legacy.");
11719
11720 uint32_t result_type = ops[0];
11721 uint32_t result_id = ops[1];
11722 uint32_t op0 = ops[2];
11723 uint32_t op1 = ops[3];
11724
11725 // Needs special handling.
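		// FRem takes the sign of the first operand, which a - b * trunc(a / b) reproduces.
		// GLSL's mod() would instead follow the sign of the second operand (that is what OpFMod maps to above).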
11726 bool forward = should_forward(id: op0) && should_forward(id: op1);
11727 auto expr = join(ts: to_enclosed_expression(id: op0), ts: " - ", ts: to_enclosed_expression(id: op1), ts: " * ", ts: "trunc(",
11728 ts: to_enclosed_expression(id: op0), ts: " / ", ts: to_enclosed_expression(id: op1), ts: ")");
11729
11730 emit_op(result_type, result_id, rhs: expr, forwarding: forward);
11731 inherit_expression_dependencies(dst: result_id, source: op0);
11732 inherit_expression_dependencies(dst: result_id, source: op1);
11733 break;
11734 }
11735
11736 // Relational
11737 case OpAny:
11738 GLSL_UFOP(any);
11739 break;
11740
11741 case OpAll:
11742 GLSL_UFOP(all);
11743 break;
11744
11745 case OpSelect:
11746 emit_mix_op(result_type: ops[0], id: ops[1], left: ops[4], right: ops[3], lerp: ops[2]);
11747 break;
11748
11749 case OpLogicalOr:
11750 {
11751 // No vector variant in GLSL for logical OR.
11752 auto result_type = ops[0];
11753 auto id = ops[1];
11754 auto &type = get<SPIRType>(id: result_type);
11755
11756 if (type.vecsize > 1)
11757 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "||", negate: false, expected_type: SPIRType::Unknown);
11758 else
11759 GLSL_BOP(||);
11760 break;
11761 }
11762
11763 case OpLogicalAnd:
11764 {
11765 // No vector variant in GLSL for logical AND.
11766 auto result_type = ops[0];
11767 auto id = ops[1];
11768 auto &type = get<SPIRType>(id: result_type);
11769
11770 if (type.vecsize > 1)
11771 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "&&", negate: false, expected_type: SPIRType::Unknown);
11772 else
11773 GLSL_BOP(&&);
11774 break;
11775 }
11776
11777 case OpLogicalNot:
11778 {
11779 auto &type = get<SPIRType>(id: ops[0]);
11780 if (type.vecsize > 1)
11781 GLSL_UFOP(not );
11782 else
11783 GLSL_UOP(!);
11784 break;
11785 }
11786
11787 case OpIEqual:
11788 {
11789 if (expression_type(id: ops[2]).vecsize > 1)
11790 GLSL_BFOP_CAST(equal, int_type);
11791 else
11792 GLSL_BOP_CAST(==, int_type);
11793 break;
11794 }
11795
11796 case OpLogicalEqual:
11797 case OpFOrdEqual:
11798 {
11799 if (expression_type(id: ops[2]).vecsize > 1)
11800 GLSL_BFOP(equal);
11801 else
11802 GLSL_BOP(==);
11803 break;
11804 }
11805
11806 case OpINotEqual:
11807 {
11808 if (expression_type(id: ops[2]).vecsize > 1)
11809 GLSL_BFOP_CAST(notEqual, int_type);
11810 else
11811 GLSL_BOP_CAST(!=, int_type);
11812 break;
11813 }
11814
11815 case OpLogicalNotEqual:
11816 case OpFOrdNotEqual:
11817 case OpFUnordNotEqual:
11818 {
11819 // GLSL is fuzzy on what to do with ordered vs unordered not equal.
11820 // glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE,
11821 // but this means we have no easy way of implementing ordered not equal.
11822 if (expression_type(id: ops[2]).vecsize > 1)
11823 GLSL_BFOP(notEqual);
11824 else
11825 GLSL_BOP(!=);
11826 break;
11827 }
11828
11829 case OpUGreaterThan:
11830 case OpSGreaterThan:
11831 {
11832 auto type = opcode == OpUGreaterThan ? uint_type : int_type;
11833 if (expression_type(id: ops[2]).vecsize > 1)
11834 GLSL_BFOP_CAST(greaterThan, type);
11835 else
11836 GLSL_BOP_CAST(>, type);
11837 break;
11838 }
11839
11840 case OpFOrdGreaterThan:
11841 {
11842 if (expression_type(id: ops[2]).vecsize > 1)
11843 GLSL_BFOP(greaterThan);
11844 else
11845 GLSL_BOP(>);
11846 break;
11847 }
11848
11849 case OpUGreaterThanEqual:
11850 case OpSGreaterThanEqual:
11851 {
11852 auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
11853 if (expression_type(id: ops[2]).vecsize > 1)
11854 GLSL_BFOP_CAST(greaterThanEqual, type);
11855 else
11856 GLSL_BOP_CAST(>=, type);
11857 break;
11858 }
11859
11860 case OpFOrdGreaterThanEqual:
11861 {
11862 if (expression_type(id: ops[2]).vecsize > 1)
11863 GLSL_BFOP(greaterThanEqual);
11864 else
11865 GLSL_BOP(>=);
11866 break;
11867 }
11868
11869 case OpULessThan:
11870 case OpSLessThan:
11871 {
11872 auto type = opcode == OpULessThan ? uint_type : int_type;
11873 if (expression_type(id: ops[2]).vecsize > 1)
11874 GLSL_BFOP_CAST(lessThan, type);
11875 else
11876 GLSL_BOP_CAST(<, type);
11877 break;
11878 }
11879
11880 case OpFOrdLessThan:
11881 {
11882 if (expression_type(id: ops[2]).vecsize > 1)
11883 GLSL_BFOP(lessThan);
11884 else
11885 GLSL_BOP(<);
11886 break;
11887 }
11888
11889 case OpULessThanEqual:
11890 case OpSLessThanEqual:
11891 {
11892 auto type = opcode == OpULessThanEqual ? uint_type : int_type;
11893 if (expression_type(id: ops[2]).vecsize > 1)
11894 GLSL_BFOP_CAST(lessThanEqual, type);
11895 else
11896 GLSL_BOP_CAST(<=, type);
11897 break;
11898 }
11899
11900 case OpFOrdLessThanEqual:
11901 {
11902 if (expression_type(id: ops[2]).vecsize > 1)
11903 GLSL_BFOP(lessThanEqual);
11904 else
11905 GLSL_BOP(<=);
11906 break;
11907 }
11908
11909 // Conversion
11910 case OpSConvert:
11911 case OpConvertSToF:
11912 case OpUConvert:
11913 case OpConvertUToF:
11914 {
11915 auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
11916 uint32_t result_type = ops[0];
11917 uint32_t id = ops[1];
11918
11919 auto &type = get<SPIRType>(id: result_type);
11920 auto &arg_type = expression_type(id: ops[2]);
11921 auto func = type_to_glsl_constructor(type);
11922
11923 if (arg_type.width < type.width || type_is_floating_point(type))
11924 emit_unary_func_op_cast(result_type, result_id: id, op0: ops[2], op: func.c_str(), input_type, expected_result_type: type.basetype);
11925 else
11926 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: func.c_str());
11927 break;
11928 }
11929
11930 case OpConvertFToU:
11931 case OpConvertFToS:
11932 {
11933 // Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
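		// E.g. OpConvertFToS into a plain int emits roughly "int(x)"; if the declared result type
		// disagrees in signedness with the arithmetic conversion, the helper below wraps it in an
		// extra bitcast, giving something like "uint(int(x))".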
11934 uint32_t result_type = ops[0];
11935 uint32_t id = ops[1];
11936 auto &type = get<SPIRType>(id: result_type);
11937 auto expected_type = type;
11938 auto &float_type = expression_type(id: ops[2]);
11939 expected_type.basetype =
11940 opcode == OpConvertFToS ? to_signed_basetype(width: type.width) : to_unsigned_basetype(width: type.width);
11941
11942 auto func = type_to_glsl_constructor(type: expected_type);
11943 emit_unary_func_op_cast(result_type, result_id: id, op0: ops[2], op: func.c_str(), input_type: float_type.basetype, expected_result_type: expected_type.basetype);
11944 break;
11945 }
11946
11947 case OpFConvert:
11948 {
11949 uint32_t result_type = ops[0];
11950 uint32_t id = ops[1];
11951
11952 auto func = type_to_glsl_constructor(type: get<SPIRType>(id: result_type));
11953 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: func.c_str());
11954 break;
11955 }
11956
11957 case OpBitcast:
11958 {
11959 uint32_t result_type = ops[0];
11960 uint32_t id = ops[1];
11961 uint32_t arg = ops[2];
11962
11963 if (!emit_complex_bitcast(result_type, id, op0: arg))
11964 {
11965 auto op = bitcast_glsl_op(out_type: get<SPIRType>(id: result_type), in_type: expression_type(id: arg));
11966 emit_unary_func_op(result_type, result_id: id, op0: arg, op: op.c_str());
11967 }
11968 break;
11969 }
11970
11971 case OpQuantizeToF16:
11972 {
11973 uint32_t result_type = ops[0];
11974 uint32_t id = ops[1];
11975 uint32_t arg = ops[2];
11976
11977 string op;
11978 auto &type = get<SPIRType>(id: result_type);
11979
11980 switch (type.vecsize)
11981 {
11982 case 1:
11983 op = join(ts: "unpackHalf2x16(packHalf2x16(vec2(", ts: to_expression(id: arg), ts: "))).x");
11984 break;
11985 case 2:
11986 op = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: "))");
11987 break;
11988 case 3:
11989 {
11990 auto op0 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".xy))");
11991 auto op1 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".zz)).x");
11992 op = join(ts: "vec3(", ts&: op0, ts: ", ", ts&: op1, ts: ")");
11993 break;
11994 }
11995 case 4:
11996 {
11997 auto op0 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".xy))");
11998 auto op1 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".zw))");
11999 op = join(ts: "vec4(", ts&: op0, ts: ", ", ts&: op1, ts: ")");
12000 break;
12001 }
12002 default:
12003 SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
12004 }
12005
12006 emit_op(result_type, result_id: id, rhs: op, forwarding: should_forward(id: arg));
12007 inherit_expression_dependencies(dst: id, source: arg);
12008 break;
12009 }
12010
12011 // Derivatives
12012 case OpDPdx:
12013 GLSL_UFOP(dFdx);
12014 if (is_legacy_es())
12015 require_extension_internal(ext: "GL_OES_standard_derivatives");
12016 register_control_dependent_expression(expr: ops[1]);
12017 break;
12018
12019 case OpDPdy:
12020 GLSL_UFOP(dFdy);
12021 if (is_legacy_es())
12022 require_extension_internal(ext: "GL_OES_standard_derivatives");
12023 register_control_dependent_expression(expr: ops[1]);
12024 break;
12025
12026 case OpDPdxFine:
12027 GLSL_UFOP(dFdxFine);
12028 if (options.es)
12029 {
12030 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
12031 }
12032 if (options.version < 450)
12033 require_extension_internal(ext: "GL_ARB_derivative_control");
12034 register_control_dependent_expression(expr: ops[1]);
12035 break;
12036
12037 case OpDPdyFine:
12038 GLSL_UFOP(dFdyFine);
12039 if (options.es)
12040 {
12041 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
12042 }
12043 if (options.version < 450)
12044 require_extension_internal(ext: "GL_ARB_derivative_control");
12045 register_control_dependent_expression(expr: ops[1]);
12046 break;
12047
12048 case OpDPdxCoarse:
12049 if (options.es)
12050 {
12051 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
12052 }
12053 GLSL_UFOP(dFdxCoarse);
12054 if (options.version < 450)
12055 require_extension_internal(ext: "GL_ARB_derivative_control");
12056 register_control_dependent_expression(expr: ops[1]);
12057 break;
12058
12059 case OpDPdyCoarse:
12060 GLSL_UFOP(dFdyCoarse);
12061 if (options.es)
12062 {
12063 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
12064 }
12065 if (options.version < 450)
12066 require_extension_internal(ext: "GL_ARB_derivative_control");
12067 register_control_dependent_expression(expr: ops[1]);
12068 break;
12069
12070 case OpFwidth:
12071 GLSL_UFOP(fwidth);
12072 if (is_legacy_es())
12073 require_extension_internal(ext: "GL_OES_standard_derivatives");
12074 register_control_dependent_expression(expr: ops[1]);
12075 break;
12076
12077 case OpFwidthCoarse:
12078 GLSL_UFOP(fwidthCoarse);
12079 if (options.es)
12080 {
12081 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
12082 }
12083 if (options.version < 450)
12084 require_extension_internal(ext: "GL_ARB_derivative_control");
12085 register_control_dependent_expression(expr: ops[1]);
12086 break;
12087
12088 case OpFwidthFine:
12089 GLSL_UFOP(fwidthFine);
12090 if (options.es)
12091 {
12092 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
12093 }
12094 if (options.version < 450)
12095 require_extension_internal(ext: "GL_ARB_derivative_control");
12096 register_control_dependent_expression(expr: ops[1]);
12097 break;
12098
12099 // Bitfield
12100 case OpBitFieldInsert:
12101 {
12102 emit_bitfield_insert_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op3: ops[5], op: "bitfieldInsert", offset_count_type: SPIRType::Int);
12103 break;
12104 }
12105
12106 case OpBitFieldSExtract:
12107 {
12108 emit_trinary_func_op_bitextract(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op: "bitfieldExtract", expected_result_type: int_type, input_type0: int_type,
12109 input_type1: SPIRType::Int, input_type2: SPIRType::Int);
12110 break;
12111 }
12112
12113 case OpBitFieldUExtract:
12114 {
12115 emit_trinary_func_op_bitextract(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op: "bitfieldExtract", expected_result_type: uint_type, input_type0: uint_type,
12116 input_type1: SPIRType::Int, input_type2: SPIRType::Int);
12117 break;
12118 }
12119
12120 case OpBitReverse:
12121 // BitReverse does not have issues with sign since result type must match input type.
12122 GLSL_UFOP(bitfieldReverse);
12123 break;
12124
12125 case OpBitCount:
12126 {
12127 auto basetype = expression_type(id: ops[2]).basetype;
12128 emit_unary_func_op_cast(result_type: ops[0], result_id: ops[1], op0: ops[2], op: "bitCount", input_type: basetype, expected_result_type: int_type);
12129 break;
12130 }
12131
12132 // Atomics
12133 case OpAtomicExchange:
12134 {
12135 uint32_t result_type = ops[0];
12136 uint32_t id = ops[1];
12137 uint32_t ptr = ops[2];
12138 // Ignore semantics for now, probably only relevant to CL.
12139 uint32_t val = ops[5];
12140 const char *op = check_atomic_image(id: ptr) ? "imageAtomicExchange" : "atomicExchange";
12141
12142 emit_atomic_func_op(result_type, result_id: id, op0: ptr, op1: val, op);
12143 break;
12144 }
12145
12146 case OpAtomicCompareExchange:
12147 {
12148 uint32_t result_type = ops[0];
12149 uint32_t id = ops[1];
12150 uint32_t ptr = ops[2];
12151 uint32_t val = ops[6];
12152 uint32_t comp = ops[7];
12153 const char *op = check_atomic_image(id: ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
12154
12155 emit_atomic_func_op(result_type, result_id: id, op0: ptr, op1: comp, op2: val, op);
12156 break;
12157 }
12158
12159 case OpAtomicLoad:
12160 {
		// In plain GLSL, we have no atomic loads, so emulate this by fetch-adding 0 and hoping the compiler figures it out.
12162 // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
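		// E.g. an atomic load of a uint SSBO member ends up as roughly "atomicAdd(ssbo.counter, 0u)"
		// (the member name here is illustrative).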
12163 auto &type = expression_type(id: ops[2]);
12164 forced_temporaries.insert(x: ops[1]);
12165 bool atomic_image = check_atomic_image(id: ops[2]);
12166 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
12167 (atomic_image && get<SPIRType>(id: type.image.type).basetype == SPIRType::UInt);
12168 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
12169 const char *increment = unsigned_type ? "0u" : "0";
12170 emit_op(result_type: ops[0], result_id: ops[1],
12171 rhs: join(ts&: op, ts: "(",
12172 ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: increment, ts: ")"), forwarding: false);
12173 flush_all_atomic_capable_variables();
12174 break;
12175 }
12176
12177 case OpAtomicStore:
12178 {
12179 // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
12180 // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
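		// E.g. an atomic store of "val" to a buffer member becomes roughly "atomicExchange(ssbo.value, val);"
		// with the returned previous value simply discarded (names here are illustrative).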
12181 uint32_t ptr = ops[0];
12182 // Ignore semantics for now, probably only relevant to CL.
12183 uint32_t val = ops[3];
12184 const char *op = check_atomic_image(id: ptr) ? "imageAtomicExchange" : "atomicExchange";
12185 statement(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: ptr), ts: ", ", ts: to_expression(id: val), ts: ");");
12186 flush_all_atomic_capable_variables();
12187 break;
12188 }
12189
12190 case OpAtomicIIncrement:
12191 case OpAtomicIDecrement:
12192 {
12193 forced_temporaries.insert(x: ops[1]);
12194 auto &type = expression_type(id: ops[2]);
12195 if (type.storage == StorageClassAtomicCounter)
12196 {
12197 // Legacy GLSL stuff, not sure if this is relevant to support.
12198 if (opcode == OpAtomicIIncrement)
12199 GLSL_UFOP(atomicCounterIncrement);
12200 else
12201 GLSL_UFOP(atomicCounterDecrement);
12202 }
12203 else
12204 {
12205 bool atomic_image = check_atomic_image(id: ops[2]);
12206 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
12207 (atomic_image && get<SPIRType>(id: type.image.type).basetype == SPIRType::UInt);
12208 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
12209
12210 const char *increment = nullptr;
12211 if (opcode == OpAtomicIIncrement && unsigned_type)
12212 increment = "1u";
12213 else if (opcode == OpAtomicIIncrement)
12214 increment = "1";
12215 else if (unsigned_type)
12216 increment = "uint(-1)";
12217 else
12218 increment = "-1";
12219
12220 emit_op(result_type: ops[0], result_id: ops[1],
12221 rhs: join(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: increment, ts: ")"), forwarding: false);
12222 }
12223
12224 flush_all_atomic_capable_variables();
12225 break;
12226 }
12227
12228 case OpAtomicIAdd:
12229 case OpAtomicFAddEXT:
12230 {
12231 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicAdd" : "atomicAdd";
12232 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
12233 break;
12234 }
12235
12236 case OpAtomicISub:
12237 {
12238 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicAdd" : "atomicAdd";
12239 forced_temporaries.insert(x: ops[1]);
12240 auto expr = join(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", -", ts: to_enclosed_expression(id: ops[5]), ts: ")");
12241 emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: should_forward(id: ops[2]) && should_forward(id: ops[5]));
12242 flush_all_atomic_capable_variables();
12243 break;
12244 }
12245
12246 case OpAtomicSMin:
12247 case OpAtomicUMin:
12248 {
12249 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicMin" : "atomicMin";
12250 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
12251 break;
12252 }
12253
12254 case OpAtomicSMax:
12255 case OpAtomicUMax:
12256 {
12257 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicMax" : "atomicMax";
12258 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
12259 break;
12260 }
12261
12262 case OpAtomicAnd:
12263 {
12264 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicAnd" : "atomicAnd";
12265 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
12266 break;
12267 }
12268
12269 case OpAtomicOr:
12270 {
12271 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicOr" : "atomicOr";
12272 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
12273 break;
12274 }
12275
12276 case OpAtomicXor:
12277 {
12278 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicXor" : "atomicXor";
12279 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
12280 break;
12281 }
12282
12283 // Geometry shaders
12284 case OpEmitVertex:
12285 statement(ts: "EmitVertex();");
12286 break;
12287
12288 case OpEndPrimitive:
12289 statement(ts: "EndPrimitive();");
12290 break;
12291
12292 case OpEmitStreamVertex:
12293 {
12294 if (options.es)
12295 SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
12296 else if (!options.es && options.version < 400)
12297 SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
12298
12299 auto stream_expr = to_expression(id: ops[0]);
12300 if (expression_type(id: ops[0]).basetype != SPIRType::Int)
12301 stream_expr = join(ts: "int(", ts&: stream_expr, ts: ")");
12302 statement(ts: "EmitStreamVertex(", ts&: stream_expr, ts: ");");
12303 break;
12304 }
12305
12306 case OpEndStreamPrimitive:
12307 {
12308 if (options.es)
12309 SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
12310 else if (!options.es && options.version < 400)
12311 SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
12312
12313 auto stream_expr = to_expression(id: ops[0]);
12314 if (expression_type(id: ops[0]).basetype != SPIRType::Int)
12315 stream_expr = join(ts: "int(", ts&: stream_expr, ts: ")");
12316 statement(ts: "EndStreamPrimitive(", ts&: stream_expr, ts: ");");
12317 break;
12318 }
12319
12320 // Textures
12321 case OpImageSampleExplicitLod:
12322 case OpImageSampleProjExplicitLod:
12323 case OpImageSampleDrefExplicitLod:
12324 case OpImageSampleProjDrefExplicitLod:
12325 case OpImageSampleImplicitLod:
12326 case OpImageSampleProjImplicitLod:
12327 case OpImageSampleDrefImplicitLod:
12328 case OpImageSampleProjDrefImplicitLod:
12329 case OpImageFetch:
12330 case OpImageGather:
12331 case OpImageDrefGather:
12332 // Gets a bit hairy, so move this to a separate instruction.
12333 emit_texture_op(i: instruction, sparse: false);
12334 break;
12335
12336 case OpImageSparseSampleExplicitLod:
12337 case OpImageSparseSampleProjExplicitLod:
12338 case OpImageSparseSampleDrefExplicitLod:
12339 case OpImageSparseSampleProjDrefExplicitLod:
12340 case OpImageSparseSampleImplicitLod:
12341 case OpImageSparseSampleProjImplicitLod:
12342 case OpImageSparseSampleDrefImplicitLod:
12343 case OpImageSparseSampleProjDrefImplicitLod:
12344 case OpImageSparseFetch:
12345 case OpImageSparseGather:
12346 case OpImageSparseDrefGather:
12347 // Gets a bit hairy, so move this to a separate instruction.
12348 emit_texture_op(i: instruction, sparse: true);
12349 break;
12350
12351 case OpImageSparseTexelsResident:
12352 if (options.es)
12353 SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL.");
12354 require_extension_internal(ext: "GL_ARB_sparse_texture2");
12355 emit_unary_func_op_cast(result_type: ops[0], result_id: ops[1], op0: ops[2], op: "sparseTexelsResidentARB", input_type: int_type, expected_result_type: SPIRType::Boolean);
12356 break;
12357
12358 case OpImage:
12359 {
12360 uint32_t result_type = ops[0];
12361 uint32_t id = ops[1];
12362
12363 // Suppress usage tracking.
12364 auto &e = emit_op(result_type, result_id: id, rhs: to_expression(id: ops[2]), forwarding: true, suppress_usage_tracking: true);
12365
12366 // When using the image, we need to know which variable it is actually loaded from.
12367 auto *var = maybe_get_backing_variable(chain: ops[2]);
12368 e.loaded_from = var ? var->self : ID(0);
12369 break;
12370 }
12371
12372 case OpImageQueryLod:
12373 {
12374 const char *op = nullptr;
12375 if (!options.es && options.version < 400)
12376 {
12377 require_extension_internal(ext: "GL_ARB_texture_query_lod");
12378 // For some reason, the ARB spec is all-caps.
12379 op = "textureQueryLOD";
12380 }
12381 else if (options.es)
12382 SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
12383 else
12384 op = "textureQueryLod";
12385
12386 auto sampler_expr = to_expression(id: ops[2]);
12387 if (has_decoration(id: ops[2], decoration: DecorationNonUniform))
12388 {
12389 if (maybe_get_backing_variable(chain: ops[2]))
12390 convert_non_uniform_expression(expr&: sampler_expr, ptr_id: ops[2]);
12391 else if (*backend.nonuniform_qualifier != '\0')
12392 sampler_expr = join(ts&: backend.nonuniform_qualifier, ts: "(", ts&: sampler_expr, ts: ")");
12393 }
12394
12395 bool forward = should_forward(id: ops[3]);
12396 emit_op(result_type: ops[0], result_id: ops[1],
12397 rhs: join(ts&: op, ts: "(", ts&: sampler_expr, ts: ", ", ts: to_unpacked_expression(id: ops[3]), ts: ")"),
12398 forwarding: forward);
12399 inherit_expression_dependencies(dst: ops[1], source: ops[2]);
12400 inherit_expression_dependencies(dst: ops[1], source: ops[3]);
12401 register_control_dependent_expression(expr: ops[1]);
12402 break;
12403 }
12404
12405 case OpImageQueryLevels:
12406 {
12407 uint32_t result_type = ops[0];
12408 uint32_t id = ops[1];
12409
12410 if (!options.es && options.version < 430)
12411 require_extension_internal(ext: "GL_ARB_texture_query_levels");
12412 if (options.es)
12413 SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
12414
12415 auto expr = join(ts: "textureQueryLevels(", ts: convert_separate_image_to_expression(id: ops[2]), ts: ")");
12416 auto &restype = get<SPIRType>(id: ops[0]);
12417 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr);
12418 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
12419 break;
12420 }
12421
12422 case OpImageQuerySamples:
12423 {
12424 auto &type = expression_type(id: ops[2]);
12425 uint32_t result_type = ops[0];
12426 uint32_t id = ops[1];
12427
12428 string expr;
12429 if (type.image.sampled == 2)
12430 expr = join(ts: "imageSamples(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ")");
12431 else
12432 expr = join(ts: "textureSamples(", ts: convert_separate_image_to_expression(id: ops[2]), ts: ")");
12433
12434 auto &restype = get<SPIRType>(id: ops[0]);
12435 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr);
12436 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
12437 break;
12438 }
12439
12440 case OpSampledImage:
12441 {
12442 uint32_t result_type = ops[0];
12443 uint32_t id = ops[1];
12444 emit_sampled_image_op(result_type, result_id: id, image_id: ops[2], samp_id: ops[3]);
12445 inherit_expression_dependencies(dst: id, source: ops[2]);
12446 inherit_expression_dependencies(dst: id, source: ops[3]);
12447 break;
12448 }
12449
12450 case OpImageQuerySizeLod:
12451 {
12452 uint32_t result_type = ops[0];
12453 uint32_t id = ops[1];
12454 uint32_t img = ops[2];
12455
12456 std::string fname = "textureSize";
12457 if (is_legacy_desktop())
12458 {
12459 auto &type = expression_type(id: img);
12460 auto &imgtype = get<SPIRType>(id: type.self);
12461 fname = legacy_tex_op(op: fname, imgtype, tex: img);
12462 }
12463 else if (is_legacy_es())
12464 SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");
12465
12466 auto expr = join(ts&: fname, ts: "(", ts: convert_separate_image_to_expression(id: img), ts: ", ",
12467 ts: bitcast_expression(target_type: SPIRType::Int, arg: ops[3]), ts: ")");
12468 auto &restype = get<SPIRType>(id: ops[0]);
12469 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr);
12470 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
12471 break;
12472 }
12473
12474 // Image load/store
12475 case OpImageRead:
12476 case OpImageSparseRead:
12477 {
12478 // We added Nonreadable speculatively to the OpImage variable due to glslangValidator
12479 // not adding the proper qualifiers.
12480 // If it turns out we need to read the image after all, remove the qualifier and recompile.
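		// E.g. an image that was speculatively declared as
		//   layout(rgba8) writeonly uniform image2D img;
		// (writeonly corresponding to NonReadable, with illustrative names) has to lose its
		// writeonly qualifier now that we see a read from it, hence the forced recompile.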
12481 auto *var = maybe_get_backing_variable(chain: ops[2]);
12482 if (var)
12483 {
12484 auto &flags = get_decoration_bitset(id: var->self);
12485 if (flags.get(bit: DecorationNonReadable))
12486 {
12487 unset_decoration(id: var->self, decoration: DecorationNonReadable);
12488 force_recompile();
12489 }
12490 }
12491
12492 uint32_t result_type = ops[0];
12493 uint32_t id = ops[1];
12494
12495 bool pure;
12496 string imgexpr;
12497 auto &type = expression_type(id: ops[2]);
12498
12499 if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
12500 {
12501 if (type.image.ms)
12502 SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
12503
12504 auto itr =
12505 find_if(first: begin(cont&: pls_inputs), last: end(cont&: pls_inputs), pred: [var](const PlsRemap &pls) { return pls.id == var->self; });
12506
12507 if (itr == end(cont&: pls_inputs))
12508 {
12509 // For non-PLS inputs, we rely on subpass type remapping information to get it right
12510 // since ImageRead always returns 4-component vectors and the backing type is opaque.
12511 if (!var->remapped_components)
12512 SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
12513 imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: var->remapped_components, expr: to_expression(id: ops[2]));
12514 }
12515 else
12516 {
12517				// PLS input could have a different number of components than what the SPIR-V expects, so swizzle to
12518				// the appropriate vector size.
12519 uint32_t components = pls_format_to_components(format: itr->format);
12520 imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: components, expr: to_expression(id: ops[2]));
12521 }
12522 pure = true;
12523 }
12524 else if (type.image.dim == DimSubpassData)
12525 {
12526 if (var && subpass_input_is_framebuffer_fetch(id: var->self))
12527 {
12528 imgexpr = to_expression(id: var->self);
12529 }
12530 else if (options.vulkan_semantics)
12531 {
12532 // With Vulkan semantics, use the proper Vulkan GLSL construct.
12533 if (type.image.ms)
12534 {
12535 uint32_t operands = ops[4];
12536 if (operands != ImageOperandsSampleMask || length != 6)
12537 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
12538 "operand mask was used.");
12539
12540 uint32_t samples = ops[5];
12541 imgexpr = join(ts: "subpassLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts: to_expression(id: samples), ts: ")");
12542 }
12543 else
12544 imgexpr = join(ts: "subpassLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ")");
12545 }
12546 else
12547 {
12548 if (type.image.ms)
12549 {
12550 uint32_t operands = ops[4];
12551 if (operands != ImageOperandsSampleMask || length != 6)
12552 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
12553 "operand mask was used.");
12554
12555 uint32_t samples = ops[5];
12556 imgexpr = join(ts: "texelFetch(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ivec2(gl_FragCoord.xy), ",
12557 ts: to_expression(id: samples), ts: ")");
12558 }
12559 else
12560 {
12561 // Implement subpass loads via texture barrier style sampling.
12562 imgexpr = join(ts: "texelFetch(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ivec2(gl_FragCoord.xy), 0)");
12563 }
12564 }
12565 imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 4, expr: imgexpr);
12566 pure = true;
12567 }
12568 else
12569 {
12570 bool sparse = opcode == OpImageSparseRead;
12571 uint32_t sparse_code_id = 0;
12572 uint32_t sparse_texel_id = 0;
12573 if (sparse)
12574 emit_sparse_feedback_temporaries(result_type_id: ops[0], id: ops[1], feedback_id&: sparse_code_id, texel_id&: sparse_texel_id);
12575
12576 // imageLoad only accepts int coords, not uint.
12577 auto coord_expr = to_expression(id: ops[3]);
12578 auto target_coord_type = expression_type(id: ops[3]);
12579 target_coord_type.basetype = SPIRType::Int;
12580 coord_expr = bitcast_expression(target_type: target_coord_type, expr_type: expression_type(id: ops[3]).basetype, expr: coord_expr);
12581
12582 // ES needs to emulate 1D images as 2D.
12583 if (type.image.dim == Dim1D && options.es)
12584 coord_expr = join(ts: "ivec2(", ts&: coord_expr, ts: ", 0)");
12585
12586 // Plain image load/store.
12587 if (sparse)
12588 {
12589 if (type.image.ms)
12590 {
12591 uint32_t operands = ops[4];
12592 if (operands != ImageOperandsSampleMask || length != 6)
12593 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
12594 "operand mask was used.");
12595
12596 uint32_t samples = ops[5];
12597 statement(ts: to_expression(id: sparse_code_id), ts: " = sparseImageLoadARB(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ",
12598 ts&: coord_expr, ts: ", ", ts: to_expression(id: samples), ts: ", ", ts: to_expression(id: sparse_texel_id), ts: ");");
12599 }
12600 else
12601 {
12602 statement(ts: to_expression(id: sparse_code_id), ts: " = sparseImageLoadARB(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ",
12603 ts&: coord_expr, ts: ", ", ts: to_expression(id: sparse_texel_id), ts: ");");
12604 }
12605 imgexpr = join(ts: type_to_glsl(type: get<SPIRType>(id: result_type)), ts: "(", ts: to_expression(id: sparse_code_id), ts: ", ",
12606 ts: to_expression(id: sparse_texel_id), ts: ")");
12607 }
12608 else
12609 {
12610 if (type.image.ms)
12611 {
12612 uint32_t operands = ops[4];
12613 if (operands != ImageOperandsSampleMask || length != 6)
12614 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
12615 "operand mask was used.");
12616
12617 uint32_t samples = ops[5];
12618 imgexpr =
12619 join(ts: "imageLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: coord_expr, ts: ", ", ts: to_expression(id: samples), ts: ")");
12620 }
12621 else
12622 imgexpr = join(ts: "imageLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: coord_expr, ts: ")");
12623 }
12624
12625 if (!sparse)
12626 imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 4, expr: imgexpr);
12627 pure = false;
12628 }
12629
12630 if (var)
12631 {
12632 bool forward = forced_temporaries.find(x: id) == end(cont&: forced_temporaries);
12633 auto &e = emit_op(result_type, result_id: id, rhs: imgexpr, forwarding: forward);
12634
12635 // We only need to track dependencies if we're reading from image load/store.
12636 if (!pure)
12637 {
12638 e.loaded_from = var->self;
12639 if (forward)
12640 var->dependees.push_back(t: id);
12641 }
12642 }
12643 else
12644 emit_op(result_type, result_id: id, rhs: imgexpr, forwarding: false);
12645
12646 inherit_expression_dependencies(dst: id, source: ops[2]);
12647 if (type.image.ms)
12648 inherit_expression_dependencies(dst: id, source: ops[5]);
12649 break;
12650 }
12651
12652 case OpImageTexelPointer:
12653 {
12654 uint32_t result_type = ops[0];
12655 uint32_t id = ops[1];
12656
12657 auto coord_expr = to_expression(id: ops[3]);
12658 auto target_coord_type = expression_type(id: ops[3]);
12659 target_coord_type.basetype = SPIRType::Int;
12660 coord_expr = bitcast_expression(target_type: target_coord_type, expr_type: expression_type(id: ops[3]).basetype, expr: coord_expr);
12661
12662 auto expr = join(ts: to_expression(id: ops[2]), ts: ", ", ts&: coord_expr);
12663 auto &e = set<SPIRExpression>(id, args&: expr, args&: result_type, args: true);
12664
12665 // When using the pointer, we need to know which variable it is actually loaded from.
12666 auto *var = maybe_get_backing_variable(chain: ops[2]);
12667 e.loaded_from = var ? var->self : ID(0);
12668 inherit_expression_dependencies(dst: id, source: ops[3]);
12669 break;
12670 }
12671
12672 case OpImageWrite:
12673 {
12674 // We added Nonwritable speculatively to the OpImage variable due to glslangValidator
12675 // not adding the proper qualifiers.
12676 // If it turns out we need to write to the image after all, remove the qualifier and recompile.
12677 auto *var = maybe_get_backing_variable(chain: ops[0]);
12678 if (var)
12679 {
12680 if (has_decoration(id: var->self, decoration: DecorationNonWritable))
12681 {
12682 unset_decoration(id: var->self, decoration: DecorationNonWritable);
12683 force_recompile();
12684 }
12685 }
12686
12687 auto &type = expression_type(id: ops[0]);
12688 auto &value_type = expression_type(id: ops[2]);
12689 auto store_type = value_type;
12690 store_type.vecsize = 4;
12691
12692 // imageStore only accepts int coords, not uint.
12693 auto coord_expr = to_expression(id: ops[1]);
12694 auto target_coord_type = expression_type(id: ops[1]);
12695 target_coord_type.basetype = SPIRType::Int;
12696 coord_expr = bitcast_expression(target_type: target_coord_type, expr_type: expression_type(id: ops[1]).basetype, expr: coord_expr);
12697
12698 // ES needs to emulate 1D images as 2D.
12699 if (type.image.dim == Dim1D && options.es)
12700 coord_expr = join(ts: "ivec2(", ts&: coord_expr, ts: ", 0)");
12701
12702 if (type.image.ms)
12703 {
12704 uint32_t operands = ops[3];
12705 if (operands != ImageOperandsSampleMask || length != 5)
12706 SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
12707 uint32_t samples = ops[4];
12708 statement(ts: "imageStore(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts&: coord_expr, ts: ", ", ts: to_expression(id: samples), ts: ", ",
12709 ts: remap_swizzle(out_type: store_type, input_components: value_type.vecsize, expr: to_expression(id: ops[2])), ts: ");");
12710 }
12711 else
12712 statement(ts: "imageStore(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts&: coord_expr, ts: ", ",
12713 ts: remap_swizzle(out_type: store_type, input_components: value_type.vecsize, expr: to_expression(id: ops[2])), ts: ");");
12714
12715 if (var && variable_storage_is_aliased(var: *var))
12716 flush_all_aliased_variables();
12717 break;
12718 }
12719
12720 case OpImageQuerySize:
12721 {
12722 auto &type = expression_type(id: ops[2]);
12723 uint32_t result_type = ops[0];
12724 uint32_t id = ops[1];
12725
12726 if (type.basetype == SPIRType::Image)
12727 {
12728 string expr;
12729 if (type.image.sampled == 2)
12730 {
12731 if (!options.es && options.version < 430)
12732 require_extension_internal(ext: "GL_ARB_shader_image_size");
12733 else if (options.es && options.version < 310)
12734 SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
12735
12736 // The size of an image is always constant.
12737 expr = join(ts: "imageSize(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ")");
12738 }
12739 else
12740 {
12741 // This path is hit for samplerBuffers and multisampled images which do not have LOD.
12742 std::string fname = "textureSize";
12743 if (is_legacy())
12744 {
12745 auto &imgtype = get<SPIRType>(id: type.self);
12746 fname = legacy_tex_op(op: fname, imgtype, tex: ops[2]);
12747 }
12748 expr = join(ts&: fname, ts: "(", ts: convert_separate_image_to_expression(id: ops[2]), ts: ")");
12749 }
12750
12751 auto &restype = get<SPIRType>(id: ops[0]);
12752 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr);
12753 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
12754 }
12755 else
12756 SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
12757 break;
12758 }
12759
12760 // Compute
12761 case OpControlBarrier:
12762 case OpMemoryBarrier:
12763 {
12764 uint32_t execution_scope = 0;
12765 uint32_t memory;
12766 uint32_t semantics;
12767
12768 if (opcode == OpMemoryBarrier)
12769 {
12770 memory = evaluate_constant_u32(id: ops[0]);
12771 semantics = evaluate_constant_u32(id: ops[1]);
12772 }
12773 else
12774 {
12775 execution_scope = evaluate_constant_u32(id: ops[0]);
12776 memory = evaluate_constant_u32(id: ops[1]);
12777 semantics = evaluate_constant_u32(id: ops[2]);
12778 }
12779
12780 if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
12781 {
12782 // OpControlBarrier with ScopeSubgroup is subgroupBarrier()
12783 if (opcode != OpControlBarrier)
12784 {
12785 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMemBarrier);
12786 }
12787 else
12788 {
12789 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBarrier);
12790 }
12791 }
12792
12793 if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
12794 {
12795			// Tessellation control shaders only have barrier(), and it implies memory barriers.
12796 if (opcode == OpControlBarrier)
12797 statement(ts: "barrier();");
12798 break;
12799 }
12800
12801 // We only care about these flags, acquire/release and friends are not relevant to GLSL.
12802 semantics = mask_relevant_memory_semantics(semantics);
12803
12804 if (opcode == OpMemoryBarrier)
12805 {
12806			// If we are a memory barrier, and the next instruction is a control barrier, check if the control barrier's
12807			// implied memory semantics already cover what we need, so we can avoid emitting a redundant barrier.
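			// E.g. the intent is that a sequence which would otherwise emit:
			//   memoryBarrierShared();
			//   barrier();
			// collapses to just barrier() when the control barrier already covers workgroup memory.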
12808 const Instruction *next = get_next_instruction_in_block(instr: instruction);
12809 if (next && next->op == OpControlBarrier)
12810 {
12811 auto *next_ops = stream(instr: *next);
12812 uint32_t next_memory = evaluate_constant_u32(id: next_ops[1]);
12813 uint32_t next_semantics = evaluate_constant_u32(id: next_ops[2]);
12814 next_semantics = mask_relevant_memory_semantics(semantics: next_semantics);
12815
12816 bool memory_scope_covered = false;
12817 if (next_memory == memory)
12818 memory_scope_covered = true;
12819 else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
12820 {
12821 // If we only care about workgroup memory, either Device or Workgroup scope is fine,
12822 // scope does not have to match.
12823 if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
12824 (memory == ScopeDevice || memory == ScopeWorkgroup))
12825 {
12826 memory_scope_covered = true;
12827 }
12828 }
12829 else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
12830 {
12831 // The control barrier has device scope, but the memory barrier just has workgroup scope.
12832 memory_scope_covered = true;
12833 }
12834
12835 // If we have the same memory scope, and all memory types are covered, we're good.
12836 if (memory_scope_covered && (semantics & next_semantics) == semantics)
12837 break;
12838 }
12839 }
12840
12841 // We are synchronizing some memory or syncing execution,
12842 // so we cannot forward any loads beyond the memory barrier.
12843 if (semantics || opcode == OpControlBarrier)
12844 {
12845 assert(current_emitting_block);
12846 flush_control_dependent_expressions(block: current_emitting_block->self);
12847 flush_all_active_variables();
12848 }
12849
12850 if (memory == ScopeWorkgroup) // Only need to consider memory within a group
12851 {
12852 if (semantics == MemorySemanticsWorkgroupMemoryMask)
12853 {
12854 // OpControlBarrier implies a memory barrier for shared memory as well.
12855 bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup;
12856 if (!implies_shared_barrier)
12857 statement(ts: "memoryBarrierShared();");
12858 }
12859 else if (semantics != 0)
12860 statement(ts: "groupMemoryBarrier();");
12861 }
12862 else if (memory == ScopeSubgroup)
12863 {
12864 const uint32_t all_barriers =
12865 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
12866
12867 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
12868 {
12869				// These are not relevant for GLSL, but assume it means subgroupMemoryBarrier().
12870				// subgroupMemoryBarrier() does everything, so no need to test anything else.
12871 statement(ts: "subgroupMemoryBarrier();");
12872 }
12873 else if ((semantics & all_barriers) == all_barriers)
12874 {
12875 // Short-hand instead of emitting 3 barriers.
12876 statement(ts: "subgroupMemoryBarrier();");
12877 }
12878 else
12879 {
12880 // Pick out individual barriers.
12881 if (semantics & MemorySemanticsWorkgroupMemoryMask)
12882 statement(ts: "subgroupMemoryBarrierShared();");
12883 if (semantics & MemorySemanticsUniformMemoryMask)
12884 statement(ts: "subgroupMemoryBarrierBuffer();");
12885 if (semantics & MemorySemanticsImageMemoryMask)
12886 statement(ts: "subgroupMemoryBarrierImage();");
12887 }
12888 }
12889 else
12890 {
12891 const uint32_t all_barriers =
12892 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
12893
12894 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
12895 {
12896 // These are not relevant for GLSL, but assume it means memoryBarrier().
12897 // memoryBarrier() does everything, so no need to test anything else.
12898 statement(ts: "memoryBarrier();");
12899 }
12900 else if ((semantics & all_barriers) == all_barriers)
12901 {
12902				// Short-hand instead of emitting 3 barriers.
12903 statement(ts: "memoryBarrier();");
12904 }
12905 else
12906 {
12907 // Pick out individual barriers.
12908 if (semantics & MemorySemanticsWorkgroupMemoryMask)
12909 statement(ts: "memoryBarrierShared();");
12910 if (semantics & MemorySemanticsUniformMemoryMask)
12911 statement(ts: "memoryBarrierBuffer();");
12912 if (semantics & MemorySemanticsImageMemoryMask)
12913 statement(ts: "memoryBarrierImage();");
12914 }
12915 }
12916
12917 if (opcode == OpControlBarrier)
12918 {
12919 if (execution_scope == ScopeSubgroup)
12920 statement(ts: "subgroupBarrier();");
12921 else
12922 statement(ts: "barrier();");
12923 }
12924 break;
12925 }
12926
12927 case OpExtInst:
12928 {
12929 uint32_t extension_set = ops[2];
12930 auto ext = get<SPIRExtension>(id: extension_set).ext;
12931
12932 if (ext == SPIRExtension::GLSL)
12933 {
12934 emit_glsl_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length: length - 4);
12935 }
12936 else if (ext == SPIRExtension::SPV_AMD_shader_ballot)
12937 {
12938 emit_spv_amd_shader_ballot_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4);
12939 }
12940 else if (ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
12941 {
12942 emit_spv_amd_shader_explicit_vertex_parameter_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4);
12943 }
12944 else if (ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
12945 {
12946 emit_spv_amd_shader_trinary_minmax_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4);
12947 }
12948 else if (ext == SPIRExtension::SPV_AMD_gcn_shader)
12949 {
12950 emit_spv_amd_gcn_shader_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4);
12951 }
12952 else if (ext == SPIRExtension::SPV_debug_info)
12953 {
12954 break; // Ignore SPIR-V debug information extended instructions.
12955 }
12956 else if (ext == SPIRExtension::NonSemanticDebugPrintf)
12957 {
12958 // Operation 1 is printf.
12959 if (ops[3] == 1)
12960 {
12961 if (!options.vulkan_semantics)
12962 SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.\n");
12963 require_extension_internal(ext: "GL_EXT_debug_printf");
12964 auto &format_string = get<SPIRString>(id: ops[4]).str;
12965 string expr = join(ts: "debugPrintfEXT(\"", ts&: format_string, ts: "\"");
12966 for (uint32_t i = 5; i < length; i++)
12967 {
12968 expr += ", ";
12969 expr += to_expression(id: ops[i]);
12970 }
12971 statement(ts&: expr, ts: ");");
12972 }
12973 }
12974 else
12975 {
12976 statement(ts: "// unimplemented ext op ", ts: instruction.op);
12977 break;
12978 }
12979
12980 break;
12981 }
12982
12983 // Legacy sub-group stuff ...
12984 case OpSubgroupBallotKHR:
12985 {
12986 uint32_t result_type = ops[0];
12987 uint32_t id = ops[1];
12988 string expr;
12989 expr = join(ts: "uvec4(unpackUint2x32(ballotARB(" + to_expression(id: ops[2]) + ")), 0u, 0u)");
12990 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: ops[2]));
12991
12992 require_extension_internal(ext: "GL_ARB_shader_ballot");
12993 inherit_expression_dependencies(dst: id, source: ops[2]);
12994 register_control_dependent_expression(expr: ops[1]);
12995 break;
12996 }
12997
12998 case OpSubgroupFirstInvocationKHR:
12999 {
13000 uint32_t result_type = ops[0];
13001 uint32_t id = ops[1];
13002 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "readFirstInvocationARB");
13003
13004 require_extension_internal(ext: "GL_ARB_shader_ballot");
13005 register_control_dependent_expression(expr: ops[1]);
13006 break;
13007 }
13008
13009 case OpSubgroupReadInvocationKHR:
13010 {
13011 uint32_t result_type = ops[0];
13012 uint32_t id = ops[1];
13013 emit_binary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "readInvocationARB");
13014
13015 require_extension_internal(ext: "GL_ARB_shader_ballot");
13016 register_control_dependent_expression(expr: ops[1]);
13017 break;
13018 }
13019
13020 case OpSubgroupAllKHR:
13021 {
13022 uint32_t result_type = ops[0];
13023 uint32_t id = ops[1];
13024 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "allInvocationsARB");
13025
13026 require_extension_internal(ext: "GL_ARB_shader_group_vote");
13027 register_control_dependent_expression(expr: ops[1]);
13028 break;
13029 }
13030
13031 case OpSubgroupAnyKHR:
13032 {
13033 uint32_t result_type = ops[0];
13034 uint32_t id = ops[1];
13035 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "anyInvocationARB");
13036
13037 require_extension_internal(ext: "GL_ARB_shader_group_vote");
13038 register_control_dependent_expression(expr: ops[1]);
13039 break;
13040 }
13041
13042 case OpSubgroupAllEqualKHR:
13043 {
13044 uint32_t result_type = ops[0];
13045 uint32_t id = ops[1];
13046 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "allInvocationsEqualARB");
13047
13048 require_extension_internal(ext: "GL_ARB_shader_group_vote");
13049 register_control_dependent_expression(expr: ops[1]);
13050 break;
13051 }
13052
13053 case OpGroupIAddNonUniformAMD:
13054 case OpGroupFAddNonUniformAMD:
13055 {
13056 uint32_t result_type = ops[0];
13057 uint32_t id = ops[1];
13058 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "addInvocationsNonUniformAMD");
13059
13060 require_extension_internal(ext: "GL_AMD_shader_ballot");
13061 register_control_dependent_expression(expr: ops[1]);
13062 break;
13063 }
13064
13065 case OpGroupFMinNonUniformAMD:
13066 case OpGroupUMinNonUniformAMD:
13067 case OpGroupSMinNonUniformAMD:
13068 {
13069 uint32_t result_type = ops[0];
13070 uint32_t id = ops[1];
13071 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "minInvocationsNonUniformAMD");
13072
13073 require_extension_internal(ext: "GL_AMD_shader_ballot");
13074 register_control_dependent_expression(expr: ops[1]);
13075 break;
13076 }
13077
13078 case OpGroupFMaxNonUniformAMD:
13079 case OpGroupUMaxNonUniformAMD:
13080 case OpGroupSMaxNonUniformAMD:
13081 {
13082 uint32_t result_type = ops[0];
13083 uint32_t id = ops[1];
13084 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "maxInvocationsNonUniformAMD");
13085
13086 require_extension_internal(ext: "GL_AMD_shader_ballot");
13087 register_control_dependent_expression(expr: ops[1]);
13088 break;
13089 }
13090
13091 case OpFragmentMaskFetchAMD:
13092 {
13093 auto &type = expression_type(id: ops[2]);
13094 uint32_t result_type = ops[0];
13095 uint32_t id = ops[1];
13096
13097 if (type.image.dim == spv::DimSubpassData)
13098 {
13099 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "fragmentMaskFetchAMD");
13100 }
13101 else
13102 {
13103 emit_binary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "fragmentMaskFetchAMD");
13104 }
13105
13106 require_extension_internal(ext: "GL_AMD_shader_fragment_mask");
13107 break;
13108 }
13109
13110 case OpFragmentFetchAMD:
13111 {
13112 auto &type = expression_type(id: ops[2]);
13113 uint32_t result_type = ops[0];
13114 uint32_t id = ops[1];
13115
13116 if (type.image.dim == spv::DimSubpassData)
13117 {
13118 emit_binary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[4], op: "fragmentFetchAMD");
13119 }
13120 else
13121 {
13122 emit_trinary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op2: ops[4], op: "fragmentFetchAMD");
13123 }
13124
13125 require_extension_internal(ext: "GL_AMD_shader_fragment_mask");
13126 break;
13127 }
13128
13129 // Vulkan 1.1 sub-group stuff ...
13130 case OpGroupNonUniformElect:
13131 case OpGroupNonUniformBroadcast:
13132 case OpGroupNonUniformBroadcastFirst:
13133 case OpGroupNonUniformBallot:
13134 case OpGroupNonUniformInverseBallot:
13135 case OpGroupNonUniformBallotBitExtract:
13136 case OpGroupNonUniformBallotBitCount:
13137 case OpGroupNonUniformBallotFindLSB:
13138 case OpGroupNonUniformBallotFindMSB:
13139 case OpGroupNonUniformShuffle:
13140 case OpGroupNonUniformShuffleXor:
13141 case OpGroupNonUniformShuffleUp:
13142 case OpGroupNonUniformShuffleDown:
13143 case OpGroupNonUniformAll:
13144 case OpGroupNonUniformAny:
13145 case OpGroupNonUniformAllEqual:
13146 case OpGroupNonUniformFAdd:
13147 case OpGroupNonUniformIAdd:
13148 case OpGroupNonUniformFMul:
13149 case OpGroupNonUniformIMul:
13150 case OpGroupNonUniformFMin:
13151 case OpGroupNonUniformFMax:
13152 case OpGroupNonUniformSMin:
13153 case OpGroupNonUniformSMax:
13154 case OpGroupNonUniformUMin:
13155 case OpGroupNonUniformUMax:
13156 case OpGroupNonUniformBitwiseAnd:
13157 case OpGroupNonUniformBitwiseOr:
13158 case OpGroupNonUniformBitwiseXor:
13159 case OpGroupNonUniformLogicalAnd:
13160 case OpGroupNonUniformLogicalOr:
13161 case OpGroupNonUniformLogicalXor:
13162 case OpGroupNonUniformQuadSwap:
13163 case OpGroupNonUniformQuadBroadcast:
13164 emit_subgroup_op(i: instruction);
13165 break;
13166
13167 case OpFUnordEqual:
13168 case OpFUnordLessThan:
13169 case OpFUnordGreaterThan:
13170 case OpFUnordLessThanEqual:
13171 case OpFUnordGreaterThanEqual:
13172 {
13173 // GLSL doesn't specify if floating point comparisons are ordered or unordered,
13174 // but glslang always emits ordered floating point compares for GLSL.
13175 // To get unordered compares, we can test the opposite thing and invert the result.
13176 // This way, we force true when there is any NaN present.
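		// E.g. OpFUnordLessThan(a, b) becomes !(a >= b), which is true when a < b
		// or when either operand is NaN, matching the unordered semantics.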
13177 uint32_t op0 = ops[2];
13178 uint32_t op1 = ops[3];
13179
13180 string expr;
13181 if (expression_type(id: op0).vecsize > 1)
13182 {
13183 const char *comp_op = nullptr;
13184 switch (opcode)
13185 {
13186 case OpFUnordEqual:
13187 comp_op = "notEqual";
13188 break;
13189
13190 case OpFUnordLessThan:
13191 comp_op = "greaterThanEqual";
13192 break;
13193
13194 case OpFUnordLessThanEqual:
13195 comp_op = "greaterThan";
13196 break;
13197
13198 case OpFUnordGreaterThan:
13199 comp_op = "lessThanEqual";
13200 break;
13201
13202 case OpFUnordGreaterThanEqual:
13203 comp_op = "lessThan";
13204 break;
13205
13206 default:
13207 assert(0);
13208 break;
13209 }
13210
13211 expr = join(ts: "not(", ts&: comp_op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ", ", ts: to_unpacked_expression(id: op1), ts: "))");
13212 }
13213 else
13214 {
13215 const char *comp_op = nullptr;
13216 switch (opcode)
13217 {
13218 case OpFUnordEqual:
13219 comp_op = " != ";
13220 break;
13221
13222 case OpFUnordLessThan:
13223 comp_op = " >= ";
13224 break;
13225
13226 case OpFUnordLessThanEqual:
13227 comp_op = " > ";
13228 break;
13229
13230 case OpFUnordGreaterThan:
13231 comp_op = " <= ";
13232 break;
13233
13234 case OpFUnordGreaterThanEqual:
13235 comp_op = " < ";
13236 break;
13237
13238 default:
13239 assert(0);
13240 break;
13241 }
13242
13243 expr = join(ts: "!(", ts: to_enclosed_unpacked_expression(id: op0), ts&: comp_op, ts: to_enclosed_unpacked_expression(id: op1), ts: ")");
13244 }
13245
13246 emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1));
13247 inherit_expression_dependencies(dst: ops[1], source: op0);
13248 inherit_expression_dependencies(dst: ops[1], source: op1);
13249 break;
13250 }
13251
13252 case OpReportIntersectionKHR:
13253		// NV is the same opcode.
13254 forced_temporaries.insert(x: ops[1]);
13255 if (ray_tracing_is_khr)
13256 GLSL_BFOP(reportIntersectionEXT);
13257 else
13258 GLSL_BFOP(reportIntersectionNV);
13259 flush_control_dependent_expressions(block: current_emitting_block->self);
13260 break;
13261 case OpIgnoreIntersectionNV:
13262 // KHR variant is a terminator.
13263 statement(ts: "ignoreIntersectionNV();");
13264 flush_control_dependent_expressions(block: current_emitting_block->self);
13265 break;
13266 case OpTerminateRayNV:
13267 // KHR variant is a terminator.
13268 statement(ts: "terminateRayNV();");
13269 flush_control_dependent_expressions(block: current_emitting_block->self);
13270 break;
13271 case OpTraceNV:
13272 statement(ts: "traceNV(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ", ", ts: to_expression(id: ops[2]), ts: ", ",
13273 ts: to_expression(id: ops[3]), ts: ", ", ts: to_expression(id: ops[4]), ts: ", ", ts: to_expression(id: ops[5]), ts: ", ",
13274 ts: to_expression(id: ops[6]), ts: ", ", ts: to_expression(id: ops[7]), ts: ", ", ts: to_expression(id: ops[8]), ts: ", ",
13275 ts: to_expression(id: ops[9]), ts: ", ", ts: to_expression(id: ops[10]), ts: ");");
13276 flush_control_dependent_expressions(block: current_emitting_block->self);
13277 break;
13278 case OpTraceRayKHR:
13279 if (!has_decoration(id: ops[10], decoration: DecorationLocation))
13280 SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
13281 statement(ts: "traceRayEXT(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ", ", ts: to_expression(id: ops[2]), ts: ", ",
13282 ts: to_expression(id: ops[3]), ts: ", ", ts: to_expression(id: ops[4]), ts: ", ", ts: to_expression(id: ops[5]), ts: ", ",
13283 ts: to_expression(id: ops[6]), ts: ", ", ts: to_expression(id: ops[7]), ts: ", ", ts: to_expression(id: ops[8]), ts: ", ",
13284 ts: to_expression(id: ops[9]), ts: ", ", ts: get_decoration(id: ops[10], decoration: DecorationLocation), ts: ");");
13285 flush_control_dependent_expressions(block: current_emitting_block->self);
13286 break;
13287 case OpExecuteCallableNV:
13288 statement(ts: "executeCallableNV(", ts: to_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ");");
13289 flush_control_dependent_expressions(block: current_emitting_block->self);
13290 break;
13291 case OpExecuteCallableKHR:
13292 if (!has_decoration(id: ops[1], decoration: DecorationLocation))
13293 SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
13294 statement(ts: "executeCallableEXT(", ts: to_expression(id: ops[0]), ts: ", ", ts: get_decoration(id: ops[1], decoration: DecorationLocation), ts: ");");
13295 flush_control_dependent_expressions(block: current_emitting_block->self);
13296 break;
13297
13298 // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects.
13299 case OpRayQueryInitializeKHR:
13300 flush_variable_declaration(id: ops[0]);
13301 statement(ts: "rayQueryInitializeEXT(",
13302 ts: to_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ", ",
13303 ts: to_expression(id: ops[2]), ts: ", ", ts: to_expression(id: ops[3]), ts: ", ",
13304 ts: to_expression(id: ops[4]), ts: ", ", ts: to_expression(id: ops[5]), ts: ", ",
13305 ts: to_expression(id: ops[6]), ts: ", ", ts: to_expression(id: ops[7]), ts: ");");
13306 break;
13307 case OpRayQueryProceedKHR:
13308 flush_variable_declaration(id: ops[0]);
13309 emit_op(result_type: ops[0], result_id: ops[1], rhs: join(ts: "rayQueryProceedEXT(", ts: to_expression(id: ops[2]), ts: ")"), forwarding: false);
13310 break;
13311 case OpRayQueryTerminateKHR:
13312 flush_variable_declaration(id: ops[0]);
13313 statement(ts: "rayQueryTerminateEXT(", ts: to_expression(id: ops[0]), ts: ");");
13314 break;
13315 case OpRayQueryGenerateIntersectionKHR:
13316 flush_variable_declaration(id: ops[0]);
13317 statement(ts: "rayQueryGenerateIntersectionEXT(", ts: to_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ");");
13318 break;
13319 case OpRayQueryConfirmIntersectionKHR:
13320 flush_variable_declaration(id: ops[0]);
13321 statement(ts: "rayQueryConfirmIntersectionEXT(", ts: to_expression(id: ops[0]), ts: ");");
13322 break;
13323#define GLSL_RAY_QUERY_GET_OP(op) \
13324 case OpRayQueryGet##op##KHR: \
13325 flush_variable_declaration(ops[2]); \
13326 emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \
13327 break
13328#define GLSL_RAY_QUERY_GET_OP2(op) \
13329 case OpRayQueryGet##op##KHR: \
13330 flush_variable_declaration(ops[2]); \
13331 emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \
13332 break
13333 GLSL_RAY_QUERY_GET_OP(RayTMin);
13334 GLSL_RAY_QUERY_GET_OP(RayFlags);
13335 GLSL_RAY_QUERY_GET_OP(WorldRayOrigin);
13336 GLSL_RAY_QUERY_GET_OP(WorldRayDirection);
13337 GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque);
13338 GLSL_RAY_QUERY_GET_OP2(IntersectionType);
13339 GLSL_RAY_QUERY_GET_OP2(IntersectionT);
13340 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex);
13341 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId);
13342 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset);
13343 GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex);
13344 GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex);
13345 GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics);
13346 GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace);
13347 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection);
13348 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin);
13349 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld);
13350 GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject);
13351#undef GLSL_RAY_QUERY_GET_OP
13352#undef GLSL_RAY_QUERY_GET_OP2
13353
13354 case OpConvertUToAccelerationStructureKHR:
13355 require_extension_internal(ext: "GL_EXT_ray_tracing");
13356 GLSL_UFOP(accelerationStructureEXT);
13357 break;
13358
13359 case OpConvertUToPtr:
13360 {
13361 auto &type = get<SPIRType>(id: ops[0]);
13362 if (type.storage != StorageClassPhysicalStorageBufferEXT)
13363 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
13364
13365 auto &in_type = expression_type(id: ops[2]);
13366 if (in_type.vecsize == 2)
13367 require_extension_internal(ext: "GL_EXT_buffer_reference_uvec2");
13368
13369 auto op = type_to_glsl(type);
13370 emit_unary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op: op.c_str());
13371 break;
13372 }
13373
13374 case OpConvertPtrToU:
13375 {
13376 auto &type = get<SPIRType>(id: ops[0]);
13377 auto &ptr_type = expression_type(id: ops[2]);
13378 if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
13379 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
13380
13381 if (type.vecsize == 2)
13382 require_extension_internal(ext: "GL_EXT_buffer_reference_uvec2");
13383
13384 auto op = type_to_glsl(type);
13385 emit_unary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op: op.c_str());
13386 break;
13387 }
13388
13389 case OpUndef:
13390 // Undefined value has been declared.
13391 break;
13392
13393 case OpLine:
13394 {
13395 emit_line_directive(file_id: ops[0], line_literal: ops[1]);
13396 break;
13397 }
13398
13399 case OpNoLine:
13400 break;
13401
13402 case OpDemoteToHelperInvocationEXT:
13403 if (!options.vulkan_semantics)
13404 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
13405 require_extension_internal(ext: "GL_EXT_demote_to_helper_invocation");
13406 statement(ts&: backend.demote_literal, ts: ";");
13407 break;
13408
13409 case OpIsHelperInvocationEXT:
13410 if (!options.vulkan_semantics)
13411 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
13412 require_extension_internal(ext: "GL_EXT_demote_to_helper_invocation");
13413 // Helper lane state with demote is volatile by nature.
13414 // Do not forward this.
13415 emit_op(result_type: ops[0], result_id: ops[1], rhs: "helperInvocationEXT()", forwarding: false);
13416 break;
13417
13418 case OpBeginInvocationInterlockEXT:
13419 // If the interlock is complex, we emit this elsewhere.
13420 if (!interlocked_is_complex)
13421 {
13422 statement(ts: "SPIRV_Cross_beginInvocationInterlock();");
13423 flush_all_active_variables();
13424 // Make sure forwarding doesn't propagate outside interlock region.
13425 }
13426 break;
13427
13428 case OpEndInvocationInterlockEXT:
13429 // If the interlock is complex, we emit this elsewhere.
13430 if (!interlocked_is_complex)
13431 {
13432 statement(ts: "SPIRV_Cross_endInvocationInterlock();");
13433 flush_all_active_variables();
13434 // Make sure forwarding doesn't propagate outside interlock region.
13435 }
13436 break;
13437
13438 default:
13439 statement(ts: "// unimplemented op ", ts: instruction.op);
13440 break;
13441 }
13442}
13443
13444// Appends function arguments, mapped from global variables, beyond the specified arg index.
13445// This is used when a function call uses fewer arguments than the function defines.
13446// This situation may occur if the function signature has been dynamically modified to
13447// extract global variables referenced from within the function, and convert them to
13448// function arguments. This is necessary for shader languages that do not support global
13449// access to shader input content from within a function (e.g. Metal). Each additional
13450// function arg uses the name of the global variable. Function nesting will modify the
13451// functions and function calls all the way up the nesting chain.
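// For example (illustrative names), a call originally emitted as foo(arg0) may end up as
// foo(arg0, some_global) once some_global has been hoisted into the callee's argument list.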
13452void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
13453{
13454 auto &args = func.arguments;
13455 uint32_t arg_cnt = uint32_t(args.size());
13456 for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
13457 {
13458 auto &arg = args[arg_idx];
13459 assert(arg.alias_global_variable);
13460
13461 // If the underlying variable needs to be declared
13462		// (i.e. a local variable with deferred declaration), do so now.
13463 uint32_t var_id = get<SPIRVariable>(id: arg.id).basevariable;
13464 if (var_id)
13465 flush_variable_declaration(id: var_id);
13466
13467 arglist.push_back(t: to_func_call_arg(arg, id: arg.id));
13468 }
13469}
13470
13471string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
13472{
13473 if (type.type_alias != TypeID(0) &&
13474 !has_extended_decoration(id: type.type_alias, decoration: SPIRVCrossDecorationBufferBlockRepacked))
13475 {
13476 return to_member_name(type: get<SPIRType>(id: type.type_alias), index);
13477 }
13478
13479 auto &memb = ir.meta[type.self].members;
13480 if (index < memb.size() && !memb[index].alias.empty())
13481 return memb[index].alias;
13482 else
13483 return join(ts: "_m", ts&: index);
13484}
13485
13486string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
13487{
13488 return join(ts: ".", ts: to_member_name(type, index));
13489}
13490
13491string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
13492{
13493 string ret;
13494 auto *member_type = &type;
13495 for (auto &index : indices)
13496 {
13497 ret += join(ts: ".", ts: to_member_name(type: *member_type, index));
13498 member_type = &get<SPIRType>(id: member_type->member_types[index]);
13499 }
13500 return ret;
13501}
13502
13503void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
13504{
13505 auto &memb = ir.meta[type.self].members;
13506 if (index < memb.size() && !memb[index].alias.empty())
13507 {
13508 auto &name = memb[index].alias;
13509 if (name.empty())
13510 return;
13511
13512 ParsedIR::sanitize_identifier(str&: name, member: true, allow_reserved_prefixes: true);
13513 update_name_cache(cache&: type.member_name_cache, name);
13514 }
13515}
13516
13517// Checks whether the ID is a row_major matrix that requires conversion before use
13518bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
13519{
13520 // Natively supported row-major matrices do not need to be converted.
13521 // Legacy targets do not support row major.
13522 if (backend.native_row_major_matrix && !is_legacy())
13523 return false;
13524
13525 auto *e = maybe_get<SPIRExpression>(id);
13526 if (e)
13527 return e->need_transpose;
13528 else
13529 return has_decoration(id, decoration: DecorationRowMajor);
13530}
13531
13532// Checks whether the member is a row_major matrix that requires conversion before use
13533bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
13534{
13535 // Natively supported row-major matrices do not need to be converted.
13536 if (backend.native_row_major_matrix && !is_legacy())
13537 return false;
13538
13539 // Non-matrix or column-major matrix types do not need to be converted.
13540 if (!has_member_decoration(id: type.self, index, decoration: DecorationRowMajor))
13541 return false;
13542
13543 // Only square row-major matrices can be converted at this time.
13544	// Converting non-square matrices will require defining a custom GLSL function that
13545 // swaps matrix elements while retaining the original dimensional form of the matrix.
13546 const auto mbr_type = get<SPIRType>(id: type.member_types[index]);
13547 if (mbr_type.columns != mbr_type.vecsize)
13548 SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
13549
13550 return true;
13551}
13552
13553// Checks if we need to remap physical type IDs when declaring the type in a buffer.
13554bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
13555{
13556 return has_extended_member_decoration(type: type.self, index, decoration: SPIRVCrossDecorationPhysicalTypeID);
13557}
13558
13559// Checks whether the member is a packed data type that might need to be unpacked.
13560bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
13561{
13562 return has_extended_member_decoration(type: type.self, index, decoration: SPIRVCrossDecorationPhysicalTypePacked);
13563}
13564
13565// Wraps the expression string in a function call that converts the
13566// row_major matrix result of the expression to a column_major matrix.
13567// Base implementation uses the standard library transpose() function.
13568// Subclasses may override to use a different function.
13569string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
13570 bool /*is_packed*/)
13571{
13572 strip_enclosed_expression(expr&: exp_str);
13573 if (!is_matrix(type: exp_type))
13574 {
13575 auto column_index = exp_str.find_last_of(c: '[');
13576 if (column_index == string::npos)
13577 return exp_str;
13578
13579 auto column_expr = exp_str.substr(pos: column_index);
13580 exp_str.resize(n: column_index);
13581
13582 auto transposed_expr = type_to_glsl_constructor(type: exp_type) + "(";
13583
13584 // Loading a column from a row-major matrix. Unroll the load.
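		// E.g. for a row-major mat3 M (illustrative name), loading M[1] unrolls to
		// vec3(M[0][1], M[1][1], M[2][1]).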
13585 for (uint32_t c = 0; c < exp_type.vecsize; c++)
13586 {
13587 transposed_expr += join(ts&: exp_str, ts: '[', ts&: c, ts: ']', ts&: column_expr);
13588 if (c + 1 < exp_type.vecsize)
13589 transposed_expr += ", ";
13590 }
13591
13592 transposed_expr += ")";
13593 return transposed_expr;
13594 }
13595 else if (options.version < 120)
13596 {
13597 // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that
13598 // these GLSL versions do not support non-square matrices.
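		// The spvTranspose() helper requested below is expected to expand to something like
		//   mat2 spvTranspose(mat2 m) { return mat2(m[0][0], m[1][0], m[0][1], m[1][1]); }
		// (a sketch; the actual helper is emitted elsewhere once requires_transpose_* is set).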
13599 if (exp_type.vecsize == 2 && exp_type.columns == 2)
13600 {
13601 if (!requires_transpose_2x2)
13602 {
13603 requires_transpose_2x2 = true;
13604 force_recompile();
13605 }
13606 }
13607 else if (exp_type.vecsize == 3 && exp_type.columns == 3)
13608 {
13609 if (!requires_transpose_3x3)
13610 {
13611 requires_transpose_3x3 = true;
13612 force_recompile();
13613 }
13614 }
13615 else if (exp_type.vecsize == 4 && exp_type.columns == 4)
13616 {
13617 if (!requires_transpose_4x4)
13618 {
13619 requires_transpose_4x4 = true;
13620 force_recompile();
13621 }
13622 }
13623 else
13624 SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose.");
13625 return join(ts: "spvTranspose(", ts&: exp_str, ts: ")");
13626 }
13627 else
13628 return join(ts: "transpose(", ts&: exp_str, ts: ")");
13629}
13630
13631string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
13632{
13633 string type_name = type_to_glsl(type, id);
13634 remap_variable_type_name(type, var_name: name, type_name);
13635 return join(ts&: type_name, ts: " ", ts: name, ts: type_to_array_glsl(type));
13636}
13637
13638bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const
13639{
13640 return var.storage == storage;
13641}
13642
13643// Emit a structure member. Subclasses may override to modify output,
13644// or to dynamically add a padding member if needed.
13645void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
13646 const string &qualifier, uint32_t)
13647{
13648 auto &membertype = get<SPIRType>(id: member_type_id);
13649
13650 Bitset memberflags;
13651 auto &memb = ir.meta[type.self].members;
13652 if (index < memb.size())
13653 memberflags = memb[index].decoration_flags;
13654
13655 string qualifiers;
13656 bool is_block = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock) ||
13657 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
13658
13659 if (is_block)
13660 qualifiers = to_interpolation_qualifiers(flags: memberflags);
13661
13662 statement(ts: layout_for_member(type, index), ts&: qualifiers, ts: qualifier, ts: flags_to_qualifiers_glsl(type: membertype, flags: memberflags),
13663 ts: variable_decl(type: membertype, name: to_member_name(type, index)), ts: ";");
13664}
13665
13666void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
13667{
13668}
13669
13670string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
13671{
13672 // GL_EXT_buffer_reference variables can be marked as restrict.
13673 if (flags.get(bit: DecorationRestrictPointerEXT))
13674 return "restrict ";
13675
13676 string qual;
13677
13678 if (type_is_floating_point(type) && flags.get(bit: DecorationNoContraction) && backend.support_precise_qualifier)
13679 qual = "precise ";
13680
	// Structs do not have precision qualifiers, and neither do doubles (desktop only anyway, so no mediump/highp).
13682 bool type_supports_precision =
13683 type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt ||
13684 type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
13685 type.basetype == SPIRType::Sampler;
13686
13687 if (!type_supports_precision)
13688 return qual;
13689
13690 if (options.es)
13691 {
13692 auto &execution = get_entry_point();
13693
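		// Skip qualifiers that the stage defaults already imply: e.g. a RelaxedPrecision float in a
		// fragment shader whose default float precision is mediump needs no explicit "mediump".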
13694 if (flags.get(bit: DecorationRelaxedPrecision))
13695 {
13696 bool implied_fmediump = type.basetype == SPIRType::Float &&
13697 options.fragment.default_float_precision == Options::Mediump &&
13698 execution.model == ExecutionModelFragment;
13699
13700 bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
13701 options.fragment.default_int_precision == Options::Mediump &&
13702 execution.model == ExecutionModelFragment;
13703
13704 qual += (implied_fmediump || implied_imediump) ? "" : "mediump ";
13705 }
13706 else
13707 {
13708 bool implied_fhighp =
13709 type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
13710 execution.model == ExecutionModelFragment) ||
13711 (execution.model != ExecutionModelFragment));
13712
13713 bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
13714 ((options.fragment.default_int_precision == Options::Highp &&
13715 execution.model == ExecutionModelFragment) ||
13716 (execution.model != ExecutionModelFragment));
13717
13718 qual += (implied_fhighp || implied_ihighp) ? "" : "highp ";
13719 }
13720 }
13721 else if (backend.allow_precision_qualifiers)
13722 {
13723 // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
13724 // The default is highp however, so only emit mediump in the rare case that a shader has these.
13725 if (flags.get(bit: DecorationRelaxedPrecision))
13726 qual += "mediump ";
13727 }
13728
13729 return qual;
13730}
13731
13732string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
13733{
13734 auto &type = expression_type(id);
13735 bool use_precision_qualifiers = backend.allow_precision_qualifiers;
13736 if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
13737 {
13738 // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
13739 auto &result_type = get<SPIRType>(id: type.image.type);
13740 if (result_type.width < 32)
13741 return "mediump ";
13742 }
13743 return flags_to_qualifiers_glsl(type, flags: ir.meta[id].decoration.decoration_flags);
13744}
13745
13746void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var)
13747{
13748 // Works around weird behavior in glslangValidator where
13749 // a patch out block is translated to just block members getting the decoration.
13750 // To make glslang not complain when we compile again, we have to transform this back to a case where
13751 // the variable itself has Patch decoration, and not members.
13752 auto &type = get<SPIRType>(id: var.basetype);
13753 if (has_decoration(id: type.self, decoration: DecorationBlock))
13754 {
13755 uint32_t member_count = uint32_t(type.member_types.size());
13756 for (uint32_t i = 0; i < member_count; i++)
13757 {
13758 if (has_member_decoration(id: type.self, index: i, decoration: DecorationPatch))
13759 {
13760 set_decoration(id: var.self, decoration: DecorationPatch);
13761 break;
13762 }
13763 }
13764
13765 if (has_decoration(id: var.self, decoration: DecorationPatch))
13766 for (uint32_t i = 0; i < member_count; i++)
13767 unset_member_decoration(id: type.self, index: i, decoration: DecorationPatch);
13768 }
13769}
13770
13771string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
13772{
13773 auto &flags = get_decoration_bitset(id);
13774 string res;
13775
13776 auto *var = maybe_get<SPIRVariable>(id);
13777
13778 if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
13779 res += "shared ";
13780
13781 res += to_interpolation_qualifiers(flags);
13782 if (var)
13783 res += to_storage_qualifiers_glsl(var: *var);
13784
13785 auto &type = expression_type(id);
13786 if (type.image.dim != DimSubpassData && type.image.sampled == 2)
13787 {
13788 if (flags.get(bit: DecorationCoherent))
13789 res += "coherent ";
13790 if (flags.get(bit: DecorationRestrict))
13791 res += "restrict ";
13792
13793 if (flags.get(bit: DecorationNonWritable))
13794 res += "readonly ";
13795
13796 bool formatted_load = type.image.format == ImageFormatUnknown;
13797 if (flags.get(bit: DecorationNonReadable))
13798 {
13799 res += "writeonly ";
13800 formatted_load = false;
13801 }
13802
13803 if (formatted_load)
13804 {
13805 if (!options.es)
13806 require_extension_internal(ext: "GL_EXT_shader_image_load_formatted");
13807 else
13808 SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL.");
13809 }
13810 }
13811
13812 res += to_precision_qualifiers_glsl(id);
13813
13814 return res;
13815}
13816
13817string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
13818{
	// glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
13820 auto &type = expression_type(id: arg.id);
13821 const char *direction = "";
13822
13823 if (type.pointer)
13824 {
13825 if (arg.write_count && arg.read_count)
13826 direction = "inout ";
13827 else if (arg.write_count)
13828 direction = "out ";
13829 }
13830
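	// Illustrative result: "inout mediump vec3 v" (the parameter name is assumed).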
13831 return join(ts&: direction, ts: to_qualifiers_glsl(id: arg.id), ts: variable_decl(type, name: to_name(id: arg.id), id: arg.id));
13832}
13833
13834string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
13835{
13836 return to_unpacked_expression(id: var.initializer);
13837}
13838
13839string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id)
13840{
13841#ifndef NDEBUG
13842 auto &type = get<SPIRType>(id: type_id);
13843 assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction ||
13844 type.storage == StorageClassGeneric);
13845#endif
13846 uint32_t id = ir.increase_bound_by(count: 1);
13847 ir.make_constant_null(id, type: type_id, add_to_typed_id_set: false);
13848 return constant_expression(c: get<SPIRConstant>(id));
13849}
13850
13851bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const
13852{
13853 if (type.pointer)
13854 return false;
13855
13856 if (!type.array.empty() && options.flatten_multidimensional_arrays)
13857 return false;
13858
13859 for (auto &literal : type.array_size_literal)
13860 if (!literal)
13861 return false;
13862
13863 for (auto &memb : type.member_types)
13864 if (!type_can_zero_initialize(type: get<SPIRType>(id: memb)))
13865 return false;
13866
13867 return true;
13868}
13869
13870string CompilerGLSL::variable_decl(const SPIRVariable &variable)
13871{
13872 // Ignore the pointer type since GLSL doesn't have pointers.
13873 auto &type = get_variable_data_type(var: variable);
13874
13875 if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer)
13876 SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");
13877
13878 auto res = join(ts: to_qualifiers_glsl(id: variable.self), ts: variable_decl(type, name: to_name(id: variable.self), id: variable.self));
13879
13880 if (variable.loop_variable && variable.static_expression)
13881 {
13882 uint32_t expr = variable.static_expression;
13883 if (ir.ids[expr].get_type() != TypeUndef)
13884 res += join(ts: " = ", ts: to_unpacked_expression(id: variable.static_expression));
13885 else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
13886 res += join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var: variable)));
13887 }
13888 else if (variable.initializer && !variable_decl_is_remapped_storage(var: variable, storage: StorageClassWorkgroup))
13889 {
13890 uint32_t expr = variable.initializer;
13891 if (ir.ids[expr].get_type() != TypeUndef)
13892 res += join(ts: " = ", ts: to_initializer_expression(var: variable));
13893 else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
13894 res += join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var: variable)));
13895 }
13896
13897 return res;
13898}
13899
13900const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
13901{
13902 auto &flags = get_decoration_bitset(id: variable.self);
13903 if (flags.get(bit: DecorationRelaxedPrecision))
13904 return "mediump ";
13905 else
13906 return "highp ";
13907}
13908
13909string CompilerGLSL::pls_decl(const PlsRemap &var)
13910{
13911 auto &variable = get<SPIRVariable>(id: var.id);
13912
13913 SPIRType type;
13914 type.vecsize = pls_format_to_components(format: var.format);
13915 type.basetype = pls_format_to_basetype(format: var.format);
13916
13917 return join(ts: to_pls_layout(format: var.format), ts: to_pls_qualifiers_glsl(variable), ts: type_to_glsl(type), ts: " ",
13918 ts: to_name(id: variable.self));
13919}
13920
13921uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
13922{
13923 return to_array_size_literal(type, index: uint32_t(type.array.size() - 1));
13924}
13925
13926uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
13927{
13928 assert(type.array.size() == type.array_size_literal.size());
13929
13930 if (type.array_size_literal[index])
13931 {
13932 return type.array[index];
13933 }
13934 else
13935 {
13936 // Use the default spec constant value.
13937 // This is the best we can do.
13938 return evaluate_constant_u32(id: type.array[index]);
13939 }
13940}
13941
13942string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
13943{
13944 assert(type.array.size() == type.array_size_literal.size());
13945
13946 auto &size = type.array[index];
13947 if (!type.array_size_literal[index])
13948 return to_expression(id: size);
13949 else if (size)
13950 return convert_to_string(t: size);
13951 else if (!backend.unsized_array_supported)
13952 {
13953 // For runtime-sized arrays, we can work around
13954 // lack of standard support for this by simply having
13955 // a single element array.
13956 //
13957 // Runtime length arrays must always be the last element
13958 // in an interface block.
13959 return "1";
13960 }
13961 else
13962 return "";
13963}
13964
13965string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
13966{
13967 if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
13968 {
13969 // We are using a wrapped pointer type, and we should not emit any array declarations here.
13970 return "";
13971 }
13972
13973 if (type.array.empty())
13974 return "";
13975
13976 if (options.flatten_multidimensional_arrays)
13977 {
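		// Collapse every dimension into a single declared size, e.g. "[4 * 3]" for a
		// two-dimensional array (sizes are illustrative).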
13978 string res;
13979 res += "[";
13980 for (auto i = uint32_t(type.array.size()); i; i--)
13981 {
13982 res += enclose_expression(expr: to_array_size(type, index: i - 1));
13983 if (i > 1)
13984 res += " * ";
13985 }
13986 res += "]";
13987 return res;
13988 }
13989 else
13990 {
13991 if (type.array.size() > 1)
13992 {
13993 if (!options.es && options.version < 430)
13994 require_extension_internal(ext: "GL_ARB_arrays_of_arrays");
13995 else if (options.es && options.version < 310)
13996 SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
13997 "Try using --flatten-multidimensional-arrays or set "
13998 "options.flatten_multidimensional_arrays to true.");
13999 }
14000
14001 string res;
14002 for (auto i = uint32_t(type.array.size()); i; i--)
14003 {
14004 res += "[";
14005 res += to_array_size(type, index: i - 1);
14006 res += "]";
14007 }
14008 return res;
14009 }
14010}
14011
14012string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
14013{
14014 auto &imagetype = get<SPIRType>(id: type.image.type);
14015 string res;
14016
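	// The GLSL type name is assembled piecewise: an optional i/u prefix, a sampler/image/texture stem,
	// the dimensionality, and MS/Array/Shadow suffixes, e.g. "sampler2DArrayShadow" or "uimage3D".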
14017 switch (imagetype.basetype)
14018 {
14019 case SPIRType::Int:
14020 case SPIRType::Short:
14021 case SPIRType::SByte:
14022 res = "i";
14023 break;
14024 case SPIRType::UInt:
14025 case SPIRType::UShort:
14026 case SPIRType::UByte:
14027 res = "u";
14028 break;
14029 default:
14030 break;
14031 }
14032
	// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
	// We cannot express a true half texture type in GLSL, nor short integer formats for that matter.
14035
14036 if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
14037 return res + "subpassInput" + (type.image.ms ? "MS" : "");
14038 else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
14039 subpass_input_is_framebuffer_fetch(id))
14040 {
14041 SPIRType sampled_type = get<SPIRType>(id: type.image.type);
14042 sampled_type.vecsize = 4;
14043 return type_to_glsl(type: sampled_type);
14044 }
14045
14046 // If we're emulating subpassInput with samplers, force sampler2D
14047 // so we don't have to specify format.
14048 if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
14049 {
14050 // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
14051 if (type.image.dim == DimBuffer && type.image.sampled == 1)
14052 res += "sampler";
14053 else
14054 res += type.image.sampled == 2 ? "image" : "texture";
14055 }
14056 else
14057 res += "sampler";
14058
14059 switch (type.image.dim)
14060 {
14061 case Dim1D:
14062 // ES doesn't support 1D. Fake it with 2D.
14063 res += options.es ? "2D" : "1D";
14064 break;
14065 case Dim2D:
14066 res += "2D";
14067 break;
14068 case Dim3D:
14069 res += "3D";
14070 break;
14071 case DimCube:
14072 res += "Cube";
14073 break;
14074 case DimRect:
14075 if (options.es)
14076 SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");
14077
14078 if (is_legacy_desktop())
14079 require_extension_internal(ext: "GL_ARB_texture_rectangle");
14080
14081 res += "2DRect";
14082 break;
14083
14084 case DimBuffer:
14085 if (options.es && options.version < 320)
14086 require_extension_internal(ext: "GL_EXT_texture_buffer");
14087 else if (!options.es && options.version < 300)
14088 require_extension_internal(ext: "GL_EXT_texture_buffer_object");
14089 res += "Buffer";
14090 break;
14091
14092 case DimSubpassData:
14093 res += "2D";
14094 break;
14095 default:
14096 SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
14097 }
14098
14099 if (type.image.ms)
14100 res += "MS";
14101 if (type.image.arrayed)
14102 {
14103 if (is_legacy_desktop())
14104 require_extension_internal(ext: "GL_EXT_texture_array");
14105 res += "Array";
14106 }
14107
14108 // "Shadow" state in GLSL only exists for samplers and combined image samplers.
14109 if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
14110 is_depth_image(type, id))
14111 {
14112 res += "Shadow";
14113 }
14114
14115 return res;
14116}
14117
14118string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
14119{
14120 if (backend.use_array_constructor && type.array.size() > 1)
14121 {
14122 if (options.flatten_multidimensional_arrays)
			SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional arrays, "
			                  "e.g. float[][]().");
14125 else if (!options.es && options.version < 430)
14126 require_extension_internal(ext: "GL_ARB_arrays_of_arrays");
14127 else if (options.es && options.version < 310)
14128 SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
14129 }
14130
14131 auto e = type_to_glsl(type);
14132 if (backend.use_array_constructor)
14133 {
14134 for (uint32_t i = 0; i < type.array.size(); i++)
14135 e += "[]";
14136 }
14137 return e;
14138}
14139
// The optional id parameter indicates the object whose type we are trying
// to find the description for. Most type descriptions do not
// depend on a specific object's use of that type.
14143string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
14144{
14145 if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
14146 {
14147 // Need to create a magic type name which compacts the entire type information.
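		// e.g. a plain pointer to uint becomes "uintPointer"; an array dimension of 4 on the type
		// adds "4_", giving "uint4_Pointer".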
14148 string name = type_to_glsl(type: get_pointee_type(type));
14149 for (size_t i = 0; i < type.array.size(); i++)
14150 {
14151 if (type.array_size_literal[i])
14152 name += join(ts: type.array[i], ts: "_");
14153 else
14154 name += join(ts: "id", ts: type.array[i], ts: "_");
14155 }
14156 name += "Pointer";
14157 return name;
14158 }
14159
14160 switch (type.basetype)
14161 {
14162 case SPIRType::Struct:
14163 // Need OpName lookup here to get a "sensible" name for a struct.
14164 if (backend.explicit_struct_type)
14165 return join(ts: "struct ", ts: to_name(id: type.self));
14166 else
14167 return to_name(id: type.self);
14168
14169 case SPIRType::Image:
14170 case SPIRType::SampledImage:
14171 return image_type_glsl(type, id);
14172
14173 case SPIRType::Sampler:
14174 // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
14175 // this distinction into the type system.
14176 return comparison_ids.count(x: id) ? "samplerShadow" : "sampler";
14177
14178 case SPIRType::AccelerationStructure:
14179 return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";
14180
14181 case SPIRType::RayQuery:
14182 return "rayQueryEXT";
14183
14184 case SPIRType::Void:
14185 return "void";
14186
14187 default:
14188 break;
14189 }
14190
14191 if (type.basetype == SPIRType::UInt && is_legacy())
14192 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
14193
14194 if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
14195 {
14196 switch (type.basetype)
14197 {
14198 case SPIRType::Boolean:
14199 return "bool";
14200 case SPIRType::SByte:
14201 return backend.basic_int8_type;
14202 case SPIRType::UByte:
14203 return backend.basic_uint8_type;
14204 case SPIRType::Short:
14205 return backend.basic_int16_type;
14206 case SPIRType::UShort:
14207 return backend.basic_uint16_type;
14208 case SPIRType::Int:
14209 return backend.basic_int_type;
14210 case SPIRType::UInt:
14211 return backend.basic_uint_type;
14212 case SPIRType::AtomicCounter:
14213 return "atomic_uint";
14214 case SPIRType::Half:
14215 return "float16_t";
14216 case SPIRType::Float:
14217 return "float";
14218 case SPIRType::Double:
14219 return "double";
14220 case SPIRType::Int64:
14221 return "int64_t";
14222 case SPIRType::UInt64:
14223 return "uint64_t";
14224 default:
14225 return "???";
14226 }
14227 }
14228 else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
14229 {
14230 switch (type.basetype)
14231 {
14232 case SPIRType::Boolean:
14233 return join(ts: "bvec", ts: type.vecsize);
14234 case SPIRType::SByte:
14235 return join(ts: "i8vec", ts: type.vecsize);
14236 case SPIRType::UByte:
14237 return join(ts: "u8vec", ts: type.vecsize);
14238 case SPIRType::Short:
14239 return join(ts: "i16vec", ts: type.vecsize);
14240 case SPIRType::UShort:
14241 return join(ts: "u16vec", ts: type.vecsize);
14242 case SPIRType::Int:
14243 return join(ts: "ivec", ts: type.vecsize);
14244 case SPIRType::UInt:
14245 return join(ts: "uvec", ts: type.vecsize);
14246 case SPIRType::Half:
14247 return join(ts: "f16vec", ts: type.vecsize);
14248 case SPIRType::Float:
14249 return join(ts: "vec", ts: type.vecsize);
14250 case SPIRType::Double:
14251 return join(ts: "dvec", ts: type.vecsize);
14252 case SPIRType::Int64:
14253 return join(ts: "i64vec", ts: type.vecsize);
14254 case SPIRType::UInt64:
14255 return join(ts: "u64vec", ts: type.vecsize);
14256 default:
14257 return "???";
14258 }
14259 }
14260 else if (type.vecsize == type.columns) // Simple Matrix builtin
14261 {
14262 switch (type.basetype)
14263 {
14264 case SPIRType::Boolean:
14265 return join(ts: "bmat", ts: type.vecsize);
14266 case SPIRType::Int:
14267 return join(ts: "imat", ts: type.vecsize);
14268 case SPIRType::UInt:
14269 return join(ts: "umat", ts: type.vecsize);
14270 case SPIRType::Half:
14271 return join(ts: "f16mat", ts: type.vecsize);
14272 case SPIRType::Float:
14273 return join(ts: "mat", ts: type.vecsize);
14274 case SPIRType::Double:
14275 return join(ts: "dmat", ts: type.vecsize);
14276 // Matrix types not supported for int64/uint64.
14277 default:
14278 return "???";
14279 }
14280 }
14281 else
14282 {
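		// GLSL names non-square matrices matCxR (columns x rows), so 4 columns by 3 rows yields "mat4x3".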
14283 switch (type.basetype)
14284 {
14285 case SPIRType::Boolean:
14286 return join(ts: "bmat", ts: type.columns, ts: "x", ts: type.vecsize);
14287 case SPIRType::Int:
14288 return join(ts: "imat", ts: type.columns, ts: "x", ts: type.vecsize);
14289 case SPIRType::UInt:
14290 return join(ts: "umat", ts: type.columns, ts: "x", ts: type.vecsize);
14291 case SPIRType::Half:
14292 return join(ts: "f16mat", ts: type.columns, ts: "x", ts: type.vecsize);
14293 case SPIRType::Float:
14294 return join(ts: "mat", ts: type.columns, ts: "x", ts: type.vecsize);
14295 case SPIRType::Double:
14296 return join(ts: "dmat", ts: type.columns, ts: "x", ts: type.vecsize);
14297 // Matrix types not supported for int64/uint64.
14298 default:
14299 return "???";
14300 }
14301 }
14302}
14303
14304void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
14305 const unordered_set<string> &variables_secondary, string &name)
14306{
14307 if (name.empty())
14308 return;
14309
14310 ParsedIR::sanitize_underscores(str&: name);
14311 if (ParsedIR::is_globally_reserved_identifier(str&: name, allow_reserved_prefixes: true))
14312 {
14313 name.clear();
14314 return;
14315 }
14316
14317 update_name_cache(cache_primary&: variables_primary, cache_secondary: variables_secondary, name);
14318}
14319
14320void CompilerGLSL::add_local_variable_name(uint32_t id)
14321{
14322 add_variable(variables_primary&: local_variable_names, variables_secondary: block_names, name&: ir.meta[id].decoration.alias);
14323}
14324
14325void CompilerGLSL::add_resource_name(uint32_t id)
14326{
14327 add_variable(variables_primary&: resource_names, variables_secondary: block_names, name&: ir.meta[id].decoration.alias);
14328}
14329
14330void CompilerGLSL::add_header_line(const std::string &line)
14331{
14332 header_lines.push_back(t: line);
14333}
14334
14335bool CompilerGLSL::has_extension(const std::string &ext) const
14336{
14337 auto itr = find(first: begin(cont: forced_extensions), last: end(cont: forced_extensions), val: ext);
14338 return itr != end(cont: forced_extensions);
14339}
14340
14341void CompilerGLSL::require_extension(const std::string &ext)
14342{
14343 if (!has_extension(ext))
14344 forced_extensions.push_back(t: ext);
14345}
14346
14347void CompilerGLSL::require_extension_internal(const string &ext)
14348{
14349 if (backend.supports_extensions && !has_extension(ext))
14350 {
14351 forced_extensions.push_back(t: ext);
14352 force_recompile();
14353 }
14354}
14355
14356void CompilerGLSL::flatten_buffer_block(VariableID id)
14357{
14358 auto &var = get<SPIRVariable>(id);
14359 auto &type = get<SPIRType>(id: var.basetype);
14360 auto name = to_name(id: type.self, allow_alias: false);
14361 auto &flags = get_decoration_bitset(id: type.self);
14362
14363 if (!type.array.empty())
14364 SPIRV_CROSS_THROW(name + " is an array of UBOs.");
14365 if (type.basetype != SPIRType::Struct)
14366 SPIRV_CROSS_THROW(name + " is not a struct.");
14367 if (!flags.get(bit: DecorationBlock))
14368 SPIRV_CROSS_THROW(name + " is not a block.");
14369 if (type.member_types.empty())
14370 SPIRV_CROSS_THROW(name + " is an empty struct.");
14371
14372 flattened_buffer_blocks.insert(x: id);
14373}
14374
14375bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
14376{
14377 return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
14378}
14379
14380bool CompilerGLSL::check_atomic_image(uint32_t id)
14381{
14382 auto &type = expression_type(id);
14383 if (type.storage == StorageClassImage)
14384 {
14385 if (options.es && options.version < 320)
14386 require_extension_internal(ext: "GL_OES_shader_image_atomic");
14387
14388 auto *var = maybe_get_backing_variable(chain: id);
14389 if (var)
14390 {
14391 if (has_decoration(id: var->self, decoration: DecorationNonWritable) || has_decoration(id: var->self, decoration: DecorationNonReadable))
14392 {
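				// Atomic operations both read and write the image, so any readonly/writeonly
				// qualifiers must be dropped before the image can be used with atomics.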
14393 unset_decoration(id: var->self, decoration: DecorationNonWritable);
14394 unset_decoration(id: var->self, decoration: DecorationNonReadable);
14395 force_recompile();
14396 }
14397 }
14398 return true;
14399 }
14400 else
14401 return false;
14402}
14403
14404void CompilerGLSL::add_function_overload(const SPIRFunction &func)
14405{
14406 Hasher hasher;
14407 for (auto &arg : func.arguments)
14408 {
		// Parameters may or may not be pointers, but that does not change
		// the signature in GLSL/HLSL, so strip the pointer type before hashing.
14412 uint32_t type_id = get_pointee_type_id(type_id: arg.type);
14413 auto &type = get<SPIRType>(id: type_id);
14414
14415 if (!combined_image_samplers.empty())
14416 {
14417 // If we have combined image samplers, we cannot really trust the image and sampler arguments
14418 // we pass down to callees, because they may be shuffled around.
14419 // Ignore these arguments, to make sure that functions need to differ in some other way
14420 // to be considered different overloads.
14421 if (type.basetype == SPIRType::SampledImage ||
14422 (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
14423 {
14424 continue;
14425 }
14426 }
14427
14428 hasher.u32(value: type_id);
14429 }
14430 uint64_t types_hash = hasher.get();
14431
14432 auto function_name = to_name(id: func.self);
14433 auto itr = function_overloads.find(x: function_name);
14434 if (itr != end(cont&: function_overloads))
14435 {
14436 // There exists a function with this name already.
14437 auto &overloads = itr->second;
14438 if (overloads.count(x: types_hash) != 0)
14439 {
14440 // Overload conflict, assign a new name.
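			// GLSL cannot declare two functions with the same name and identical parameter types,
			// so a clashing (pointer-stripped) signature forces a rename here.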
14441 add_resource_name(id: func.self);
14442 function_overloads[to_name(id: func.self)].insert(x: types_hash);
14443 }
14444 else
14445 {
14446 // Can reuse the name.
14447 overloads.insert(x: types_hash);
14448 }
14449 }
14450 else
14451 {
14452 // First time we see this function name.
14453 add_resource_name(id: func.self);
14454 function_overloads[to_name(id: func.self)].insert(x: types_hash);
14455 }
14456}
14457
14458void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
14459{
14460 if (func.self != ir.default_entry_point)
14461 add_function_overload(func);
14462
14463 // Avoid shadow declarations.
14464 local_variable_names = resource_names;
14465
14466 string decl;
14467
14468 auto &type = get<SPIRType>(id: func.return_type);
14469 decl += flags_to_qualifiers_glsl(type, flags: return_flags);
14470 decl += type_to_glsl(type);
14471 decl += type_to_array_glsl(type);
14472 decl += " ";
14473
14474 if (func.self == ir.default_entry_point)
14475 {
14476 // If we need complex fallback in GLSL, we just wrap main() in a function
14477 // and interlock the entire shader ...
14478 if (interlocked_is_complex)
14479 decl += "spvMainInterlockedBody";
14480 else
14481 decl += "main";
14482
14483 processing_entry_point = true;
14484 }
14485 else
14486 decl += to_name(id: func.self);
14487
14488 decl += "(";
14489 SmallVector<string> arglist;
14490 for (auto &arg : func.arguments)
14491 {
14492 // Do not pass in separate images or samplers if we're remapping
14493 // to combined image samplers.
14494 if (skip_argument(id: arg.id))
14495 continue;
14496
14497 // Might change the variable name if it already exists in this function.
		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for multiple variables.
14500 // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
14501 add_local_variable_name(id: arg.id);
14502
14503 arglist.push_back(t: argument_decl(arg));
14504
14505 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
14506 auto *var = maybe_get<SPIRVariable>(id: arg.id);
14507 if (var)
14508 var->parameter = &arg;
14509 }
14510
14511 for (auto &arg : func.shadow_arguments)
14512 {
14513 // Might change the variable name if it already exists in this function.
		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for multiple variables.
14516 // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
14517 add_local_variable_name(id: arg.id);
14518
14519 arglist.push_back(t: argument_decl(arg));
14520
14521 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
14522 auto *var = maybe_get<SPIRVariable>(id: arg.id);
14523 if (var)
14524 var->parameter = &arg;
14525 }
14526
14527 decl += merge(list: arglist);
14528 decl += ")";
14529 statement(ts&: decl);
14530}
14531
14532void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
14533{
14534 // Avoid potential cycles.
14535 if (func.active)
14536 return;
14537 func.active = true;
14538
14539 // If we depend on a function, emit that function before we emit our own function.
14540 for (auto block : func.blocks)
14541 {
14542 auto &b = get<SPIRBlock>(id: block);
14543 for (auto &i : b.ops)
14544 {
14545 auto ops = stream(instr: i);
14546 auto op = static_cast<Op>(i.op);
14547
14548 if (op == OpFunctionCall)
14549 {
14550 // Recursively emit functions which are called.
14551 uint32_t id = ops[2];
14552 emit_function(func&: get<SPIRFunction>(id), return_flags: ir.meta[ops[1]].decoration.decoration_flags);
14553 }
14554 }
14555 }
14556
14557 if (func.entry_line.file_id != 0)
14558 emit_line_directive(file_id: func.entry_line.file_id, line_literal: func.entry_line.line_literal);
14559 emit_function_prototype(func, return_flags);
14560 begin_scope();
14561
14562 if (func.self == ir.default_entry_point)
14563 emit_entry_point_declarations();
14564
14565 current_function = &func;
14566 auto &entry_block = get<SPIRBlock>(id: func.entry_block);
14567
14568 sort(first: begin(cont&: func.constant_arrays_needed_on_stack), last: end(cont&: func.constant_arrays_needed_on_stack));
14569 for (auto &array : func.constant_arrays_needed_on_stack)
14570 {
14571 auto &c = get<SPIRConstant>(id: array);
14572 auto &type = get<SPIRType>(id: c.constant_type);
14573 statement(ts: variable_decl(type, name: join(ts: "_", ts&: array, ts: "_array_copy")), ts: " = ", ts: constant_expression(c), ts: ";");
14574 }
14575
14576 for (auto &v : func.local_variables)
14577 {
14578 auto &var = get<SPIRVariable>(id: v);
14579 var.deferred_declaration = false;
14580
14581 if (variable_decl_is_remapped_storage(var, storage: StorageClassWorkgroup))
14582 {
			// Special variable types which cannot have an initializer
			// need to be declared as standalone variables.
			// Comes from MSL, which can push global variables as local variables in the main function.
14586 add_local_variable_name(id: var.self);
14587 statement(ts: variable_decl(variable: var), ts: ";");
14588 var.deferred_declaration = false;
14589 }
14590 else if (var.storage == StorageClassPrivate)
14591 {
14592 // These variables will not have had their CFG usage analyzed, so move it to the entry block.
14593 // Comes from MSL which can push global variables as local variables in main function.
14594 // We could just declare them right now, but we would miss out on an important initialization case which is
14595 // LUT declaration in MSL.
14596 // If we don't declare the variable when it is assigned we're forced to go through a helper function
14597 // which copies elements one by one.
14598 add_local_variable_name(id: var.self);
14599
14600 if (var.initializer)
14601 {
14602 statement(ts: variable_decl(variable: var), ts: ";");
14603 var.deferred_declaration = false;
14604 }
14605 else
14606 {
14607 auto &dominated = entry_block.dominated_variables;
14608 if (find(first: begin(cont&: dominated), last: end(cont&: dominated), val: var.self) == end(cont&: dominated))
14609 entry_block.dominated_variables.push_back(t: var.self);
14610 var.deferred_declaration = true;
14611 }
14612 }
14613 else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
14614 {
14615 // No need to declare this variable, it has a static expression.
14616 var.deferred_declaration = false;
14617 }
14618 else if (expression_is_lvalue(id: v))
14619 {
14620 add_local_variable_name(id: var.self);
14621
14622 // Loop variables should never be declared early, they are explicitly emitted in a loop.
14623 if (var.initializer && !var.loop_variable)
14624 statement(ts: variable_decl_function_local(var), ts: ";");
14625 else
14626 {
14627 // Don't declare variable until first use to declutter the GLSL output quite a lot.
14628 // If we don't touch the variable before first branch,
14629 // declare it then since we need variable declaration to be in top scope.
14630 var.deferred_declaration = true;
14631 }
14632 }
14633 else
14634 {
14635 // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
14636 // For these types (non-lvalue), we enforce forwarding through a shadowed variable.
14637 // This means that when we OpStore to these variables, we just write in the expression ID directly.
14638 // This breaks any kind of branching, since the variable must be statically assigned.
14639 // Branching on samplers and images would be pretty much impossible to fake in GLSL.
14640 var.statically_assigned = true;
14641 }
14642
14643 var.loop_variable_enable = false;
14644
14645 // Loop variables are never declared outside their for-loop, so block any implicit declaration.
14646 if (var.loop_variable)
14647 {
14648 var.deferred_declaration = false;
14649 // Need to reset the static expression so we can fallback to initializer if need be.
14650 var.static_expression = 0;
14651 }
14652 }
14653
14654 // Enforce declaration order for regression testing purposes.
14655 for (auto &block_id : func.blocks)
14656 {
14657 auto &block = get<SPIRBlock>(id: block_id);
14658 sort(first: begin(cont&: block.dominated_variables), last: end(cont&: block.dominated_variables));
14659 }
14660
14661 for (auto &line : current_function->fixup_hooks_in)
14662 line();
14663
14664 emit_block_chain(block&: entry_block);
14665
14666 end_scope();
14667 processing_entry_point = false;
14668 statement(ts: "");
14669
14670 // Make sure deferred declaration state for local variables is cleared when we are done with function.
14671 // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
14672 for (auto &v : func.local_variables)
14673 {
14674 auto &var = get<SPIRVariable>(id: v);
14675 var.deferred_declaration = false;
14676 }
14677}
14678
14679void CompilerGLSL::emit_fixup()
14680{
14681 if (is_vertex_like_shader())
14682 {
14683 if (options.vertex.fixup_clipspace)
14684 {
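			// Remaps clip-space depth from the [0, w] convention (Vulkan/D3D style) to OpenGL's default [-w, w].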
14685 const char *suffix = backend.float_literal_suffix ? "f" : "";
14686 statement(ts: "gl_Position.z = 2.0", ts&: suffix, ts: " * gl_Position.z - gl_Position.w;");
14687 }
14688
14689 if (options.vertex.flip_vert_y)
14690 statement(ts: "gl_Position.y = -gl_Position.y;");
14691 }
14692}
14693
14694void CompilerGLSL::flush_phi(BlockID from, BlockID to)
14695{
14696 auto &child = get<SPIRBlock>(id: to);
14697 if (child.ignore_phi_from_block == from)
14698 return;
14699
14700 unordered_set<uint32_t> temporary_phi_variables;
14701
14702 for (auto itr = begin(cont&: child.phi_variables); itr != end(cont&: child.phi_variables); ++itr)
14703 {
14704 auto &phi = *itr;
14705
14706 if (phi.parent == from)
14707 {
14708 auto &var = get<SPIRVariable>(id: phi.function_variable);
14709
14710 // A Phi variable might be a loop variable, so flush to static expression.
14711 if (var.loop_variable && !var.loop_variable_enable)
14712 var.static_expression = phi.local_variable;
14713 else
14714 {
14715 flush_variable_declaration(id: phi.function_variable);
14716
14717 // Check if we are going to write to a Phi variable that another statement will read from
14718 // as part of another Phi node in our target block.
14719 // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
14720 // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
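				// The classic case is a phi "swap", (x, y) = (y, x): writing x first would clobber
				// the value that the second phi copy still needs to read.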
14721 bool need_saved_temporary =
14722 find_if(first: itr + 1, last: end(cont&: child.phi_variables), pred: [&](const SPIRBlock::Phi &future_phi) -> bool {
14723 return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
14724 }) != end(cont&: child.phi_variables);
14725
14726 if (need_saved_temporary)
14727 {
14728 // Need to make sure we declare the phi variable with a copy at the right scope.
14729 // We cannot safely declare a temporary here since we might be inside a continue block.
14730 if (!var.allocate_temporary_copy)
14731 {
14732 var.allocate_temporary_copy = true;
14733 force_recompile();
14734 }
14735 statement(ts: "_", ts&: phi.function_variable, ts: "_copy", ts: " = ", ts: to_name(id: phi.function_variable), ts: ";");
14736 temporary_phi_variables.insert(x: phi.function_variable);
14737 }
14738
				// This might be called in a continue block, so make sure we
				// use this to emit ESSL 1.0 compliant increments/decrements.
14741 auto lhs = to_expression(id: phi.function_variable);
14742
14743 string rhs;
14744 if (temporary_phi_variables.count(x: phi.local_variable))
14745 rhs = join(ts: "_", ts&: phi.local_variable, ts: "_copy");
14746 else
14747 rhs = to_pointer_expression(id: phi.local_variable);
14748
14749 if (!optimize_read_modify_write(type: get<SPIRType>(id: var.basetype), lhs, rhs))
14750 statement(ts&: lhs, ts: " = ", ts&: rhs, ts: ";");
14751 }
14752
14753 register_write(chain: phi.function_variable);
14754 }
14755 }
14756}
14757
14758void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
14759{
14760 auto &to_block = get<SPIRBlock>(id: to);
14761 if (from == to)
14762 return;
14763
14764 assert(is_continue(to));
14765 if (to_block.complex_continue)
14766 {
14767 // Just emit the whole block chain as is.
14768 auto usage_counts = expression_usage_counts;
14769
14770 emit_block_chain(block&: to_block);
14771
14772 // Expression usage counts are moot after returning from the continue block.
14773 expression_usage_counts = usage_counts;
14774 }
14775 else
14776 {
14777 auto &from_block = get<SPIRBlock>(id: from);
14778 bool outside_control_flow = false;
14779 uint32_t loop_dominator = 0;
14780
14781 // FIXME: Refactor this to not use the old loop_dominator tracking.
14782 if (from_block.merge_block)
14783 {
14784 // If we are a loop header, we don't set the loop dominator,
14785 // so just use "self" here.
14786 loop_dominator = from;
14787 }
14788 else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
14789 {
14790 loop_dominator = from_block.loop_dominator;
14791 }
14792
14793 if (loop_dominator != 0)
14794 {
14795 auto &cfg = get_cfg_for_current_function();
14796
14797 // For non-complex continue blocks, we implicitly branch to the continue block
14798 // by having the continue block be part of the loop header in for (; ; continue-block).
14799 outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from: loop_dominator, to: from);
14800 }
14801
		// Some simplification for for-loops: we always end up with a useless continue;
		// statement since we branch to a loop block anyway.
		// Walk the CFG: if the block calling continue executes unconditionally once we are inside the loop block,
		// we can avoid writing out an explicit continue statement.
		// This is similar to the optimization applied to return statements when we know we're outside flow control.
14807 if (!outside_control_flow)
14808 statement(ts: "continue;");
14809 }
14810}
14811
14812void CompilerGLSL::branch(BlockID from, BlockID to)
14813{
14814 flush_phi(from, to);
14815 flush_control_dependent_expressions(block: from);
14816
14817 bool to_is_continue = is_continue(next: to);
14818
14819 // This is only a continue if we branch to our loop dominator.
14820 if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(id: from).loop_dominator == to)
14821 {
14822 // This can happen if we had a complex continue block which was emitted.
14823 // Once the continue block tries to branch to the loop header, just emit continue;
14824 // and end the chain here.
14825 statement(ts: "continue;");
14826 }
14827 else if (from != to && is_break(next: to))
14828 {
14829 // We cannot break to ourselves, so check explicitly for from != to.
		// This case can trigger if a loop header is all three of these things at once:
		// - Continue block
		// - Loop header
		// - Break merge target
14834
14835 // Very dirty workaround.
14836 // Switch constructs are able to break, but they cannot break out of a loop at the same time.
14837 // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
14838 // write to the ladder here, and defer the break.
14839 // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
14840 if (current_emitting_switch && is_loop_break(next: to) &&
14841 current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
14842 get<SPIRBlock>(id: current_emitting_switch->loop_dominator).merge_block == to)
14843 {
14844 if (!current_emitting_switch->need_ladder_break)
14845 {
14846 force_recompile();
14847 current_emitting_switch->need_ladder_break = true;
14848 }
14849
14850 statement(ts: "_", ts&: current_emitting_switch->self, ts: "_ladder_break = true;");
14851 }
14852 statement(ts: "break;");
14853 }
14854 else if (to_is_continue || from == to)
14855 {
		// The from == to case can happen for a do-while loop which branches into itself.
		// We don't mark these cases as continue blocks, but the only possible way to branch into
		// ourselves is by means of a continue block.
14859
14860 // If we are merging to a continue block, there is no need to emit the block chain for continue here.
14861 // We can branch to the continue block after we merge execution.
14862
14863 // Here we make use of structured control flow rules from spec:
14864 // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
14865 // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
14866 // If we are branching to a merge block, we must be inside a construct which dominates the merge block.
14867 auto &block_meta = ir.block_meta[to];
14868 bool branching_to_merge =
14869 (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
14870 ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
14871 if (!to_is_continue || !branching_to_merge)
14872 branch_to_continue(from, to);
14873 }
14874 else if (!is_conditional(next: to))
14875 emit_block_chain(block&: get<SPIRBlock>(id: to));
14876
14877 // It is important that we check for break before continue.
14878 // A block might serve two purposes, a break block for the inner scope, and
14879 // a continue block in the outer scope.
14880 // Inner scope always takes precedence.
14881}
14882
14883void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
14884{
14885 auto &from_block = get<SPIRBlock>(id: from);
14886 BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
14887
14888 // If we branch directly to our selection merge target, we don't need a code path.
14889 bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, to: true_block);
14890 bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, to: false_block);
14891
14892 if (!true_block_needs_code && !false_block_needs_code)
14893 return;
14894
14895 // We might have a loop merge here. Only consider selection flattening constructs.
14896 // Loop hints are handled explicitly elsewhere.
14897 if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten)
14898 emit_block_hints(block: from_block);
14899
14900 if (true_block_needs_code)
14901 {
14902 statement(ts: "if (", ts: to_expression(id: cond), ts: ")");
14903 begin_scope();
14904 branch(from, to: true_block);
14905 end_scope();
14906
14907 if (false_block_needs_code)
14908 {
14909 statement(ts: "else");
14910 begin_scope();
14911 branch(from, to: false_block);
14912 end_scope();
14913 }
14914 }
14915 else if (false_block_needs_code)
14916 {
14917 // Only need false path, use negative conditional.
14918 statement(ts: "if (!", ts: to_enclosed_expression(id: cond), ts: ")");
14919 begin_scope();
14920 branch(from, to: false_block);
14921 end_scope();
14922 }
14923}
14924
14925// FIXME: This currently cannot handle complex continue blocks
14926// as in do-while.
14927// This should be seen as a "trivial" continue block.
14928string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
14929{
14930 auto *block = &get<SPIRBlock>(id: continue_block);
14931
14932 // While emitting the continue block, declare_temporary will check this
14933 // if we have to emit temporaries.
14934 current_continue_block = block;
14935
14936 SmallVector<string> statements;
14937
14938 // Capture all statements into our list.
14939 auto *old = redirect_statement;
14940 redirect_statement = &statements;
14941
14942 // Stamp out all blocks one after each other.
14943 while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
14944 {
14945 // Write out all instructions we have in this block.
14946 emit_block_instructions(block&: *block);
14947
14948 // For plain branchless for/while continue blocks.
14949 if (block->next_block)
14950 {
14951 flush_phi(from: continue_block, to: block->next_block);
14952 block = &get<SPIRBlock>(id: block->next_block);
14953 }
		// For do-while blocks, the last block will be a select (conditional branch) block.
14955 else if (block->true_block && follow_true_block)
14956 {
14957 flush_phi(from: continue_block, to: block->true_block);
14958 block = &get<SPIRBlock>(id: block->true_block);
14959 }
14960 else if (block->false_block && follow_false_block)
14961 {
14962 flush_phi(from: continue_block, to: block->false_block);
14963 block = &get<SPIRBlock>(id: block->false_block);
14964 }
14965 else
14966 {
14967 SPIRV_CROSS_THROW("Invalid continue block detected!");
14968 }
14969 }
14970
14971 // Restore old pointer.
14972 redirect_statement = old;
14973
14974 // Somewhat ugly, strip off the last ';' since we use ',' instead.
14975 // Ideally, we should select this behavior in statement().
14976 for (auto &s : statements)
14977 {
14978 if (!s.empty() && s.back() == ';')
14979 s.erase(pos: s.size() - 1, n: 1);
14980 }
14981
14982 current_continue_block = nullptr;
14983 return merge(list: statements);
14984}
14985
14986void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
14987{
14988 // While loops do not take initializers, so declare all of them outside.
14989 for (auto &loop_var : block.loop_variables)
14990 {
14991 auto &var = get<SPIRVariable>(id: loop_var);
14992 statement(ts: variable_decl(variable: var), ts: ";");
14993 }
14994}
14995
14996string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
14997{
14998 if (block.loop_variables.empty())
14999 return "";
15000
15001 bool same_types = for_loop_initializers_are_same_type(block);
15002 // We can only declare for loop initializers if all variables are of same type.
15003 // If we cannot do this, declare individual variables before the loop header.
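	// A same-type set becomes a single combined initializer such as "int i = 0, j = 10"
	// (names and values are illustrative); mixed types fall back to declarations above the loop.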
15004
15005 // We might have a loop variable candidate which was not assigned to for some reason.
15006 uint32_t missing_initializers = 0;
15007 for (auto &variable : block.loop_variables)
15008 {
15009 uint32_t expr = get<SPIRVariable>(id: variable).static_expression;
15010
15011 // Sometimes loop variables are initialized with OpUndef, but we can just declare
15012 // a plain variable without initializer in this case.
15013 if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
15014 missing_initializers++;
15015 }
15016
15017 if (block.loop_variables.size() == 1 && missing_initializers == 0)
15018 {
15019 return variable_decl(variable: get<SPIRVariable>(id: block.loop_variables.front()));
15020 }
15021 else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
15022 {
15023 for (auto &loop_var : block.loop_variables)
15024 statement(ts: variable_decl(variable: get<SPIRVariable>(id: loop_var)), ts: ";");
15025 return "";
15026 }
15027 else
15028 {
15029 // We have a mix of loop variables, either ones with a clear initializer, or ones without.
15030 // Separate the two streams.
15031 string expr;
15032
15033 for (auto &loop_var : block.loop_variables)
15034 {
15035 uint32_t static_expr = get<SPIRVariable>(id: loop_var).static_expression;
15036 if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
15037 {
15038 statement(ts: variable_decl(variable: get<SPIRVariable>(id: loop_var)), ts: ";");
15039 }
15040 else
15041 {
15042 auto &var = get<SPIRVariable>(id: loop_var);
15043 auto &type = get_variable_data_type(var);
15044 if (expr.empty())
15045 {
					// For-loop initializers are of the form "<type> id = value, id = value, id = value, ...".
15047 expr = join(ts: to_qualifiers_glsl(id: var.self), ts: type_to_glsl(type), ts: " ");
15048 }
15049 else
15050 {
15051 expr += ", ";
15052 // In MSL, being based on C++, the asterisk marking a pointer
15053 // binds to the identifier, not the type.
15054 if (type.pointer)
15055 expr += "* ";
15056 }
15057
15058 expr += join(ts: to_name(id: loop_var), ts: " = ", ts: to_pointer_expression(id: var.static_expression));
15059 }
15060 }
15061 return expr;
15062 }
15063}
15064
15065bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
15066{
15067 if (block.loop_variables.size() <= 1)
15068 return true;
15069
15070 uint32_t expected = 0;
15071 Bitset expected_flags;
15072 for (auto &var : block.loop_variables)
15073 {
15074 // Don't care about uninitialized variables as they will not be part of the initializers.
15075 uint32_t expr = get<SPIRVariable>(id: var).static_expression;
15076 if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
15077 continue;
15078
15079 if (expected == 0)
15080 {
15081 expected = get<SPIRVariable>(id: var).basetype;
15082 expected_flags = get_decoration_bitset(id: var);
15083 }
15084 else if (expected != get<SPIRVariable>(id: var).basetype)
15085 return false;
15086
15087 // Precision flags and things like that must also match.
15088 if (expected_flags != get_decoration_bitset(id: var))
15089 return false;
15090 }
15091
15092 return true;
15093}
15094
15095bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
15096{
15097 SPIRBlock::ContinueBlockType continue_type = continue_block_type(continue_block: get<SPIRBlock>(id: block.continue_block));
15098
15099 if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
15100 {
15101 uint32_t current_count = statement_count;
15102 // If we're trying to create a true for loop,
		// we need to make sure that all opcodes before the branch statement do not actually emit any code.
15104 // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
15105 emit_block_instructions(block);
15106
15107 bool condition_is_temporary = forced_temporaries.find(x: block.condition) == end(cont&: forced_temporaries);
15108
15109 // This can work! We only did trivial things which could be forwarded in block body!
15110 if (current_count == statement_count && condition_is_temporary)
15111 {
15112 switch (continue_type)
15113 {
15114 case SPIRBlock::ForLoop:
15115 {
15116 // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
15117 flush_undeclared_variables(block);
15118
15119 // Important that we do this in this order because
15120 // emitting the continue block can invalidate the condition expression.
15121 auto initializer = emit_for_loop_initializers(block);
15122 auto condition = to_expression(id: block.condition);
15123
15124 // Condition might have to be inverted.
15125 if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block)))
15126 condition = join(ts: "!", ts: enclose_expression(expr: condition));
15127
15128 emit_block_hints(block);
15129 if (method != SPIRBlock::MergeToSelectContinueForLoop)
15130 {
15131 auto continue_block = emit_continue_block(continue_block: block.continue_block, follow_true_block: false, follow_false_block: false);
15132 statement(ts: "for (", ts&: initializer, ts: "; ", ts&: condition, ts: "; ", ts&: continue_block, ts: ")");
15133 }
15134 else
15135 statement(ts: "for (", ts&: initializer, ts: "; ", ts&: condition, ts: "; )");
15136 break;
15137 }
15138
15139 case SPIRBlock::WhileLoop:
15140 {
15141 // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
15142 flush_undeclared_variables(block);
15143 emit_while_loop_initializers(block);
15144 emit_block_hints(block);
15145
15146 auto condition = to_expression(id: block.condition);
15147 // Condition might have to be inverted.
15148 if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block)))
15149 condition = join(ts: "!", ts: enclose_expression(expr: condition));
15150
15151 statement(ts: "while (", ts&: condition, ts: ")");
15152 break;
15153 }
15154
15155 default:
15156 block.disable_block_optimization = true;
15157 force_recompile();
15158 begin_scope(); // We'll see an end_scope() later.
15159 return false;
15160 }
15161
15162 begin_scope();
15163 return true;
15164 }
15165 else
15166 {
15167 block.disable_block_optimization = true;
15168 force_recompile();
15169 begin_scope(); // We'll see an end_scope() later.
15170 return false;
15171 }
15172 }
15173 else if (method == SPIRBlock::MergeToDirectForLoop)
15174 {
15175 auto &child = get<SPIRBlock>(id: block.next_block);
15176
15177 // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
15178 flush_undeclared_variables(block&: child);
15179
15180 uint32_t current_count = statement_count;
15181
15182 // If we're trying to create a true for loop,
		// we need to make sure that all opcodes before the branch statement do not actually emit any code.
15184 // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
15185 emit_block_instructions(block&: child);
15186
15187 bool condition_is_temporary = forced_temporaries.find(x: child.condition) == end(cont&: forced_temporaries);
15188
15189 if (current_count == statement_count && condition_is_temporary)
15190 {
15191 uint32_t target_block = child.true_block;
15192
15193 switch (continue_type)
15194 {
15195 case SPIRBlock::ForLoop:
15196 {
15197 // Important that we do this in this order because
15198 // emitting the continue block can invalidate the condition expression.
15199 auto initializer = emit_for_loop_initializers(block);
15200 auto condition = to_expression(id: child.condition);
15201
15202 // Condition might have to be inverted.
15203 if (execution_is_noop(from: get<SPIRBlock>(id: child.true_block), to: get<SPIRBlock>(id: block.merge_block)))
15204 {
15205 condition = join(ts: "!", ts: enclose_expression(expr: condition));
15206 target_block = child.false_block;
15207 }
15208
15209 auto continue_block = emit_continue_block(continue_block: block.continue_block, follow_true_block: false, follow_false_block: false);
15210 emit_block_hints(block);
15211 statement(ts: "for (", ts&: initializer, ts: "; ", ts&: condition, ts: "; ", ts&: continue_block, ts: ")");
15212 break;
15213 }
15214
15215 case SPIRBlock::WhileLoop:
15216 {
15217 emit_while_loop_initializers(block);
15218 emit_block_hints(block);
15219
15220 auto condition = to_expression(id: child.condition);
15221 // Condition might have to be inverted.
15222 if (execution_is_noop(from: get<SPIRBlock>(id: child.true_block), to: get<SPIRBlock>(id: block.merge_block)))
15223 {
15224 condition = join(ts: "!", ts: enclose_expression(expr: condition));
15225 target_block = child.false_block;
15226 }
15227
15228 statement(ts: "while (", ts&: condition, ts: ")");
15229 break;
15230 }
15231
15232 default:
15233 block.disable_block_optimization = true;
15234 force_recompile();
15235 begin_scope(); // We'll see an end_scope() later.
15236 return false;
15237 }
15238
15239 begin_scope();
15240 branch(from: child.self, to: target_block);
15241 return true;
15242 }
15243 else
15244 {
15245 block.disable_block_optimization = true;
15246 force_recompile();
15247 begin_scope(); // We'll see an end_scope() later.
15248 return false;
15249 }
15250 }
15251 else
15252 return false;
15253}
15254
15255void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
15256{
15257 for (auto &v : block.dominated_variables)
15258 flush_variable_declaration(id: v);
15259}
15260
15261void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
15262{
15263 // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
15264 // Need to sort these to ensure that reference output is stable.
15265 sort(first: begin(cont&: temporaries), last: end(cont&: temporaries),
15266 comp: [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
15267
15268 for (auto &tmp : temporaries)
15269 {
15270 auto &type = get<SPIRType>(id: tmp.first);
15271
15272 // There are some rare scenarios where we are asked to declare pointer types as hoisted temporaries.
15273 // This should be ignored unless we're doing actual variable pointers and backend supports it.
15274 // Access chains cannot normally be lowered to temporaries in GLSL and HLSL.
15275 if (type.pointer && !backend.native_pointers)
15276 continue;
15277
15278 add_local_variable_name(id: tmp.second);
15279 auto &flags = get_decoration_bitset(id: tmp.second);
15280
15281 // Not all targets support pointer literals, so don't bother with that case.
15282 string initializer;
15283 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
15284 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: tmp.first));
15285
15286 statement(ts: flags_to_qualifiers_glsl(type, flags), ts: variable_decl(type, name: to_name(id: tmp.second)), ts&: initializer, ts: ";");
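// For example (ID invented), this emits a declaration like "highp vec4 _24 = vec4(0.0);",
// where the initializer only appears when options.force_zero_initialized_variables is set.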
15287
15288 hoisted_temporaries.insert(x: tmp.second);
15289 forced_temporaries.insert(x: tmp.second);
15290
15291 // The temporary might be read from before it's assigned, set up the expression now.
15292 set<SPIRExpression>(id: tmp.second, args: to_name(id: tmp.second), args&: tmp.first, args: true);
15293
15294 // If we have hoisted temporaries in multi-precision contexts, emit that here too ...
15295 // We will not be able to analyze hoisted-ness for dependent temporaries that we hallucinate here.
15296 auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(x: tmp.second);
15297 if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end())
15298 {
15299 uint32_t mirror_id = mirrored_precision_itr->second;
15300 auto &mirror_flags = get_decoration_bitset(id: mirror_id);
15301 statement(ts: flags_to_qualifiers_glsl(type, flags: mirror_flags),
15302 ts: variable_decl(type, name: to_name(id: mirror_id)),
15303 ts&: initializer, ts: ";");
15304 // The temporary might be read from before it's assigned, set up the expression now.
15305 set<SPIRExpression>(id: mirror_id, args: to_name(id: mirror_id), args&: tmp.first, args: true);
15306 hoisted_temporaries.insert(x: mirror_id);
15307 }
15308 }
15309}
15310
15311void CompilerGLSL::emit_block_chain(SPIRBlock &block)
15312{
15313 bool select_branch_to_true_block = false;
15314 bool select_branch_to_false_block = false;
15315 bool skip_direct_branch = false;
15316 bool emitted_loop_header_variables = false;
15317 bool force_complex_continue_block = false;
15318 ValueSaver<uint32_t> loop_level_saver(current_loop_level);
15319
15320 if (block.merge == SPIRBlock::MergeLoop)
15321 add_loop_level();
15322
15323 emit_hoisted_temporaries(temporaries&: block.declare_temporary);
15324
15325 SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
15326 if (block.continue_block)
15327 {
15328 continue_type = continue_block_type(continue_block: get<SPIRBlock>(id: block.continue_block));
15329 // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
15330 if (continue_type == SPIRBlock::ComplexLoop)
15331 block.complex_continue = true;
15332 }
15333
15334 // If we have loop variables, stop masking out access to the variable now.
15335 for (auto var_id : block.loop_variables)
15336 {
15337 auto &var = get<SPIRVariable>(id: var_id);
15338 var.loop_variable_enable = true;
15339 // We're not going to declare the variable directly, so emit a copy here.
15340 emit_variable_temporary_copies(var);
15341 }
15342
15343 // Remember deferred declaration state. We will restore it before returning.
15344 SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
15345 for (size_t i = 0; i < block.dominated_variables.size(); i++)
15346 {
15347 uint32_t var_id = block.dominated_variables[i];
15348 auto &var = get<SPIRVariable>(id: var_id);
15349 rearm_dominated_variables[i] = var.deferred_declaration;
15350 }
15351
15352 // This is the method often used by spirv-opt to implement loops.
15353 // The loop header goes straight into the continue block.
15354 // However, don't attempt this on ESSL 1.0: its restricted for-loop grammar only allows the loop index to be
15355 // modified in the loop header, and with this method the continue block becomes the loop body, so it will not work.
15356 if (!is_legacy_es() && block_is_loop_candidate(block, method: SPIRBlock::MergeToSelectContinueForLoop))
15357 {
15358 flush_undeclared_variables(block);
15359 if (attempt_emit_loop_header(block, method: SPIRBlock::MergeToSelectContinueForLoop))
15360 {
15361 if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block)))
15362 select_branch_to_false_block = true;
15363 else
15364 select_branch_to_true_block = true;
15365
15366 emitted_loop_header_variables = true;
15367 force_complex_continue_block = true;
15368 }
15369 }
15370 // This is the older loop behavior in glslang which branches to loop body directly from the loop header.
15371 else if (block_is_loop_candidate(block, method: SPIRBlock::MergeToSelectForLoop))
15372 {
15373 flush_undeclared_variables(block);
15374 if (attempt_emit_loop_header(block, method: SPIRBlock::MergeToSelectForLoop))
15375 {
15376 // The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
15377 if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block)))
15378 select_branch_to_false_block = true;
15379 else
15380 select_branch_to_true_block = true;
15381
15382 emitted_loop_header_variables = true;
15383 }
15384 }
15385 // This is the newer loop behavior in glslang which branches from the loop header directly to
15386 // a new block, which in turn has an OpBranchConditional without a selection merge.
15387 else if (block_is_loop_candidate(block, method: SPIRBlock::MergeToDirectForLoop))
15388 {
15389 flush_undeclared_variables(block);
15390 if (attempt_emit_loop_header(block, method: SPIRBlock::MergeToDirectForLoop))
15391 {
15392 skip_direct_branch = true;
15393 emitted_loop_header_variables = true;
15394 }
15395 }
15396 else if (continue_type == SPIRBlock::DoWhileLoop)
15397 {
15398 flush_undeclared_variables(block);
15399 emit_while_loop_initializers(block);
15400 emitted_loop_header_variables = true;
15401 // We have some temporaries where the loop header is the dominator.
15402 // We risk a case where we have code like:
15403 // for (;;) { create-temporary; break; } consume-temporary;
15404 // so force-declare temporaries here.
15405 emit_hoisted_temporaries(temporaries&: block.potential_declare_temporary);
15406 statement(ts: "do");
15407 begin_scope();
15408
15409 emit_block_instructions(block);
15410 }
15411 else if (block.merge == SPIRBlock::MergeLoop)
15412 {
15413 flush_undeclared_variables(block);
15414 emit_while_loop_initializers(block);
15415 emitted_loop_header_variables = true;
15416
15417 // We have a generic loop without any distinguishable pattern like for, while or do while.
15418 get<SPIRBlock>(id: block.continue_block).complex_continue = true;
15419 continue_type = SPIRBlock::ComplexLoop;
15420
15421 // We have some temporaries where the loop header is the dominator.
15422 // We risk a case where we have code like:
15423 // for (;;) { create-temporary; break; } consume-temporary;
15424 // so force-declare temporaries here.
15425 emit_hoisted_temporaries(temporaries&: block.potential_declare_temporary);
15426 emit_block_hints(block);
15427 statement(ts: "for (;;)");
15428 begin_scope();
15429
15430 emit_block_instructions(block);
15431 }
15432 else
15433 {
15434 emit_block_instructions(block);
15435 }
15436
15437 // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
15438 // as writes to said loop variables might have been masked out, we need a recompile.
15439 if (!emitted_loop_header_variables && !block.loop_variables.empty())
15440 {
15441 force_recompile_guarantee_forward_progress();
15442 for (auto var : block.loop_variables)
15443 get<SPIRVariable>(id: var).loop_variable = false;
15444 block.loop_variables.clear();
15445 }
15446
15447 flush_undeclared_variables(block);
15448 bool emit_next_block = true;
15449
15450 // Handle end of block.
15451 switch (block.terminator)
15452 {
15453 case SPIRBlock::Direct:
15454 // True when emitting complex continue block.
15455 if (block.loop_dominator == block.next_block)
15456 {
15457 branch(from: block.self, to: block.next_block);
15458 emit_next_block = false;
15459 }
15460 // True if MergeToDirectForLoop succeeded.
15461 else if (skip_direct_branch)
15462 emit_next_block = false;
15463 else if (is_continue(next: block.next_block) || is_break(next: block.next_block) || is_conditional(next: block.next_block))
15464 {
15465 branch(from: block.self, to: block.next_block);
15466 emit_next_block = false;
15467 }
15468 break;
15469
15470 case SPIRBlock::Select:
15471 // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
15472 if (select_branch_to_true_block)
15473 {
15474 if (force_complex_continue_block)
15475 {
15476 assert(block.true_block == block.continue_block);
15477
15478 // We're going to emit a continue block directly here, so make sure it's marked as complex.
15479 auto &complex_continue = get<SPIRBlock>(id: block.continue_block).complex_continue;
15480 bool old_complex = complex_continue;
15481 complex_continue = true;
15482 branch(from: block.self, to: block.true_block);
15483 complex_continue = old_complex;
15484 }
15485 else
15486 branch(from: block.self, to: block.true_block);
15487 }
15488 else if (select_branch_to_false_block)
15489 {
15490 if (force_complex_continue_block)
15491 {
15492 assert(block.false_block == block.continue_block);
15493
15494 // We're going to emit a continue block directly here, so make sure it's marked as complex.
15495 auto &complex_continue = get<SPIRBlock>(id: block.continue_block).complex_continue;
15496 bool old_complex = complex_continue;
15497 complex_continue = true;
15498 branch(from: block.self, to: block.false_block);
15499 complex_continue = old_complex;
15500 }
15501 else
15502 branch(from: block.self, to: block.false_block);
15503 }
15504 else
15505 branch(from: block.self, cond: block.condition, true_block: block.true_block, false_block: block.false_block);
15506 break;
15507
15508 case SPIRBlock::MultiSelect:
15509 {
15510 auto &type = expression_type(id: block.condition);
15511 bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort ||
15512 type.basetype == SPIRType::UByte || type.basetype == SPIRType::UInt64;
15513
15514 if (block.merge == SPIRBlock::MergeNone)
15515 SPIRV_CROSS_THROW("Switch statement is not structured");
15516
15517 if (!backend.support_64bit_switch && (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64))
15518 {
15519 // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
15520 SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
15521 }
15522
15523 const char *label_suffix = "";
15524 if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
15525 label_suffix = "u";
15526 else if (type.basetype == SPIRType::Int64 && backend.support_64bit_switch)
15527 label_suffix = "l";
15528 else if (type.basetype == SPIRType::UInt64 && backend.support_64bit_switch)
15529 label_suffix = "ul";
15530 else if (type.basetype == SPIRType::UShort)
15531 label_suffix = backend.uint16_t_literal_suffix;
15532 else if (type.basetype == SPIRType::Short)
15533 label_suffix = backend.int16_t_literal_suffix;
15534
15535 SPIRBlock *old_emitting_switch = current_emitting_switch;
15536 current_emitting_switch = &block;
15537
15538 if (block.need_ladder_break)
15539 statement(ts: "bool _", ts&: block.self, ts: "_ladder_break = false;");
15540
15541 // Find all unique case constructs.
15542 unordered_map<uint32_t, SmallVector<uint64_t>> case_constructs;
15543 SmallVector<uint32_t> block_declaration_order;
15544 SmallVector<uint64_t> literals_to_merge;
15545
15546 // If a switch case branches to the default block for some reason, we can just remove that literal from consideration
15547 // and let the default: block handle it.
15548 // Section 2.11 of the SPIR-V spec states that for fall-through cases there is a very strict declaration order which we can take advantage of here.
15549 // We only need to consider possible fallthrough if order[i] branches to order[i + 1].
15550 auto &cases = get_case_list(block);
15551 for (auto &c : cases)
15552 {
15553 if (c.block != block.next_block && c.block != block.default_block)
15554 {
15555 if (!case_constructs.count(x: c.block))
15556 block_declaration_order.push_back(t: c.block);
15557 case_constructs[c.block].push_back(t: c.value);
15558 }
15559 else if (c.block == block.next_block && block.default_block != block.next_block)
15560 {
15561 // We might have to flush phi inside specific case labels.
15562 // If we can piggyback on default:, do so instead.
15563 literals_to_merge.push_back(t: c.value);
15564 }
15565 }
15566
15567 // Empty literal array -> default.
15568 if (block.default_block != block.next_block)
15569 {
15570 auto &default_block = get<SPIRBlock>(id: block.default_block);
15571
15572 // We need to slide in the default block somewhere in this chain
15573 // if there are fall-through scenarios since the default is declared separately in OpSwitch.
15574 // Only consider trivial fall-through cases here.
15575 size_t num_blocks = block_declaration_order.size();
15576 bool injected_block = false;
15577
15578 for (size_t i = 0; i < num_blocks; i++)
15579 {
15580 auto &case_block = get<SPIRBlock>(id: block_declaration_order[i]);
15581 if (execution_is_direct_branch(from: case_block, to: default_block))
15582 {
15583 // Fallthrough to default block, we must inject the default block here.
15584 block_declaration_order.insert(itr: begin(cont&: block_declaration_order) + i + 1, value: block.default_block);
15585 injected_block = true;
15586 break;
15587 }
15588 else if (execution_is_direct_branch(from: default_block, to: case_block))
15589 {
15590 // Default case is falling through to another case label, we must inject the default block here.
15591 block_declaration_order.insert(itr: begin(cont&: block_declaration_order) + i, value: block.default_block);
15592 injected_block = true;
15593 break;
15594 }
15595 }
15596
15597 // Order does not matter.
15598 if (!injected_block)
15599 block_declaration_order.push_back(t: block.default_block);
15600 else if (is_legacy_es())
15601 SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");
15602
15603 case_constructs[block.default_block] = {};
15604 }
15605
15606 size_t num_blocks = block_declaration_order.size();
15607
15608 const auto to_case_label = [](uint64_t literal, uint32_t width, bool is_unsigned_case) -> string
15609 {
15610 if (is_unsigned_case)
15611 return convert_to_string(t: literal);
15612
15613 // For smaller cases, the literals are compiled as 32 bit wide
15614 // literals so we don't need to care for all sizes specifically.
15615 if (width <= 32)
15616 {
15617 return convert_to_string(t: int64_t(int32_t(literal)));
15618 }
15619
15620 return convert_to_string(t: int64_t(literal));
15621 };
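// Example: to_case_label(0xffffffffu, 32, true) returns "4294967295", while the signed path
// sign-extends through int32_t and returns "-1".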
15622
15623 const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint64_t> &labels,
15624 const char *suffix) -> string {
15625 string ret;
15626 size_t count = labels.size();
15627 for (size_t i = 0; i < count; i++)
15628 {
15629 if (i)
15630 ret += " || ";
15631 ret += join(ts: count > 1 ? "(" : "", ts: to_enclosed_expression(id: condition), ts: " == ", ts: labels[i], ts&: suffix,
15632 ts: count > 1 ? ")" : "");
15633 }
15634 return ret;
15635 };
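// Example: labels { 1, 2 } on a condition _x with suffix "u" build "(_x == 1u) || (_x == 2u)";
// a single label yields just "_x == 1u" without the parentheses.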
15636
15637 // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
15638 // we need to flush phi nodes outside the switch block in a branch,
15639 // and skip any Phi handling inside the case label to make fall-through work as expected.
15640 // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
15641 // inside the case label if at all possible.
15642 for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
15643 {
15644 if (flush_phi_required(from: block.self, to: block_declaration_order[i]) &&
15645 flush_phi_required(from: block_declaration_order[i - 1], to: block_declaration_order[i]))
15646 {
15647 uint32_t target_block = block_declaration_order[i];
15648
15649 // Make sure we flush Phi, it might have been marked to be ignored earlier.
15650 get<SPIRBlock>(id: target_block).ignore_phi_from_block = 0;
15651
15652 auto &literals = case_constructs[target_block];
15653
15654 if (literals.empty())
15655 {
15656 // Oh boy, gotta make a complete negative test instead! o.o
15657 // Find all possible literals that would *not* make us enter the default block.
15658 // If none of those literals match, we flush Phi ...
15659 SmallVector<string> conditions;
15660 for (size_t j = 0; j < num_blocks; j++)
15661 {
15662 auto &negative_literals = case_constructs[block_declaration_order[j]];
15663 for (auto &case_label : negative_literals)
15664 conditions.push_back(t: join(ts: to_enclosed_expression(id: block.condition),
15665 ts: " != ", ts: to_case_label(case_label, type.width, unsigned_case)));
15666 }
15667
15668 statement(ts: "if (", ts: merge(list: conditions, between: " && "), ts: ")");
15669 begin_scope();
15670 flush_phi(from: block.self, to: target_block);
15671 end_scope();
15672 }
15673 else
15674 {
15675 SmallVector<string> conditions;
15676 conditions.reserve(count: literals.size());
15677 for (auto &case_label : literals)
15678 conditions.push_back(t: join(ts: to_enclosed_expression(id: block.condition),
15679 ts: " == ", ts: to_case_label(case_label, type.width, unsigned_case)));
15680 statement(ts: "if (", ts: merge(list: conditions, between: " || "), ts: ")");
15681 begin_scope();
15682 flush_phi(from: block.self, to: target_block);
15683 end_scope();
15684 }
15685
15686 // Mark the block so that we don't flush Phi from header to case label.
15687 get<SPIRBlock>(id: target_block).ignore_phi_from_block = block.self;
15688 }
15689 }
15690
15691 // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate
15692 // non-structured exits with the help of a switch block.
15693 // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
15694 bool degenerate_switch = block.default_block != block.merge_block && cases.empty();
15695
15696 if (degenerate_switch || is_legacy_es())
15697 {
15698 // ESSL 1.0 is not guaranteed to support do/while.
15699 if (is_legacy_es())
15700 {
15701 uint32_t counter = statement_count;
15702 statement(ts: "for (int spvDummy", ts&: counter, ts: " = 0; spvDummy", ts&: counter,
15703 ts: " < 1; spvDummy", ts&: counter, ts: "++)");
15704 }
15705 else
15706 statement(ts: "do");
15707 }
15708 else
15709 {
15710 emit_block_hints(block);
15711 statement(ts: "switch (", ts: to_unpacked_expression(id: block.condition), ts: ")");
15712 }
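// The construct opened here is roughly "for (int spvDummy42 = 0; spvDummy42 < 1; spvDummy42++) { ... }"
// on ESSL 1.0 (the suffix is just statement_count), "do { ... } while(false)" for other degenerate cases,
// or a plain "switch (...) { ... }" otherwise; the first two merely give "break;" something legal to exit.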
15713 begin_scope();
15714
15715 for (size_t i = 0; i < num_blocks; i++)
15716 {
15717 uint32_t target_block = block_declaration_order[i];
15718 auto &literals = case_constructs[target_block];
15719
15720 if (literals.empty())
15721 {
15722 // Default case.
15723 if (!degenerate_switch)
15724 {
15725 if (is_legacy_es())
15726 statement(ts: "else");
15727 else
15728 statement(ts: "default:");
15729 }
15730 }
15731 else
15732 {
15733 if (is_legacy_es())
15734 {
15735 statement(ts: (i ? "else " : ""), ts: "if (", ts: to_legacy_case_label(block.condition, literals, label_suffix),
15736 ts: ")");
15737 }
15738 else
15739 {
15740 for (auto &case_literal : literals)
15741 {
15742 // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
15743 statement(ts: "case ", ts: to_case_label(case_literal, type.width, unsigned_case), ts&: label_suffix, ts: ":");
15744 }
15745 }
15746 }
15747
15748 auto &case_block = get<SPIRBlock>(id: target_block);
15749 if (backend.support_case_fallthrough && i + 1 < num_blocks &&
15750 execution_is_direct_branch(from: case_block, to: get<SPIRBlock>(id: block_declaration_order[i + 1])))
15751 {
15752 // We will fall through here, so just terminate the block chain early.
15753 // We still need to deal with Phi potentially.
15754 // No need for a stack-like thing here since we only do fall-through when there is a
15755 // single trivial branch to the fall-through target.
15756 current_emitting_switch_fallthrough = true;
15757 }
15758 else
15759 current_emitting_switch_fallthrough = false;
15760
15761 if (!degenerate_switch)
15762 begin_scope();
15763 branch(from: block.self, to: target_block);
15764 if (!degenerate_switch)
15765 end_scope();
15766
15767 current_emitting_switch_fallthrough = false;
15768 }
15769
15770 // Might still have to flush phi variables if we branch from loop header directly to merge target.
15771 // This is supposed to emit all cases where we branch from header to merge block directly.
15772 // There are two main scenarios where we cannot rely on default fallthrough.
15773 // - There is an explicit default: label already.
15774 // In this case, literals_to_merge need to form their own "default" case, so that we avoid executing that block.
15775 // - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there.
15776 bool header_merge_requires_phi = flush_phi_required(from: block.self, to: block.next_block);
15777 bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty();
15778 if ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty())
15779 {
15780 for (auto &case_literal : literals_to_merge)
15781 statement(ts: "case ", ts: to_case_label(case_literal, type.width, unsigned_case), ts&: label_suffix, ts: ":");
15782
15783 if (block.default_block == block.next_block)
15784 {
15785 if (is_legacy_es())
15786 statement(ts: "else");
15787 else
15788 statement(ts: "default:");
15789 }
15790
15791 begin_scope();
15792 flush_phi(from: block.self, to: block.next_block);
15793 statement(ts: "break;");
15794 end_scope();
15795 }
15796
15797 if (degenerate_switch && !is_legacy_es())
15798 end_scope_decl(decl: "while(false)");
15799 else
15800 end_scope();
15801
15802 if (block.need_ladder_break)
15803 {
15804 statement(ts: "if (_", ts&: block.self, ts: "_ladder_break)");
15805 begin_scope();
15806 statement(ts: "break;");
15807 end_scope();
15808 }
15809
15810 current_emitting_switch = old_emitting_switch;
15811 break;
15812 }
15813
15814 case SPIRBlock::Return:
15815 {
15816 for (auto &line : current_function->fixup_hooks_out)
15817 line();
15818
15819 if (processing_entry_point)
15820 emit_fixup();
15821
15822 auto &cfg = get_cfg_for_current_function();
15823
15824 if (block.return_value)
15825 {
15826 auto &type = expression_type(id: block.return_value);
15827 if (!type.array.empty() && !backend.can_return_array)
15828 {
15829 // If we cannot return arrays, we will have a special out argument we can write to instead.
15830 // The backend is responsible for setting this up, and redirecting the return value as appropriate.
15831 if (ir.ids[block.return_value].get_type() != TypeUndef)
15832 {
15833 emit_array_copy(lhs: "spvReturnValue", lhs_id: 0, rhs_id: block.return_value, lhs_storage: StorageClassFunction,
15834 rhs_storage: get_expression_effective_storage_class(ptr: block.return_value));
15835 }
15836
15837 if (!cfg.node_terminates_control_flow_in_sub_graph(from: current_function->entry_block, to: block.self) ||
15838 block.loop_dominator != BlockID(SPIRBlock::NoDominator))
15839 {
15840 statement(ts: "return;");
15841 }
15842 }
15843 else
15844 {
15845 // OpReturnValue can return Undef, so don't emit anything for this case.
15846 if (ir.ids[block.return_value].get_type() != TypeUndef)
15847 statement(ts: "return ", ts: to_unpacked_expression(id: block.return_value), ts: ";");
15848 }
15849 }
15850 else if (!cfg.node_terminates_control_flow_in_sub_graph(from: current_function->entry_block, to: block.self) ||
15851 block.loop_dominator != BlockID(SPIRBlock::NoDominator))
15852 {
15853 // If this block is the very final block and not called from control flow,
15854 // we do not need an explicit return which looks out of place. Just end the function here.
15855 // In the very weird case of for(;;) { return; }, executing the return is unconditional,
15856 // but we actually need a return here ...
15857 statement(ts: "return;");
15858 }
15859 break;
15860 }
15861
15862 // If the Kill is terminating a block with a (probably synthetic) return value, emit a return value statement.
15863 case SPIRBlock::Kill:
15864 statement(ts&: backend.discard_literal, ts: ";");
15865 if (block.return_value)
15866 statement(ts: "return ", ts: to_unpacked_expression(id: block.return_value), ts: ";");
15867 break;
15868
15869 case SPIRBlock::Unreachable:
15870 {
15871 // Avoid emitting false fallthrough, which can happen for
15872 // if (cond) break; else discard; inside a case label.
15873 // Discard is not always implementable as a terminator.
15874
15875 auto &cfg = get_cfg_for_current_function();
15876 bool inner_dominator_is_switch = false;
15877 ID id = block.self;
15878
15879 while (id)
15880 {
15881 auto &iter_block = get<SPIRBlock>(id);
15882 if (iter_block.terminator == SPIRBlock::MultiSelect ||
15883 iter_block.merge == SPIRBlock::MergeLoop)
15884 {
15885 ID next_block = iter_block.merge == SPIRBlock::MergeLoop ?
15886 iter_block.merge_block : iter_block.next_block;
15887 bool outside_construct = next_block && cfg.find_common_dominator(a: next_block, b: block.self) == next_block;
15888 if (!outside_construct)
15889 {
15890 inner_dominator_is_switch = iter_block.terminator == SPIRBlock::MultiSelect;
15891 break;
15892 }
15893 }
15894
15895 if (cfg.get_preceding_edges(block: id).empty())
15896 break;
15897
15898 id = cfg.get_immediate_dominator(block: id);
15899 }
15900
15901 if (inner_dominator_is_switch)
15902 statement(ts: "break; // unreachable workaround");
15903
15904 emit_next_block = false;
15905 break;
15906 }
15907
15908 case SPIRBlock::IgnoreIntersection:
15909 statement(ts: "ignoreIntersectionEXT;");
15910 break;
15911
15912 case SPIRBlock::TerminateRay:
15913 statement(ts: "terminateRayEXT;");
15914 break;
15915
15916 default:
15917 SPIRV_CROSS_THROW("Unimplemented block terminator.");
15918 }
15919
15920 if (block.next_block && emit_next_block)
15921 {
15922 // If we hit this case, we're dealing with an unconditional branch, which means we will output
15923 // that block after this. If we had selection merge, we already flushed phi variables.
15924 if (block.merge != SPIRBlock::MergeSelection)
15925 {
15926 flush_phi(from: block.self, to: block.next_block);
15927 // For a direct branch, need to remember to invalidate expressions in the next linear block instead.
15928 get<SPIRBlock>(id: block.next_block).invalidate_expressions = block.invalidate_expressions;
15929 }
15930
15931 // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
15932 if (!current_emitting_switch_fallthrough)
15933 {
15934 // For merge selects we might have ignored the fact that a merge target
15935 // could have been a break; or continue;
15936 // We will need to deal with it here.
15937 if (is_loop_break(next: block.next_block))
15938 {
15939 // Cannot check for just break, because switch statements will also use break.
15940 assert(block.merge == SPIRBlock::MergeSelection);
15941 statement(ts: "break;");
15942 }
15943 else if (is_continue(next: block.next_block))
15944 {
15945 assert(block.merge == SPIRBlock::MergeSelection);
15946 branch_to_continue(from: block.self, to: block.next_block);
15947 }
15948 else if (BlockID(block.self) != block.next_block)
15949 emit_block_chain(block&: get<SPIRBlock>(id: block.next_block));
15950 }
15951 }
15952
15953 if (block.merge == SPIRBlock::MergeLoop)
15954 {
15955 if (continue_type == SPIRBlock::DoWhileLoop)
15956 {
15957 // Make sure that we run the continue block to get the expressions set, but this
15958 // should become an empty string.
15959 // We have no fallbacks if we cannot forward everything to temporaries ...
15960 const auto &continue_block = get<SPIRBlock>(id: block.continue_block);
15961 bool positive_test = execution_is_noop(from: get<SPIRBlock>(id: continue_block.true_block),
15962 to: get<SPIRBlock>(id: continue_block.loop_dominator));
15963
15964 uint32_t current_count = statement_count;
15965 auto statements = emit_continue_block(continue_block: block.continue_block, follow_true_block: positive_test, follow_false_block: !positive_test);
15966 if (statement_count != current_count)
15967 {
15968 // The DoWhile block has side effects, force ComplexLoop pattern next pass.
15969 get<SPIRBlock>(id: block.continue_block).complex_continue = true;
15970 force_recompile();
15971 }
15972
15973 // Might have to invert the do-while test here.
15974 auto condition = to_expression(id: continue_block.condition);
15975 if (!positive_test)
15976 condition = join(ts: "!", ts: enclose_expression(expr: condition));
15977
15978 end_scope_decl(decl: join(ts: "while (", ts&: condition, ts: ")"));
15979 }
15980 else
15981 end_scope();
15982
15983 loop_level_saver.release();
15984
15985 // We cannot break out of two loops at once, so don't check for break; here.
15986 // Using block.self as the "from" block isn't quite right, but it has the same scope
15987 // and dominance structure, so it's fine.
15988 if (is_continue(next: block.merge_block))
15989 branch_to_continue(from: block.self, to: block.merge_block);
15990 else
15991 emit_block_chain(block&: get<SPIRBlock>(id: block.merge_block));
15992 }
15993
15994 // Forget about control dependent expressions now.
15995 block.invalidate_expressions.clear();
15996
15997 // After we return, we must be out of scope, so if we somehow have to re-emit this function,
15998 // re-declare variables if necessary.
15999 assert(rearm_dominated_variables.size() == block.dominated_variables.size());
16000 for (size_t i = 0; i < block.dominated_variables.size(); i++)
16001 {
16002 uint32_t var = block.dominated_variables[i];
16003 get<SPIRVariable>(id: var).deferred_declaration = rearm_dominated_variables[i];
16004 }
16005
16006 // Just like for deferred declaration, we need to forget about loop variable enable
16007 // if our block chain is reinstantiated later.
16008 for (auto &var_id : block.loop_variables)
16009 get<SPIRVariable>(id: var_id).loop_variable_enable = false;
16010}
16011
16012void CompilerGLSL::begin_scope()
16013{
16014 statement(ts: "{");
16015 indent++;
16016}
16017
16018void CompilerGLSL::end_scope()
16019{
16020 if (!indent)
16021 SPIRV_CROSS_THROW("Popping empty indent stack.");
16022 indent--;
16023 statement(ts: "}");
16024}
16025
16026void CompilerGLSL::end_scope(const string &trailer)
16027{
16028 if (!indent)
16029 SPIRV_CROSS_THROW("Popping empty indent stack.");
16030 indent--;
16031 statement(ts: "}", ts: trailer);
16032}
16033
16034void CompilerGLSL::end_scope_decl()
16035{
16036 if (!indent)
16037 SPIRV_CROSS_THROW("Popping empty indent stack.");
16038 indent--;
16039 statement(ts: "};");
16040}
16041
16042void CompilerGLSL::end_scope_decl(const string &decl)
16043{
16044 if (!indent)
16045 SPIRV_CROSS_THROW("Popping empty indent stack.");
16046 indent--;
16047 statement(ts: "} ", ts: decl, ts: ";");
16048}
16049
16050void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
16051{
16052 // If our variable is remapped, and we rely on type-remapping information as
16053 // well, then we cannot pass the variable as a function parameter.
16054 // Fixing this is non-trivial without stamping out variants of the same function,
16055 // so for now warn about this and suggest workarounds instead.
16056 for (uint32_t i = 0; i < length; i++)
16057 {
16058 auto *var = maybe_get<SPIRVariable>(id: args[i]);
16059 if (!var || !var->remapped_variable)
16060 continue;
16061
16062 auto &type = get<SPIRType>(id: var->basetype);
16063 if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
16064 {
16065 SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
16066 "This will not work correctly because type-remapping information is lost. "
16067 "To workaround, please consider not passing the subpass input as a function parameter, "
16068 "or use in/out variables instead which do not need type remapping information.");
16069 }
16070 }
16071}
16072
16073const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
16074{
16075 // FIXME: This is kind of hacky. There should be a cleaner way.
16076 auto offset = uint32_t(&instr - current_emitting_block->ops.data());
16077 if ((offset + 1) < current_emitting_block->ops.size())
16078 return &current_emitting_block->ops[offset + 1];
16079 else
16080 return nullptr;
16081}
16082
16083uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
16084{
16085 return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
16086 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
16087 MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
16088}
16089
16090void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t, uint32_t rhs_id, StorageClass, StorageClass)
16091{
16092 statement(ts: lhs, ts: " = ", ts: to_expression(id: rhs_id), ts: ";");
16093}
16094
16095bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id)
16096{
16097 if (!backend.force_gl_in_out_block)
16098 return false;
16099 // This path is only relevant for GL backends.
16100
16101 auto *var = maybe_get<SPIRVariable>(id: target_id);
16102 if (!var || var->storage != StorageClassOutput)
16103 return false;
16104
16105 if (!is_builtin_variable(var: *var) || BuiltIn(get_decoration(id: var->self, decoration: DecorationBuiltIn)) != BuiltInSampleMask)
16106 return false;
16107
16108 auto &type = expression_type(id: source_id);
16109 string array_expr;
16110 if (type.array_size_literal.back())
16111 {
16112 array_expr = convert_to_string(t: type.array.back());
16113 if (type.array.back() == 0)
16114 SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
16115 }
16116 else
16117 array_expr = to_expression(id: type.array.back());
16118
16119 SPIRType target_type;
16120 target_type.basetype = SPIRType::Int;
16121
16122 statement(ts: "for (int i = 0; i < int(", ts&: array_expr, ts: "); i++)");
16123 begin_scope();
16124 statement(ts: to_expression(id: target_id), ts: "[i] = ",
16125 ts: bitcast_expression(target_type, expr_type: type.basetype, expr: join(ts: to_expression(id: source_id), ts: "[i]")),
16126 ts: ";");
16127 end_scope();
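// The emitted GLSL looks roughly like (source name and array size illustrative):
//   for (int i = 0; i < int(1); i++)
//   {
//       gl_SampleMask[i] = int(_src[i]);
//   }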
16128
16129 return true;
16130}
16131
16132void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
16133{
16134 if (!backend.force_gl_in_out_block)
16135 return;
16136 // This path is only relevant for GL backends.
16137
16138 auto *var = maybe_get<SPIRVariable>(id: source_id);
16139 if (!var)
16140 return;
16141
16142 if (var->storage != StorageClassInput && var->storage != StorageClassOutput)
16143 return;
16144
16145 auto &type = get_variable_data_type(var: *var);
16146 if (type.array.empty())
16147 return;
16148
16149 auto builtin = BuiltIn(get_decoration(id: var->self, decoration: DecorationBuiltIn));
16150 bool is_builtin = is_builtin_variable(var: *var) &&
16151 (builtin == BuiltInPointSize ||
16152 builtin == BuiltInPosition ||
16153 builtin == BuiltInSampleMask);
16154 bool is_tess = is_tessellation_shader();
16155 bool is_patch = has_decoration(id: var->self, decoration: DecorationPatch);
16156 bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask;
16157
16158 // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it.
16159 // We must unroll the array load.
16160 // For builtins, we couldn't catch this case normally,
16161 // because this is resolved in the OpAccessChain in most cases.
16162 // If we load the entire array, we have no choice but to unroll here.
16163 if (!is_patch && (is_builtin || is_tess))
16164 {
16165 auto new_expr = join(ts: "_", ts&: target_id, ts: "_unrolled");
16166 statement(ts: variable_decl(type, name: new_expr, id: target_id), ts: ";");
16167 string array_expr;
16168 if (type.array_size_literal.back())
16169 {
16170 array_expr = convert_to_string(t: type.array.back());
16171 if (type.array.back() == 0)
16172 SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
16173 }
16174 else
16175 array_expr = to_expression(id: type.array.back());
16176
16177 // The array size might be a specialization constant, so use a for-loop instead.
16178 statement(ts: "for (int i = 0; i < int(", ts&: array_expr, ts: "); i++)");
16179 begin_scope();
16180 if (is_builtin && !is_sample_mask)
16181 statement(ts&: new_expr, ts: "[i] = gl_in[i].", ts&: expr, ts: ";");
16182 else if (is_sample_mask)
16183 {
16184 SPIRType target_type;
16185 target_type.basetype = SPIRType::Int;
16186 statement(ts&: new_expr, ts: "[i] = ", ts: bitcast_expression(target_type, expr_type: type.basetype, expr: join(ts&: expr, ts: "[i]")), ts: ";");
16187 }
16188 else
16189 statement(ts&: new_expr, ts: "[i] = ", ts&: expr, ts: "[i];");
16190 end_scope();
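// At this point the unrolled load looks roughly like (names and sizes illustrative):
//   vec4 _42_unrolled[32];
//   for (int i = 0; i < int(32); i++)
//   {
//       _42_unrolled[i] = gl_in[i].gl_Position;
//   }
// and the caller's expression is redirected to the temporary below.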
16191
16192 expr = std::move(new_expr);
16193 }
16194}
16195
16196void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
16197{
16198 // We will handle array cases elsewhere.
16199 if (!expr_type.array.empty())
16200 return;
16201
16202 auto *var = maybe_get_backing_variable(chain: source_id);
16203 if (var)
16204 source_id = var->self;
16205
16206 // Only interested in standalone builtin variables.
16207 if (!has_decoration(id: source_id, decoration: DecorationBuiltIn))
16208 return;
16209
16210 auto builtin = static_cast<BuiltIn>(get_decoration(id: source_id, decoration: DecorationBuiltIn));
16211 auto expected_type = expr_type.basetype;
16212
16213 // TODO: Fill in for more builtins.
16214 switch (builtin)
16215 {
16216 case BuiltInLayer:
16217 case BuiltInPrimitiveId:
16218 case BuiltInViewportIndex:
16219 case BuiltInInstanceId:
16220 case BuiltInInstanceIndex:
16221 case BuiltInVertexId:
16222 case BuiltInVertexIndex:
16223 case BuiltInSampleId:
16224 case BuiltInBaseVertex:
16225 case BuiltInBaseInstance:
16226 case BuiltInDrawIndex:
16227 case BuiltInFragStencilRefEXT:
16228 case BuiltInInstanceCustomIndexNV:
16229 case BuiltInSampleMask:
16230 case BuiltInPrimitiveShadingRateKHR:
16231 case BuiltInShadingRateKHR:
16232 expected_type = SPIRType::Int;
16233 break;
16234
16235 case BuiltInGlobalInvocationId:
16236 case BuiltInLocalInvocationId:
16237 case BuiltInWorkgroupId:
16238 case BuiltInLocalInvocationIndex:
16239 case BuiltInWorkgroupSize:
16240 case BuiltInNumWorkgroups:
16241 case BuiltInIncomingRayFlagsNV:
16242 case BuiltInLaunchIdNV:
16243 case BuiltInLaunchSizeNV:
16244 expected_type = SPIRType::UInt;
16245 break;
16246
16247 default:
16248 break;
16249 }
16250
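// For instance, if the SPIR-V counterpart of gl_VertexID was declared as uint, the loaded
// expression gets wrapped by bitcast_expression below, which for int <-> uint of equal width
// typically comes out as a plain constructor cast, e.g. "uint(gl_VertexID)".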
16251 if (expected_type != expr_type.basetype)
16252 expr = bitcast_expression(target_type: expr_type, expr_type: expected_type, expr);
16253}
16254
16255void CompilerGLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
16256{
16257 auto *var = maybe_get_backing_variable(chain: target_id);
16258 if (var)
16259 target_id = var->self;
16260
16261 // Only interested in standalone builtin variables.
16262 if (!has_decoration(id: target_id, decoration: DecorationBuiltIn))
16263 return;
16264
16265 auto builtin = static_cast<BuiltIn>(get_decoration(id: target_id, decoration: DecorationBuiltIn));
16266 auto expected_type = expr_type.basetype;
16267
16268 // TODO: Fill in for more builtins.
16269 switch (builtin)
16270 {
16271 case BuiltInLayer:
16272 case BuiltInPrimitiveId:
16273 case BuiltInViewportIndex:
16274 case BuiltInFragStencilRefEXT:
16275 case BuiltInSampleMask:
16276 case BuiltInPrimitiveShadingRateKHR:
16277 case BuiltInShadingRateKHR:
16278 expected_type = SPIRType::Int;
16279 break;
16280
16281 default:
16282 break;
16283 }
16284
16285 if (expected_type != expr_type.basetype)
16286 {
16287 auto type = expr_type;
16288 type.basetype = expected_type;
16289 expr = bitcast_expression(target_type: type, expr_type: expr_type.basetype, expr);
16290 }
16291}
16292
16293void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id)
16294{
16295 if (*backend.nonuniform_qualifier == '\0')
16296 return;
16297
16298 auto *var = maybe_get_backing_variable(chain: ptr_id);
16299 if (!var)
16300 return;
16301
16302 if (var->storage != StorageClassUniformConstant &&
16303 var->storage != StorageClassStorageBuffer &&
16304 var->storage != StorageClassUniform)
16305 return;
16306
16307 auto &backing_type = get<SPIRType>(id: var->basetype);
16308 if (backing_type.array.empty())
16309 return;
16310
16311 // If we get here, we know we're accessing an arrayed resource which
16312 // might require nonuniform qualifier.
16313
16314 auto start_array_index = expr.find_first_of(c: '[');
16315
16316 if (start_array_index == string::npos)
16317 return;
16318
16319 // We've opened a bracket, track expressions until we can close the bracket.
16320 // This must be our resource index.
16321 size_t end_array_index = string::npos;
16322 unsigned bracket_count = 1;
16323 for (size_t index = start_array_index + 1; index < expr.size(); index++)
16324 {
16325 if (expr[index] == ']')
16326 {
16327 if (--bracket_count == 0)
16328 {
16329 end_array_index = index;
16330 break;
16331 }
16332 }
16333 else if (expr[index] == '[')
16334 bracket_count++;
16335 }
16336
16337 assert(bracket_count == 0);
16338
16339 // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
16340 // nothing we can do here to express that.
16341 if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
16342 return;
16343
16344 start_array_index++;
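// Only the index expression inside the first bracket is wrapped, so for GLSL an access like
// "_texs[_idx]" becomes "_texs[nonuniformEXT(_idx)]" (names illustrative; other backends
// substitute their own backend.nonuniform_qualifier).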
16345
16346 expr = join(ts: expr.substr(pos: 0, n: start_array_index), ts&: backend.nonuniform_qualifier, ts: "(",
16347 ts: expr.substr(pos: start_array_index, n: end_array_index - start_array_index), ts: ")",
16348 ts: expr.substr(pos: end_array_index, n: string::npos));
16349}
16350
16351void CompilerGLSL::emit_block_hints(const SPIRBlock &block)
16352{
16353 if ((options.es && options.version < 310) || (!options.es && options.version < 140))
16354 return;
16355
16356 switch (block.hint)
16357 {
16358 case SPIRBlock::HintFlatten:
16359 require_extension_internal(ext: "GL_EXT_control_flow_attributes");
16360 statement(ts: "SPIRV_CROSS_FLATTEN");
16361 break;
16362 case SPIRBlock::HintDontFlatten:
16363 require_extension_internal(ext: "GL_EXT_control_flow_attributes");
16364 statement(ts: "SPIRV_CROSS_BRANCH");
16365 break;
16366 case SPIRBlock::HintUnroll:
16367 require_extension_internal(ext: "GL_EXT_control_flow_attributes");
16368 statement(ts: "SPIRV_CROSS_UNROLL");
16369 break;
16370 case SPIRBlock::HintDontUnroll:
16371 require_extension_internal(ext: "GL_EXT_control_flow_attributes");
16372 statement(ts: "SPIRV_CROSS_LOOP");
16373 break;
16374 default:
16375 break;
16376 }
16377}
16378
16379void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
16380{
16381 preserved_aliases[id] = get_name(id);
16382}
16383
16384void CompilerGLSL::reset_name_caches()
16385{
16386 for (auto &preserved : preserved_aliases)
16387 set_name(id: preserved.first, name: preserved.second);
16388
16389 preserved_aliases.clear();
16390 resource_names.clear();
16391 block_input_names.clear();
16392 block_output_names.clear();
16393 block_ubo_names.clear();
16394 block_ssbo_names.clear();
16395 block_names.clear();
16396 function_overloads.clear();
16397}
16398
16399void CompilerGLSL::fixup_anonymous_struct_names(std::unordered_set<uint32_t> &visited, const SPIRType &type)
16400{
16401 if (visited.count(x: type.self))
16402 return;
16403 visited.insert(x: type.self);
16404
16405 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
16406 {
16407 auto &mbr_type = get<SPIRType>(id: type.member_types[i]);
16408
16409 if (mbr_type.basetype == SPIRType::Struct)
16410 {
16411 // If there are multiple aliases, the output might be somewhat unpredictable,
16412 // but the only real alternative in that case is to do nothing, which isn't any better.
16413 // This check should be fine in practice.
16414 if (get_name(id: mbr_type.self).empty() && !get_member_name(id: type.self, index: i).empty())
16415 {
16416 auto anon_name = join(ts: "anon_", ts: get_member_name(id: type.self, index: i));
16417 ParsedIR::sanitize_underscores(str&: anon_name);
16418 set_name(id: mbr_type.self, name: anon_name);
16419 }
16420
16421 fixup_anonymous_struct_names(visited, type: mbr_type);
16422 }
16423 }
16424}
16425
16426void CompilerGLSL::fixup_anonymous_struct_names()
16427{
16428 // HLSL codegen can often end up emitting anonymous structs inside blocks, which
16429 // breaks GL linking since all names must match ...
16430 // Try to emit sensible code, so attempt to find such structs and emit anon_$member.
16431
16432 // Breaks exponential explosion with weird type trees.
16433 std::unordered_set<uint32_t> visited;
16434
16435 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t, SPIRType &type) {
16436 if (type.basetype == SPIRType::Struct &&
16437 (has_decoration(id: type.self, decoration: DecorationBlock) ||
16438 has_decoration(id: type.self, decoration: DecorationBufferBlock)))
16439 {
16440 fixup_anonymous_struct_names(visited, type);
16441 }
16442 });
16443}
16444
16445void CompilerGLSL::fixup_type_alias()
16446{
16447 // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
16448 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t self, SPIRType &type) {
16449 if (!type.type_alias)
16450 return;
16451
16452 if (has_decoration(id: type.self, decoration: DecorationBlock) || has_decoration(id: type.self, decoration: DecorationBufferBlock))
16453 {
16454 // Top-level block types should never alias anything else.
16455 type.type_alias = 0;
16456 }
16457 else if (type_is_block_like(type) && type.self == ID(self))
16458 {
16459 // A block-like type is any type which contains Offset decoration, but not top-level blocks,
16460 // i.e. blocks which are placed inside buffers.
16461 // Become the master.
16462 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t other_id, SPIRType &other_type) {
16463 if (other_id == self)
16464 return;
16465
16466 if (other_type.type_alias == type.type_alias)
16467 other_type.type_alias = self;
16468 });
16469
16470 this->get<SPIRType>(id: type.type_alias).type_alias = self;
16471 type.type_alias = 0;
16472 }
16473 });
16474}
16475
16476void CompilerGLSL::reorder_type_alias()
16477{
16478 // Reorder declaration of types so that the master of the type alias is always emitted first.
16479 // We need this in case a type B depends on type A (A must come before B in the vector), but A is an alias of a type ABuffer, which
16480 // means the declaration of A doesn't happen (yet), and the order would be B, ABuffer and not ABuffer, B. Fix this up here.
16481 auto loop_lock = ir.create_loop_hard_lock();
16482
16483 auto &type_ids = ir.ids_for_type[TypeType];
16484 for (auto alias_itr = begin(cont&: type_ids); alias_itr != end(cont&: type_ids); ++alias_itr)
16485 {
16486 auto &type = get<SPIRType>(id: *alias_itr);
16487 if (type.type_alias != TypeID(0) &&
16488 !has_extended_decoration(id: type.type_alias, decoration: SPIRVCrossDecorationBufferBlockRepacked))
16489 {
16490 // We will skip declaring this type, so make sure the type_alias type comes before.
16491 auto master_itr = find(first: begin(cont&: type_ids), last: end(cont&: type_ids), val: ID(type.type_alias));
16492 assert(master_itr != end(type_ids));
16493
16494 if (alias_itr < master_itr)
16495 {
16496 // Must also swap the type order for the constant-type joined array.
16497 auto &joined_types = ir.ids_for_constant_or_type;
16498 auto alt_alias_itr = find(first: begin(cont&: joined_types), last: end(cont&: joined_types), val: *alias_itr);
16499 auto alt_master_itr = find(first: begin(cont&: joined_types), last: end(cont&: joined_types), val: *master_itr);
16500 assert(alt_alias_itr != end(joined_types));
16501 assert(alt_master_itr != end(joined_types));
16502
16503 swap(a&: *alias_itr, b&: *master_itr);
16504 swap(a&: *alt_alias_itr, b&: *alt_master_itr);
16505 }
16506 }
16507 }
16508}
16509
16510void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
16511{
16512 // If we are redirecting statements, ignore the line directive.
16513 // Common case here is continue blocks.
16514 if (redirect_statement)
16515 return;
16516
16517 if (options.emit_line_directives)
16518 {
16519 require_extension_internal(ext: "GL_GOOGLE_cpp_style_line_directive");
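// Emits e.g.: #line 42 "foo.frag" (values illustrative; the line comes from OpLine and the
// file name from the referenced OpString).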
16520 statement_no_indent(ts: "#line ", ts&: line_literal, ts: " \"", ts&: get<SPIRString>(id: file_id).str, ts: "\"");
16521 }
16522}
16523
16524void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
16525 SmallVector<uint32_t> chain)
16526{
16527 // Fully unroll all member/array indices one by one.
16528
16529 auto &lhs_type = get<SPIRType>(id: lhs_type_id);
16530 auto &rhs_type = get<SPIRType>(id: rhs_type_id);
16531
16532 if (!lhs_type.array.empty())
16533 {
16534 // Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
16535 // and this is a rather obscure opcode anyway, so keep it simple unless we are forced to.
16536 uint32_t array_size = to_array_size_literal(type: lhs_type);
16537 chain.push_back(t: 0);
16538
16539 for (uint32_t i = 0; i < array_size; i++)
16540 {
16541 chain.back() = i;
16542 emit_copy_logical_type(lhs_id, lhs_type_id: lhs_type.parent_type, rhs_id, rhs_type_id: rhs_type.parent_type, chain);
16543 }
16544 }
16545 else if (lhs_type.basetype == SPIRType::Struct)
16546 {
16547 chain.push_back(t: 0);
16548 uint32_t member_count = uint32_t(lhs_type.member_types.size());
16549 for (uint32_t i = 0; i < member_count; i++)
16550 {
16551 chain.back() = i;
16552 emit_copy_logical_type(lhs_id, lhs_type_id: lhs_type.member_types[i], rhs_id, rhs_type_id: rhs_type.member_types[i], chain);
16553 }
16554 }
16555 else
16556 {
16557 // Need to handle unpack/packing fixups since this can differ wildly between the logical types,
16558 // particularly in MSL.
16559 // To deal with this, we emit access chains and go through emit_store_statement
16560 // to deal with all the special cases we can encounter.
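// As a sketch, copying a logical "struct S { float a[2]; float b; }" this way expands into
// per-leaf stores along the lines of:
//   dst.a[0] = src.a[0]; dst.a[1] = src.a[1]; dst.b = src.b;
// (names illustrative; emit_store_statement below applies any packing/transpose fixups per leaf).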
16561
16562 AccessChainMeta lhs_meta, rhs_meta;
16563 auto lhs = access_chain_internal(base: lhs_id, indices: chain.data(), count: uint32_t(chain.size()),
16564 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &lhs_meta);
16565 auto rhs = access_chain_internal(base: rhs_id, indices: chain.data(), count: uint32_t(chain.size()),
16566 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &rhs_meta);
16567
16568 uint32_t id = ir.increase_bound_by(count: 2);
16569 lhs_id = id;
16570 rhs_id = id + 1;
16571
16572 {
16573 auto &lhs_expr = set<SPIRExpression>(id: lhs_id, args: std::move(lhs), args&: lhs_type_id, args: true);
16574 lhs_expr.need_transpose = lhs_meta.need_transpose;
16575
16576 if (lhs_meta.storage_is_packed)
16577 set_extended_decoration(id: lhs_id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
16578 if (lhs_meta.storage_physical_type != 0)
16579 set_extended_decoration(id: lhs_id, decoration: SPIRVCrossDecorationPhysicalTypeID, value: lhs_meta.storage_physical_type);
16580
16581 forwarded_temporaries.insert(x: lhs_id);
16582 suppressed_usage_tracking.insert(x: lhs_id);
16583 }
16584
16585 {
16586 auto &rhs_expr = set<SPIRExpression>(id: rhs_id, args: std::move(rhs), args&: rhs_type_id, args: true);
16587 rhs_expr.need_transpose = rhs_meta.need_transpose;
16588
16589 if (rhs_meta.storage_is_packed)
16590 set_extended_decoration(id: rhs_id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
16591 if (rhs_meta.storage_physical_type != 0)
16592 set_extended_decoration(id: rhs_id, decoration: SPIRVCrossDecorationPhysicalTypeID, value: rhs_meta.storage_physical_type);
16593
16594 forwarded_temporaries.insert(x: rhs_id);
16595 suppressed_usage_tracking.insert(x: rhs_id);
16596 }
16597
16598 emit_store_statement(lhs_expression: lhs_id, rhs_expression: rhs_id);
16599 }
16600}
16601
16602bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
16603{
16604 if (!has_decoration(id, decoration: DecorationInputAttachmentIndex))
16605 return false;
16606
16607 uint32_t input_attachment_index = get_decoration(id, decoration: DecorationInputAttachmentIndex);
16608 for (auto &remap : subpass_to_framebuffer_fetch_attachment)
16609 if (remap.first == input_attachment_index)
16610 return true;
16611
16612 return false;
16613}

const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
{
	const SPIRVariable *ret = nullptr;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (has_decoration(var.self, DecorationInputAttachmentIndex) &&
		    get_decoration(var.self, DecorationInputAttachmentIndex) == index)
		{
			ret = &var;
		}
	});
	return ret;
}

const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
{
	const SPIRVariable *ret = nullptr;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location)
			ret = &var;
	});
	return ret;
}

void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
{
	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
	{
		auto *subpass_var = find_subpass_input_by_attachment_index(remap.first);
		auto *output_var = find_color_output_by_location(remap.second);
		if (!subpass_var)
			continue;
		if (!output_var)
			SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
			                  "to read from it.");
		if (is_array(get<SPIRType>(output_var->basetype)))
			SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");

		auto &func = get<SPIRFunction>(get_entry_point().self);
		func.fixup_hooks_in.push_back([=]() {
			if (is_legacy())
			{
				statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[",
				          get_decoration(output_var->self, DecorationLocation), "];");
			}
			else
			{
				uint32_t num_rt_components = this->get<SPIRType>(output_var->basetype).vecsize;
				statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ",
				          to_expression(output_var->self), ";");
			}
		});
	}
}
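// Illustrative sketch of the GLSL this fixup hook emits (identifiers are hypothetical, assuming a
// subpass input "uSubpass0" remapped to a vec4 output "FragColor" at location 0):
//   uSubpass0 = gl_LastFragData[0];    // legacy GLSL / ESSL targets
//   uSubpass0.xyzw = FragColor;        // GL_EXT_shader_framebuffer_fetch targets
// The copy is injected at the top of the entry point so later reads of the subpass input observe
// the current framebuffer contents.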

bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
{
	return is_depth_image(get<SPIRType>(get<SPIRVariable>(id).basetype), id);
}

const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
{
	static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
		                                                "GL_KHR_shader_subgroup_basic",
		                                                "GL_KHR_shader_subgroup_vote",
		                                                "GL_NV_gpu_shader5",
		                                                "GL_NV_shader_thread_group",
		                                                "GL_NV_shader_thread_shuffle",
		                                                "GL_ARB_shader_ballot",
		                                                "GL_ARB_shader_group_vote",
		                                                "GL_AMD_gcn_shader" };
	return retval[c];
}

SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
{
	switch (c)
	{
	case ARB_shader_ballot:
		return { "GL_ARB_shader_int64" };
	case AMD_gcn_shader:
		return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
	default:
		return {};
	}
}

const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
{
	switch (c)
	{
	case ARB_shader_ballot:
		return "defined(GL_ARB_shader_int64)";
	case AMD_gcn_shader:
		return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
	default:
		return "";
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_feature_dependencies(Feature feature)
{
	switch (feature)
	{
	case SubgroupAllEqualT:
		return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
	case SubgroupElect:
		return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
		return { SubgroupMask };
	case SubgroupBallotBitCount:
		return { SubgroupBallot };
	default:
		return {};
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
    get_feature_dependency_mask(Feature feature)
{
	return build_mask(get_feature_dependencies(feature));
}

bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
{
	static const bool retval[FeatureCount] = { false, false, false, false, false, false,
		                                       true, // SubgroupBallotFindLSB_MSB
		                                       false, false, false, false,
		                                       true, // SubgroupMemBarrier - replaced with workgroup memory barriers
		                                       false, false, true, false };

	return retval[feature];
}

CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
    get_KHR_extension_for_feature(Feature feature)
{
	static const Candidate extensions[FeatureCount] = {
		KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
		KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
		KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
		KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot
	};

	return extensions[feature];
}

void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
{
	feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
}
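// For illustration: requesting a feature also pulls in the mask bits of its dependencies.
// E.g. request_feature(SubgroupElect) sets the SubgroupElect bit plus the bits for
// SubgroupBallotFindLSB_MSB, SubgroupBallot and SubgroupInvocationID as reported by
// get_feature_dependencies() above, so resolve() weighs candidate extensions for the whole
// dependency set.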

bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
{
	return (feature_mask & (1u << feature)) != 0;
}

CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
{
	Result res;

	for (uint32_t i = 0u; i < FeatureCount; ++i)
	{
		if (feature_mask & (1u << i))
		{
			auto feature = static_cast<Feature>(i);
			std::unordered_set<uint32_t> unique_candidates;

			auto candidates = get_candidates_for_feature(feature);
			unique_candidates.insert(candidates.begin(), candidates.end());

			auto deps = get_feature_dependencies(feature);
			for (Feature d : deps)
			{
				candidates = get_candidates_for_feature(d);
				if (!candidates.empty())
					unique_candidates.insert(candidates.begin(), candidates.end());
			}

			for (uint32_t c : unique_candidates)
				++res.weights[static_cast<Candidate>(c)];
		}
	}

	return res;
}

CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_candidates_for_feature(Feature ft, const Result &r)
{
	auto c = get_candidates_for_feature(ft);
	auto cmp = [&r](Candidate a, Candidate b) {
		if (r.weights[a] == r.weights[b])
			return a < b; // Prefer candidates with lower enum value
		return r.weights[a] > r.weights[b];
	};
	std::sort(c.begin(), c.end(), cmp);
	return c;
}

CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_candidates_for_feature(Feature feature)
{
	switch (feature)
	{
	case SubgroupMask:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupSize:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
	case SubgroupInvocationID:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupID:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
	case NumSubgroups:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
	case SubgroupBroadcast_First:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
	case SubgroupBallotFindLSB_MSB:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
	case SubgroupAll_Any_AllEqualBool:
		return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
	case SubgroupAllEqualT:
		return {}; // depends on other features only
	case SubgroupElect:
		return {}; // depends on other features only
	case SubgroupBallot:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupBarrier:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
	case SubgroupMemBarrier:
		return { KHR_shader_subgroup_basic };
	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
		return {};
	case SubgroupBallotBitExtract:
		return { NV_shader_thread_group };
	case SubgroupBallotBitCount:
		return {};
	default:
		return {};
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
    const SmallVector<Feature> &features)
{
	FeatureMask mask = 0;
	for (Feature f : features)
		mask |= FeatureMask(1) << f;
	return mask;
}

CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
{
	for (auto &weight : weights)
		weight = 0;

	// Make sure KHR_shader_subgroup extensions are always preferred.
	const uint32_t big_num = FeatureCount;
	weights[KHR_shader_subgroup_ballot] = big_num;
	weights[KHR_shader_subgroup_basic] = big_num;
	weights[KHR_shader_subgroup_vote] = big_num;
}
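// For illustration: because the constructor seeds the three KHR_shader_subgroup_* candidates with
// a weight of FeatureCount, the sorted list from get_candidates_for_feature(ft, r) can only rank a
// vendor candidate (NV/ARB/AMD) ahead of them if it accumulates more votes than that head start,
// which in practice keeps the KHR extensions preferred whenever they can serve a feature.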

void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
{
	// Must be ordered to maintain deterministic output, so vector is appropriate.
	if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
	    end(workaround_ubo_load_overload_types))
	{
		force_recompile();
		workaround_ubo_load_overload_types.push_back(id);
	}
}

void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
{
	// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
	// To load these types correctly, we must first wrap them in a dummy function whose only purpose is
	// to ensure that the row_major decoration is actually respected.
	auto *var = maybe_get_backing_variable(ptr);
	if (!var)
		return;

	auto &backing_type = get<SPIRType>(var->basetype);
	bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform &&
	              has_decoration(backing_type.self, DecorationBlock);
	if (!is_ubo)
		return;

	auto *type = &get<SPIRType>(loaded_type);
	bool rewrite = false;

	if (is_matrix(*type))
	{
		// To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
		// we simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
		// If there is any row-major action going on, we apply the workaround.
		// It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
		// If an access chain occurred, the workaround is not required, so loads of vectors or scalars do not need it.
		type = &backing_type;
	}

	if (type->basetype == SPIRType::Struct)
	{
		// If we're loading a struct where any member is a row-major matrix, apply the workaround.
		for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
		{
			if (combined_decoration_for_member(*type, i).get(DecorationRowMajor))
			{
				rewrite = true;
				break;
			}
		}
	}

	if (rewrite)
	{
		request_workaround_wrapper_overload(loaded_type);
		expr = join("spvWorkaroundRowMajor(", expr, ")");
	}
}
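// Illustrative sketch (identifiers are hypothetical): for a UBO member access such as
// "ubo.rowMajorMVP", the load expression is rewritten to "spvWorkaroundRowMajor(ubo.rowMajorMVP)",
// and request_workaround_wrapper_overload() forces a recompile so that a matching
// spvWorkaroundRowMajor() wrapper overload for the loaded type can be declared in the output.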

void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component)
{
	masked_output_locations.insert({ location, component });
}

void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin)
{
	masked_output_builtins.insert(builtin);
}
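// Usage sketch (illustrative; "compiler" is a hypothetical CompilerGLSL instance): callers can
// suppress emission of individual stage outputs before compiling, e.g.
//   compiler.mask_stage_output_by_location(1, 0);              // output at location 1, component 0
//   compiler.mask_stage_output_by_builtin(spv::BuiltInPointSize);
// The is_stage_output_*_masked() queries below then report those outputs as masked.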

bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const
{
	auto &type = get<SPIRType>(var.basetype);
	bool is_block = has_decoration(type.self, DecorationBlock);
	// Blocks by themselves are never masked. Must be masked per-member.
	if (is_block)
		return false;

	bool is_builtin = has_decoration(var.self, DecorationBuiltIn);

	if (is_builtin)
	{
		return is_stage_output_builtin_masked(BuiltIn(get_decoration(var.self, DecorationBuiltIn)));
	}
	else
	{
		if (!has_decoration(var.self, DecorationLocation))
			return false;

		return is_stage_output_location_masked(
		    get_decoration(var.self, DecorationLocation),
		    get_decoration(var.self, DecorationComponent));
	}
}

bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const
{
	auto &type = get<SPIRType>(var.basetype);
	bool is_block = has_decoration(type.self, DecorationBlock);
	if (!is_block)
		return false;

	BuiltIn builtin = BuiltInMax;
	if (is_member_builtin(type, index, &builtin))
	{
		return is_stage_output_builtin_masked(builtin);
	}
	else
	{
		uint32_t location = get_declared_member_location(var, index, strip_array);
		uint32_t component = get_member_decoration(type.self, index, DecorationComponent);
		return is_stage_output_location_masked(location, component);
	}
}

bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const
{
	return masked_output_locations.count({ location, component }) != 0;
}

bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const
{
	return masked_output_builtins.count(builtin) != 0;
}

uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
{
	auto &block_type = get<SPIRType>(var.basetype);
	if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation))
		return get_member_decoration(block_type.self, mbr_idx, DecorationLocation);
	else
		return get_accumulated_member_location(var, mbr_idx, strip_array);
}

uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
{
	auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
	uint32_t location = get_decoration(var.self, DecorationLocation);

	for (uint32_t i = 0; i < mbr_idx; i++)
	{
		auto &mbr_type = get<SPIRType>(type.member_types[i]);

		// Start counting from any place we have a new location decoration.
		if (has_member_decoration(type.self, i, DecorationLocation))
			location = get_member_decoration(type.self, i, DecorationLocation);

		uint32_t location_count = type_to_location_count(mbr_type);
		location += location_count;
	}

	return location;
}
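// Worked example (hypothetical block layout): for an output block { float a; mat4 b; vec3 c; }
// declared at Location = 4 with no per-member Location decorations, querying member index 2 ("c")
// accumulates 4 + 1 (float) + 4 (mat4 occupies one location per column) = 9.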

StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
{
	auto *var = maybe_get_backing_variable(ptr);

	// If the expression has been lowered to a temporary, we need to use the Generic storage class.
	// We're looking for the effective storage class of a given expression.
	// An access chain or forwarded OpLoads from such access chains
	// will generally have the storage class of the underlying variable, but if the load was not forwarded
	// we have lost any address space qualifiers.
	bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
	                        (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);

	if (var && !forced_temporary)
	{
		if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
			return StorageClassWorkgroup;
		if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
			return StorageClassStorageBuffer;

		// Normalize SSBOs to StorageBuffer here.
		if (var->storage == StorageClassUniform &&
		    has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
			return StorageClassStorageBuffer;
		else
			return var->storage;
	}
	else
		return expression_type(ptr).storage;
}

uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const
{
	uint32_t count;
	if (type.basetype == SPIRType::Struct)
	{
		uint32_t mbr_count = uint32_t(type.member_types.size());
		count = 0;
		for (uint32_t i = 0; i < mbr_count; i++)
			count += type_to_location_count(get<SPIRType>(type.member_types[i]));
	}
	else
	{
		count = type.columns > 1 ? type.columns : 1;
	}

	uint32_t dim_count = uint32_t(type.array.size());
	for (uint32_t i = 0; i < dim_count; i++)
		count *= to_array_size_literal(type, i);

	return count;
}
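// For illustration: a vec4 counts as 1 location, a mat4 as 4 (one per column), and a mat4[3] as
// 4 * 3 = 12; a struct sums the counts of its members before applying its own array dimensions.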