/*
 * Copyright 2015-2021 Arm Limited
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <limits>
#include <locale.h>
#include <utility>
#include <array>

#ifndef _WIN32
#ifndef __ghs__
#include <langinfo.h>
#endif
#endif
#include <locale.h>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

enum ExtraSubExpressionType
{
	// Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map.
	EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000,
	EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000
};
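// Illustrative use of the masks above: a helper expression derived from a regular ID can be
// stored alongside that ID by OR'ing one of these bits into the key, e.g.
// extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX] (exact usage sites are elsewhere in this file).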

static bool is_unsigned_opcode(Op op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case OpShiftRightLogical:
	case OpUGreaterThan:
	case OpUGreaterThanEqual:
	case OpULessThan:
	case OpULessThanEqual:
	case OpUConvert:
	case OpUDiv:
	case OpUMod:
	case OpUMulExtended:
	case OpConvertUToF:
	case OpConvertFToU:
		return true;

	default:
		return false;
	}
}

static bool is_unsigned_glsl_opcode(GLSLstd450 op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case GLSLstd450UClamp:
	case GLSLstd450UMin:
	case GLSLstd450UMax:
	case GLSLstd450FindUMsb:
		return true;

	default:
		return false;
	}
}

static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
	case BufferPackingStd140:
	case BufferPackingStd140EnhancedLayout:
		return true;

	default:
		return false;
	}
}

static bool packing_is_hlsl(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
		return true;

	default:
		return false;
	}
}

static bool packing_has_flexible_offset(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140:
	case BufferPackingStd430:
	case BufferPackingScalar:
	case BufferPackingHLSLCbuffer:
		return false;

	default:
		return true;
	}
}

static bool packing_is_scalar(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingScalar:
	case BufferPackingScalarEnhancedLayout:
		return true;

	default:
		return false;
	}
}

static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140EnhancedLayout:
		return BufferPackingStd140;
	case BufferPackingStd430EnhancedLayout:
		return BufferPackingStd430;
	case BufferPackingHLSLCbufferPackOffset:
		return BufferPackingHLSLCbuffer;
	case BufferPackingScalarEnhancedLayout:
		return BufferPackingScalar;
	default:
		return packing;
	}
}

void CompilerGLSL::init()
{
	if (ir.source.known)
	{
		options.es = ir.source.es;
		options.version = ir.source.version;
	}

	// Query the locale to see what the decimal point is.
	// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
	// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
	// tricky.
#ifdef _WIN32
	// On Windows, localeconv uses thread-local storage, so it should be fine.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#elif defined(__ANDROID__) && __ANDROID_API__ < 26 || defined(__ghs__) || defined(__QNXNTO__) || defined(__VXWORKS__)
	// nl_langinfo is not supported on this platform, fall back to the worse alternative.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#else
	// localeconv, the portable function, is not MT-safe ...
	const char *decimal_point = nl_langinfo(RADIXCHAR);
	if (decimal_point && *decimal_point != '\0')
		current_locale_radix_character = *decimal_point;
#endif
}

static const char *to_pls_layout(PlsFormat format)
{
	switch (format)
	{
	case PlsR11FG11FB10F:
		return "layout(r11f_g11f_b10f) ";
	case PlsR32F:
		return "layout(r32f) ";
	case PlsRG16F:
		return "layout(rg16f) ";
	case PlsRGB10A2:
		return "layout(rgb10_a2) ";
	case PlsRGBA8:
		return "layout(rgba8) ";
	case PlsRG16:
		return "layout(rg16) ";
	case PlsRGBA8I:
		return "layout(rgba8i) ";
	case PlsRG16I:
		return "layout(rg16i) ";
	case PlsRGB10A2UI:
		return "layout(rgb10_a2ui) ";
	case PlsRGBA8UI:
		return "layout(rgba8ui) ";
	case PlsRG16UI:
		return "layout(rg16ui) ";
	case PlsR32UI:
		return "layout(r32ui) ";
	default:
		return "";
	}
}
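// The string returned above is prepended to the matching member declaration when the pixel
// local storage block is emitted, so a PlsRG16F entry ends up as something like (illustrative):
//   layout(rg16f) vec2 color;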

static std::pair<spv::Op, SPIRType::BaseType> pls_format_to_basetype(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR11FG11FB10F:
	case PlsR32F:
	case PlsRG16F:
	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRG16:
		return std::make_pair(spv::OpTypeFloat, SPIRType::Float);

	case PlsRGBA8I:
	case PlsRG16I:
		return std::make_pair(spv::OpTypeInt, SPIRType::Int);

	case PlsRGB10A2UI:
	case PlsRGBA8UI:
	case PlsRG16UI:
	case PlsR32UI:
		return std::make_pair(spv::OpTypeInt, SPIRType::UInt);
	}
}

static uint32_t pls_format_to_components(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR32F:
	case PlsR32UI:
		return 1;

	case PlsRG16F:
	case PlsRG16:
	case PlsRG16UI:
	case PlsRG16I:
		return 2;

	case PlsR11FG11FB10F:
		return 3;

	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRGBA8I:
	case PlsRGB10A2UI:
	case PlsRGBA8UI:
		return 4;
	}
}

const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
{
	static const char *const swizzle[4][4] = {
		{ ".x", ".y", ".z", ".w" },
		{ ".xy", ".yz", ".zw", nullptr },
		{ ".xyz", ".yzw", nullptr, nullptr },
#if defined(__GNUC__) && (__GNUC__ == 9)
		// This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
		// This array ends up being compiled as all nullptrs, tripping the assertions below.
		{ "", nullptr, nullptr, "$" },
#else
		{ "", nullptr, nullptr, nullptr },
#endif
	};

	assert(vecsize >= 1 && vecsize <= 4);
	assert(index >= 0 && index < 4);
	assert(swizzle[vecsize - 1][index]);

	return swizzle[vecsize - 1][index];
}
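// Examples, reading straight from the table above: vector_swizzle(1, 2) returns ".z",
// vector_swizzle(3, 1) returns ".yzw", and vector_swizzle(4, 0) returns "" since a full
// 4-component access needs no swizzle at all.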

void CompilerGLSL::reset(uint32_t iteration_count)
{
	// Sanity check the iteration count to be robust against a certain class of bugs where
	// we keep forcing recompilations without making clear forward progress.
	// In buggy situations we will loop forever, or loop for an unbounded number of iterations.
	// Certain types of recompilations are considered to make forward progress,
	// but in almost all situations, we'll never see more than 3 iterations.
	// It is highly context-sensitive when we need to force recompilation,
	// and it is not practical with the current architecture
	// to resolve everything up front.
	if (iteration_count >= options.force_recompile_max_debug_iterations && !is_force_recompile_forward_progress)
		SPIRV_CROSS_THROW("Maximum compilation loops detected and no forward progress was made. Must be a SPIRV-Cross bug!");

	// We do some speculative optimizations which should pretty much always work out,
	// but just in case the SPIR-V is rather weird, recompile until it's happy.
	// This typically only means one extra pass.
	clear_force_recompile();

	// Clear invalid expression tracking.
	invalid_expressions.clear();
	composite_insert_overwritten.clear();
	current_function = nullptr;

	// Clear temporary usage tracking.
	expression_usage_counts.clear();
	forwarded_temporaries.clear();
	suppressed_usage_tracking.clear();

	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
	flushed_phi_variables.clear();

	current_emitting_switch_stack.clear();

	reset_name_caches();

	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
		func.active = false;
		func.flush_undeclared = true;
	});

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });

	ir.reset_all_of_type<SPIRExpression>();
	ir.reset_all_of_type<SPIRAccessChain>();

	statement_count = 0;
	indent = 0;
	current_loop_level = 0;
}

void CompilerGLSL::remap_pls_variables()
{
	for (auto &input : pls_inputs)
	{
		auto &var = get<SPIRVariable>(input.id);

		bool input_is_target = false;
		if (var.storage == StorageClassUniformConstant)
		{
			auto &type = get<SPIRType>(var.basetype);
			input_is_target = type.image.dim == DimSubpassData;
		}

		if (var.storage != StorageClassInput && !input_is_target)
			SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
		var.remapped_variable = true;
	}

	for (auto &output : pls_outputs)
	{
		auto &var = get<SPIRVariable>(output.id);
		if (var.storage != StorageClassOutput)
			SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
		var.remapped_variable = true;
	}
}

void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent)
{
	subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
	inout_color_attachments.push_back({ color_location, coherent });
}

bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const
{
	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
	                    [&](const std::pair<uint32_t, bool> &elem) {
		                    return elem.first == location;
	                    }) != end(inout_color_attachments);
}

bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const
{
	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
	                    [&](const std::pair<uint32_t, bool> &elem) {
		                    return elem.first == location && !elem.second;
	                    }) != end(inout_color_attachments);
}

void CompilerGLSL::find_static_extensions()
{
	ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
		if (type.basetype == SPIRType::Double)
		{
			if (options.es)
				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
			if (!options.es && options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader_fp64");
		}
		else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
		{
			if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310.
				SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310.");
			require_extension_internal("GL_ARB_gpu_shader_int64");
		}
		else if (type.basetype == SPIRType::Half)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_16bit_storage");
		}
		else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_8bit_storage");
		}
		else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_16bit_storage");
		}
	});

	auto &execution = get_entry_point();
	switch (execution.model)
	{
	case ExecutionModelGLCompute:
		if (!options.es && options.version < 430)
			require_extension_internal("GL_ARB_compute_shader");
		if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
		break;

	case ExecutionModelGeometry:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_geometry_shader");
		if (!options.es && options.version < 150)
			require_extension_internal("GL_ARB_geometry_shader4");

		if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
		{
			// Instanced GS is part of 400 core or this extension.
			if (!options.es && options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader5");
		}
		break;

	case ExecutionModelTessellationEvaluation:
	case ExecutionModelTessellationControl:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_tessellation_shader");
		if (!options.es && options.version < 400)
			require_extension_internal("GL_ARB_tessellation_shader");
		break;

	case ExecutionModelRayGenerationKHR:
	case ExecutionModelIntersectionKHR:
	case ExecutionModelAnyHitKHR:
	case ExecutionModelClosestHitKHR:
	case ExecutionModelMissKHR:
	case ExecutionModelCallableKHR:
		// NV enums are aliases.
		if (options.es || options.version < 460)
			SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");

		// Need to figure out if we should target KHR or NV extension based on capabilities.
		for (auto &cap : ir.declared_capabilities)
		{
			if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR ||
			    cap == CapabilityRayTraversalPrimitiveCullingKHR)
			{
				ray_tracing_is_khr = true;
				break;
			}
		}

		if (ray_tracing_is_khr)
		{
			// In KHR ray tracing we pass payloads by pointer instead of location,
			// so make sure we assign locations properly.
			ray_tracing_khr_fixup_locations();
			require_extension_internal("GL_EXT_ray_tracing");
		}
		else
			require_extension_internal("GL_NV_ray_tracing");
		break;

	case ExecutionModelMeshEXT:
	case ExecutionModelTaskEXT:
		if (options.es || options.version < 450)
			SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above.");
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("Mesh shaders require Vulkan semantics.");
		require_extension_internal("GL_EXT_mesh_shader");
		break;

	default:
		break;
	}

	if (!pls_inputs.empty() || !pls_outputs.empty())
	{
		if (execution.model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
		require_extension_internal("GL_EXT_shader_pixel_local_storage");
	}

	if (!inout_color_attachments.empty())
	{
		if (execution.model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
		if (options.vulkan_semantics)
			SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");

		bool has_coherent = false;
		bool has_incoherent = false;

		for (auto &att : inout_color_attachments)
		{
			if (att.second)
				has_coherent = true;
			else
				has_incoherent = true;
		}

		if (has_coherent)
			require_extension_internal("GL_EXT_shader_framebuffer_fetch");
		if (has_incoherent)
			require_extension_internal("GL_EXT_shader_framebuffer_fetch_non_coherent");
	}

	if (options.separate_shader_objects && !options.es && options.version < 410)
		require_extension_internal("GL_ARB_separate_shader_objects");

	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
	{
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
		if (options.es && options.version < 320)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
		else if (!options.es && options.version < 450)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
		require_extension_internal("GL_EXT_buffer_reference2");
	}
	else if (ir.addressing_model != AddressingModelLogical)
	{
		SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
	}

	// Check for nonuniform qualifier and passthrough.
	// Instead of looping over all decorations to find this, just look at capabilities.
	for (auto &cap : ir.declared_capabilities)
	{
		switch (cap)
		{
		case CapabilityShaderNonUniformEXT:
			if (!options.vulkan_semantics)
				require_extension_internal("GL_NV_gpu_shader5");
			else
				require_extension_internal("GL_EXT_nonuniform_qualifier");
			break;
		case CapabilityRuntimeDescriptorArrayEXT:
			if (!options.vulkan_semantics)
				SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
			require_extension_internal("GL_EXT_nonuniform_qualifier");
			break;

		case CapabilityGeometryShaderPassthroughNV:
			if (execution.model == ExecutionModelGeometry)
			{
				require_extension_internal("GL_NV_geometry_shader_passthrough");
				execution.geometry_passthrough = true;
			}
			break;

		case CapabilityVariablePointers:
		case CapabilityVariablePointersStorageBuffer:
			SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");

		case CapabilityMultiView:
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_multiview");
			else
			{
				require_extension_internal("GL_OVR_multiview2");
				if (options.ovr_multiview_view_count == 0)
					SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2.");
				if (get_execution_model() != ExecutionModelVertex)
					SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
			}
			break;

		case CapabilityRayQueryKHR:
			if (options.es || options.version < 460 || !options.vulkan_semantics)
				SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
			require_extension_internal("GL_EXT_ray_query");
			ray_tracing_is_khr = true;
			break;

		case CapabilityRayTraversalPrimitiveCullingKHR:
			if (options.es || options.version < 460 || !options.vulkan_semantics)
				SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
			require_extension_internal("GL_EXT_ray_flags_primitive_culling");
			ray_tracing_is_khr = true;
			break;

		default:
			break;
		}
	}

	if (options.ovr_multiview_view_count)
	{
		if (options.vulkan_semantics)
			SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics.");
		if (get_execution_model() != ExecutionModelVertex)
			SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
		require_extension_internal("GL_OVR_multiview2");
	}

	// KHR one is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR.
	for (auto &ext : ir.declared_extensions)
		if (ext == "SPV_NV_fragment_shader_barycentric")
			barycentric_is_nv = true;
}

void CompilerGLSL::require_polyfill(Polyfill polyfill, bool relaxed)
{
	uint32_t &polyfills = (relaxed && (options.es || options.vulkan_semantics)) ?
	                      required_polyfills_relaxed : required_polyfills;

	if ((polyfills & polyfill) == 0)
	{
		polyfills |= polyfill;
		force_recompile();
	}
}

void CompilerGLSL::ray_tracing_khr_fixup_locations()
{
	uint32_t location = 0;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		// Incoming payload storage can also be used for tracing.
		if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR &&
		    var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR)
			return;
		if (is_hidden_variable(var))
			return;
		set_decoration(var.self, DecorationLocation, location++);
	});
}

string CompilerGLSL::compile()
{
	ir.fixup_reserved_names();

	if (!options.vulkan_semantics)
	{
		// only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
		backend.nonuniform_qualifier = "";
		backend.needs_row_major_load_workaround = options.enable_row_major_load_workaround;
	}
	backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
	backend.force_gl_in_out_block = true;
	backend.supports_extensions = true;
	backend.use_array_constructor = true;
	backend.workgroup_size_is_hidden = true;
	backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics;
	backend.support_precise_qualifier =
	    (!options.es && options.version >= 400) || (options.es && options.version >= 320);

	if (is_legacy_es())
		backend.support_case_fallthrough = false;

	// Scan the SPIR-V to find trivial uses of extensions.
	fixup_anonymous_struct_names();
	fixup_type_alias();
	reorder_type_alias();
	build_function_control_flow_graphs_and_analyze();
	find_static_extensions();
	fixup_image_load_store_access();
	update_active_builtins();
	analyze_image_and_sampler_usage();
	analyze_interlocked_resource_usage();
	if (!inout_color_attachments.empty())
		emit_inout_fragment_outputs_copy_to_subpass_inputs();

	// Shaders might cast unrelated data to pointers of non-block types.
	// Find all such instances and make sure we can cast the pointers to a synthesized block type.
	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
		analyze_non_block_pointer_types();

	uint32_t pass_count = 0;
	do
	{
		reset(pass_count);

		buffer.reset();

		emit_header();
		emit_resources();
		emit_extension_workarounds(get_execution_model());

		if (required_polyfills != 0)
			emit_polyfills(required_polyfills, false);
		if ((options.es || options.vulkan_semantics) && required_polyfills_relaxed != 0)
			emit_polyfills(required_polyfills_relaxed, true);

		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());

		pass_count++;
	} while (is_forcing_recompilation());

	// Implement the interlocked wrapper function at the end.
	// The body was implemented in lieu of main().
	if (interlocked_is_complex)
	{
		statement("void main()");
		begin_scope();
		statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
		statement("SPIRV_Cross_beginInvocationInterlock();");
		statement("spvMainInterlockedBody();");
		statement("SPIRV_Cross_endInvocationInterlock();");
		end_scope();
	}

	// Entry point in GLSL is always main().
	get_entry_point().name = "main";

	return buffer.str();
}

std::string CompilerGLSL::get_partial_source()
{
	return buffer.str();
}

void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
                                        const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
{
	auto &execution = get_entry_point();
	bool builtin_workgroup = execution.workgroup_size.constant != 0;
	bool use_local_size_id = !builtin_workgroup && execution.flags.get(ExecutionModeLocalSizeId);

	if (wg_x.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
		else
			arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
	}
	else if (use_local_size_id && execution.workgroup_size.id_x)
		arguments.push_back(join("local_size_x = ", get<SPIRConstant>(execution.workgroup_size.id_x).scalar()));
	else
		arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));

	if (wg_y.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
		else
			arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
	}
	else if (use_local_size_id && execution.workgroup_size.id_y)
		arguments.push_back(join("local_size_y = ", get<SPIRConstant>(execution.workgroup_size.id_y).scalar()));
	else
		arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));

	if (wg_z.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
		else
			arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
	}
	else if (use_local_size_id && execution.workgroup_size.id_z)
		arguments.push_back(join("local_size_z = ", get<SPIRConstant>(execution.workgroup_size.id_z).scalar()));
	else
		arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
}
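// The arguments built above are merged into a single layout declaration by emit_header(),
// e.g. "layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;" or, with
// specialization constants under Vulkan semantics, "layout(local_size_x_id = 0, ...) in;".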

void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
{
	if (options.vulkan_semantics)
	{
		auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
		require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
	}
	else
	{
		if (!shader_subgroup_supporter.is_feature_requested(feature))
			force_recompile();
		shader_subgroup_supporter.request_feature(feature);
	}
}

void CompilerGLSL::emit_header()
{
	auto &execution = get_entry_point();
	statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");

	if (!options.es && options.version < 420)
	{
		// Needed for binding = # on UBOs, etc.
		if (options.enable_420pack_extension)
		{
			statement("#ifdef GL_ARB_shading_language_420pack");
			statement("#extension GL_ARB_shading_language_420pack : require");
			statement("#endif");
		}
		// Needed for: layout(early_fragment_tests) in;
		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
			require_extension_internal("GL_ARB_shader_image_load_store");
	}

	// Needed for: layout(post_depth_coverage) in;
	if (execution.flags.get(ExecutionModePostDepthCoverage))
		require_extension_internal("GL_ARB_post_depth_coverage");

	// Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
	bool interlock_used = execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
	                      execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
	                      execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
	                      execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT);

	if (interlock_used)
	{
		if (options.es)
		{
			if (options.version < 310)
				SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
			require_extension_internal("GL_NV_fragment_shader_interlock");
		}
		else
		{
			if (options.version < 420)
				require_extension_internal("GL_ARB_shader_image_load_store");
			require_extension_internal("GL_ARB_fragment_shader_interlock");
		}
	}

	for (auto &ext : forced_extensions)
	{
		if (ext == "GL_ARB_gpu_shader_int64")
		{
			statement("#if defined(GL_ARB_gpu_shader_int64)");
			statement("#extension GL_ARB_gpu_shader_int64 : require");
			if (!options.vulkan_semantics || options.es)
			{
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
			}
			statement("#else");
			statement("#error No extension available for 64-bit integers.");
			statement("#endif");
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
		{
			// Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
			// GL_AMD_gpu_shader_half_float is a superset, so try that first.
			statement("#if defined(GL_AMD_gpu_shader_half_float)");
			statement("#extension GL_AMD_gpu_shader_half_float : require");
			if (!options.vulkan_semantics)
			{
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
			}
			else
			{
				statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
			}
			statement("#else");
			statement("#error No extension available for FP16.");
			statement("#endif");
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int8")
		{
			if (options.vulkan_semantics)
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require");
			else
			{
				statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int8)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require");
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
				statement("#else");
				statement("#error No extension available for Int8.");
				statement("#endif");
			}
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
		{
			if (options.vulkan_semantics)
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
			else
			{
				statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int16)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
				statement("#elif defined(GL_AMD_gpu_shader_int16)");
				statement("#extension GL_AMD_gpu_shader_int16 : require");
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
				statement("#else");
				statement("#error No extension available for Int16.");
				statement("#endif");
			}
		}
		else if (ext == "GL_ARB_post_depth_coverage")
		{
			if (options.es)
				statement("#extension GL_EXT_post_depth_coverage : require");
			else
			{
				statement("#if defined(GL_ARB_post_depth_coverage)");
				statement("#extension GL_ARB_post_depth_coverage : require");
				statement("#else");
				statement("#extension GL_EXT_post_depth_coverage : require");
				statement("#endif");
			}
		}
		else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
		{
			// Soft-enable this extension on plain GLSL.
			statement("#ifdef ", ext);
			statement("#extension ", ext, " : enable");
			statement("#endif");
		}
		else if (ext == "GL_EXT_control_flow_attributes")
		{
			// These are just hints so we can conditionally enable and fallback in the shader.
			statement("#if defined(GL_EXT_control_flow_attributes)");
			statement("#extension GL_EXT_control_flow_attributes : require");
			statement("#define SPIRV_CROSS_FLATTEN [[flatten]]");
			statement("#define SPIRV_CROSS_BRANCH [[dont_flatten]]");
			statement("#define SPIRV_CROSS_UNROLL [[unroll]]");
			statement("#define SPIRV_CROSS_LOOP [[dont_unroll]]");
			statement("#else");
			statement("#define SPIRV_CROSS_FLATTEN");
			statement("#define SPIRV_CROSS_BRANCH");
			statement("#define SPIRV_CROSS_UNROLL");
			statement("#define SPIRV_CROSS_LOOP");
			statement("#endif");
		}
		else if (ext == "GL_NV_fragment_shader_interlock")
		{
			statement("#extension GL_NV_fragment_shader_interlock : require");
			statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockNV()");
			statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockNV()");
		}
		else if (ext == "GL_ARB_fragment_shader_interlock")
		{
			statement("#ifdef GL_ARB_fragment_shader_interlock");
			statement("#extension GL_ARB_fragment_shader_interlock : enable");
			statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB()");
			statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB()");
			statement("#elif defined(GL_INTEL_fragment_shader_ordering)");
			statement("#extension GL_INTEL_fragment_shader_ordering : enable");
			statement("#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL()");
			statement("#define SPIRV_Cross_endInvocationInterlock()");
			statement("#endif");
		}
		else
			statement("#extension ", ext, " : require");
	}

	if (!options.vulkan_semantics)
	{
		using Supp = ShaderSubgroupSupportHelper;
		auto result = shader_subgroup_supporter.resolve();

		for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
		{
			auto feature = static_cast<Supp::Feature>(feature_index);
			if (!shader_subgroup_supporter.is_feature_requested(feature))
				continue;

			auto exts = Supp::get_candidates_for_feature(feature, result);
			if (exts.empty())
				continue;

			statement("");

			for (auto &ext : exts)
			{
				const char *name = Supp::get_extension_name(ext);
				const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
				auto extra_names = Supp::get_extra_required_extension_names(ext);
				statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
				          (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
				for (const auto &e : extra_names)
					statement("#extension ", e, " : enable");
				statement("#extension ", name, " : require");
			}

			if (!Supp::can_feature_be_implemented_without_extensions(feature))
			{
				statement("#else");
				statement("#error No extensions available to emulate requested subgroup feature.");
			}

			statement("#endif");
		}
	}

	for (auto &header : header_lines)
		statement(header);

	SmallVector<string> inputs;
	SmallVector<string> outputs;

	switch (execution.model)
	{
	case ExecutionModelVertex:
		if (options.ovr_multiview_view_count)
			inputs.push_back(join("num_views = ", options.ovr_multiview_view_count));
		break;
	case ExecutionModelGeometry:
		if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
			inputs.push_back(join("invocations = ", execution.invocations));
		if (execution.flags.get(ExecutionModeInputPoints))
			inputs.push_back("points");
		if (execution.flags.get(ExecutionModeInputLines))
			inputs.push_back("lines");
		if (execution.flags.get(ExecutionModeInputLinesAdjacency))
			inputs.push_back("lines_adjacency");
		if (execution.flags.get(ExecutionModeTriangles))
			inputs.push_back("triangles");
		if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
			inputs.push_back("triangles_adjacency");

		if (!execution.geometry_passthrough)
		{
			// For passthrough, these are implied and cannot be declared in the shader.
			outputs.push_back(join("max_vertices = ", execution.output_vertices));
			if (execution.flags.get(ExecutionModeOutputTriangleStrip))
				outputs.push_back("triangle_strip");
			if (execution.flags.get(ExecutionModeOutputPoints))
				outputs.push_back("points");
			if (execution.flags.get(ExecutionModeOutputLineStrip))
				outputs.push_back("line_strip");
		}
		break;

	case ExecutionModelTessellationControl:
		if (execution.flags.get(ExecutionModeOutputVertices))
			outputs.push_back(join("vertices = ", execution.output_vertices));
		break;

	case ExecutionModelTessellationEvaluation:
		if (execution.flags.get(ExecutionModeQuads))
			inputs.push_back("quads");
		if (execution.flags.get(ExecutionModeTriangles))
			inputs.push_back("triangles");
		if (execution.flags.get(ExecutionModeIsolines))
			inputs.push_back("isolines");
		if (execution.flags.get(ExecutionModePointMode))
			inputs.push_back("point_mode");

		if (!execution.flags.get(ExecutionModeIsolines))
		{
			if (execution.flags.get(ExecutionModeVertexOrderCw))
				inputs.push_back("cw");
			if (execution.flags.get(ExecutionModeVertexOrderCcw))
				inputs.push_back("ccw");
		}

		if (execution.flags.get(ExecutionModeSpacingFractionalEven))
			inputs.push_back("fractional_even_spacing");
		if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
			inputs.push_back("fractional_odd_spacing");
		if (execution.flags.get(ExecutionModeSpacingEqual))
			inputs.push_back("equal_spacing");
		break;

	case ExecutionModelGLCompute:
	case ExecutionModelTaskEXT:
	case ExecutionModelMeshEXT:
	{
		if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId))
		{
			SpecializationConstant wg_x, wg_y, wg_z;
			get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

			// If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro
			// declarations before we can emit the work group size.
			if (options.vulkan_semantics ||
			    ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
				build_workgroup_size(inputs, wg_x, wg_y, wg_z);
		}
		else
		{
			inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
			inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
			inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
		}

		if (execution.model == ExecutionModelMeshEXT)
		{
			outputs.push_back(join("max_vertices = ", execution.output_vertices));
			outputs.push_back(join("max_primitives = ", execution.output_primitives));
			if (execution.flags.get(ExecutionModeOutputTrianglesEXT))
				outputs.push_back("triangles");
			else if (execution.flags.get(ExecutionModeOutputLinesEXT))
				outputs.push_back("lines");
			else if (execution.flags.get(ExecutionModeOutputPoints))
				outputs.push_back("points");
		}
		break;
	}

	case ExecutionModelFragment:
		if (options.es)
		{
			switch (options.fragment.default_float_precision)
			{
			case Options::Lowp:
				statement("precision lowp float;");
				break;

			case Options::Mediump:
				statement("precision mediump float;");
				break;

			case Options::Highp:
				statement("precision highp float;");
				break;

			default:
				break;
			}

			switch (options.fragment.default_int_precision)
			{
			case Options::Lowp:
				statement("precision lowp int;");
				break;

			case Options::Mediump:
				statement("precision mediump int;");
				break;

			case Options::Highp:
				statement("precision highp int;");
				break;

			default:
				break;
			}
		}

		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
			inputs.push_back("early_fragment_tests");
		if (execution.flags.get(ExecutionModePostDepthCoverage))
			inputs.push_back("post_depth_coverage");

		if (interlock_used)
			statement("#if defined(GL_ARB_fragment_shader_interlock)");

		if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
			statement("layout(pixel_interlock_ordered) in;");
		else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
			statement("layout(pixel_interlock_unordered) in;");
		else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
			statement("layout(sample_interlock_ordered) in;");
		else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
			statement("layout(sample_interlock_unordered) in;");

		if (interlock_used)
		{
			statement("#elif !defined(GL_INTEL_fragment_shader_ordering)");
			statement("#error Fragment Shader Interlock/Ordering extension missing!");
			statement("#endif");
		}

		if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
			statement("layout(depth_greater) out float gl_FragDepth;");
		else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
			statement("layout(depth_less) out float gl_FragDepth;");

		break;

	default:
		break;
	}

	for (auto &cap : ir.declared_capabilities)
		if (cap == CapabilityRayTraversalPrimitiveCullingKHR)
			statement("layout(primitive_culling);");

	if (!inputs.empty())
		statement("layout(", merge(inputs), ") in;");
	if (!outputs.empty())
		statement("layout(", merge(outputs), ") out;");

	statement("");
}
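// For a minimal ES compute shader, the header emitted above boils down to something like
// (illustrative):
//   #version 310 es
//   layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;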

bool CompilerGLSL::type_is_empty(const SPIRType &type)
{
	return type.basetype == SPIRType::Struct && type.member_types.empty();
}

void CompilerGLSL::emit_struct(SPIRType &type)
{
	// Struct types can be stamped out multiple times
	// with just different offsets, matrix layouts, etc ...
	// Type-punning with these types is legal, which complicates things
	// when we are storing struct and array types in an SSBO for example.
	// If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
	if (type.type_alias != TypeID(0) &&
	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
		return;

	add_resource_name(type.self);
	auto name = type_to_glsl(type);

	statement(!backend.explicit_struct_type ? "struct " : "", name);
	begin_scope();

	type.member_name_cache.clear();

	uint32_t i = 0;
	bool emitted = false;
	for (auto &member : type.member_types)
	{
		add_member_name(type, i);
		emit_struct_member(type, member, i);
		i++;
		emitted = true;
	}

	// Don't declare empty structs in GLSL, this is not allowed.
	if (type_is_empty(type) && !backend.supports_empty_struct)
	{
		statement("int empty_struct_member;");
		emitted = true;
	}

	if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
		emit_struct_padding_target(type);

	end_scope_decl();

	if (emitted)
		statement("");
}

string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
{
	string res;
	//if (flags & (1ull << DecorationSmooth))
	//	res += "smooth ";
	if (flags.get(DecorationFlat))
		res += "flat ";
	if (flags.get(DecorationNoPerspective))
	{
		if (options.es)
		{
			if (options.version < 300)
				SPIRV_CROSS_THROW("noperspective requires ESSL 300.");
			require_extension_internal("GL_NV_shader_noperspective_interpolation");
		}
		else if (is_legacy_desktop())
			require_extension_internal("GL_EXT_gpu_shader4");
		res += "noperspective ";
	}
	if (flags.get(DecorationCentroid))
		res += "centroid ";
	if (flags.get(DecorationPatch))
		res += "patch ";
	if (flags.get(DecorationSample))
	{
		if (options.es)
		{
			if (options.version < 300)
				SPIRV_CROSS_THROW("sample requires ESSL 300.");
			else if (options.version < 320)
				require_extension_internal("GL_OES_shader_multisample_interpolation");
		}
		res += "sample ";
	}
	if (flags.get(DecorationInvariant) && (options.es || options.version >= 120))
		res += "invariant ";
	if (flags.get(DecorationPerPrimitiveEXT))
	{
		res += "perprimitiveEXT ";
		require_extension_internal("GL_EXT_mesh_shader");
	}

	if (flags.get(DecorationExplicitInterpAMD))
	{
		require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
		res += "__explicitInterpAMD ";
	}

	if (flags.get(DecorationPerVertexKHR))
	{
		if (options.es && options.version < 320)
			SPIRV_CROSS_THROW("pervertexEXT requires ESSL 320.");
		else if (!options.es && options.version < 450)
			SPIRV_CROSS_THROW("pervertexEXT requires GLSL 450.");

		if (barycentric_is_nv)
		{
			require_extension_internal("GL_NV_fragment_shader_barycentric");
			res += "pervertexNV ";
		}
		else
		{
			require_extension_internal("GL_EXT_fragment_shader_barycentric");
			res += "pervertexEXT ";
		}
	}

	return res;
}
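// Example: a flag set containing DecorationFlat and DecorationCentroid yields the qualifier
// string "flat centroid " (each qualifier keeps its trailing space so it can be concatenated
// directly in front of a declaration).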

string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
	if (is_legacy())
		return "";

	bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
	if (!is_block)
		return "";

	auto &memb = ir.meta[type.self].members;
	if (index >= memb.size())
		return "";
	auto &dec = memb[index];

	SmallVector<string> attr;

	if (has_member_decoration(type.self, index, DecorationPassthroughNV))
		attr.push_back("passthrough");

	// We can only apply layouts on members in block interfaces.
	// This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
	// This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
	// has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
	//
	// We would like to go from (SPIR-V style):
	//
	// struct Foo { layout(row_major) mat4 matrix; };
	// buffer UBO { Foo foo; };
	//
	// to
	//
	// struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
	// buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
	auto flags = combined_decoration_for_member(type, index);

	if (flags.get(DecorationRowMajor))
		attr.push_back("row_major");
	// We don't emit any global layouts, so column_major is default.
	//if (flags & (1ull << DecorationColMajor))
	//	attr.push_back("column_major");

	if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
		attr.push_back(join("location = ", dec.location));

	// Can only declare component if we can declare location.
	if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
	{
		if (!options.es)
		{
			if (options.version < 440 && options.version >= 140)
				require_extension_internal("GL_ARB_enhanced_layouts");
			else if (options.version < 140)
				SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
			attr.push_back(join("component = ", dec.component));
		}
		else
			SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
	}

	// SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
	// This is only done selectively in GLSL as needed.
	if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
	    dec.decoration_flags.get(DecorationOffset))
		attr.push_back(join("offset = ", dec.offset));
	else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset))
		attr.push_back(join("xfb_offset = ", dec.offset));

	if (attr.empty())
		return "";

	string res = "layout(";
	res += merge(attr);
	res += ") ";
	return res;
}
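// Example: a row-major matrix member at location 2 in an I/O block yields
// "layout(row_major, location = 2) ", while a member with no applicable decorations
// yields an empty string.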

const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
{
	if (options.es && is_desktop_only_format(format))
		SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");

	switch (format)
	{
	case ImageFormatRgba32f:
		return "rgba32f";
	case ImageFormatRgba16f:
		return "rgba16f";
	case ImageFormatR32f:
		return "r32f";
	case ImageFormatRgba8:
		return "rgba8";
	case ImageFormatRgba8Snorm:
		return "rgba8_snorm";
	case ImageFormatRg32f:
		return "rg32f";
	case ImageFormatRg16f:
		return "rg16f";
	case ImageFormatRgba32i:
		return "rgba32i";
	case ImageFormatRgba16i:
		return "rgba16i";
	case ImageFormatR32i:
		return "r32i";
	case ImageFormatRgba8i:
		return "rgba8i";
	case ImageFormatRg32i:
		return "rg32i";
	case ImageFormatRg16i:
		return "rg16i";
	case ImageFormatRgba32ui:
		return "rgba32ui";
	case ImageFormatRgba16ui:
		return "rgba16ui";
	case ImageFormatR32ui:
		return "r32ui";
	case ImageFormatRgba8ui:
		return "rgba8ui";
	case ImageFormatRg32ui:
		return "rg32ui";
	case ImageFormatRg16ui:
		return "rg16ui";
	case ImageFormatR11fG11fB10f:
		return "r11f_g11f_b10f";
	case ImageFormatR16f:
		return "r16f";
	case ImageFormatRgb10A2:
		return "rgb10_a2";
	case ImageFormatR8:
		return "r8";
	case ImageFormatRg8:
		return "rg8";
	case ImageFormatR16:
		return "r16";
	case ImageFormatRg16:
		return "rg16";
	case ImageFormatRgba16:
		return "rgba16";
	case ImageFormatR16Snorm:
		return "r16_snorm";
	case ImageFormatRg16Snorm:
		return "rg16_snorm";
	case ImageFormatRgba16Snorm:
		return "rgba16_snorm";
	case ImageFormatR8Snorm:
		return "r8_snorm";
	case ImageFormatRg8Snorm:
		return "rg8_snorm";
	case ImageFormatR8ui:
		return "r8ui";
	case ImageFormatRg8ui:
		return "rg8ui";
	case ImageFormatR16ui:
		return "r16ui";
	case ImageFormatRgb10a2ui:
		return "rgb10_a2ui";
	case ImageFormatR8i:
		return "r8i";
	case ImageFormatRg8i:
		return "rg8i";
	case ImageFormatR16i:
		return "r16i";
	case ImageFormatR64i:
		return "r64i";
	case ImageFormatR64ui:
		return "r64ui";
	default:
	case ImageFormatUnknown:
		return nullptr;
	}
}

uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
{
	switch (type.basetype)
	{
	case SPIRType::Double:
	case SPIRType::Int64:
	case SPIRType::UInt64:
		return 8;
	case SPIRType::Float:
	case SPIRType::Int:
	case SPIRType::UInt:
		return 4;
	case SPIRType::Half:
	case SPIRType::Short:
	case SPIRType::UShort:
		return 2;
	case SPIRType::SByte:
	case SPIRType::UByte:
		return 1;

	default:
		SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
	}
}

uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
                                                BufferPackingStandard packing)
{
	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
	// and is 64-bit.
	if (is_physical_pointer(type))
	{
		if (!type.pointer)
			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");

		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
		{
			if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
				return 16;
			else
				return 8;
		}
		else
			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
	}
	else if (is_array(type))
	{
		uint32_t minimum_alignment = 1;
		if (packing_is_vec4_padded(packing))
			minimum_alignment = 16;

		auto *tmp = &get<SPIRType>(type.parent_type);
		while (!tmp->array.empty())
			tmp = &get<SPIRType>(tmp->parent_type);

		// Get the alignment of the base type, then maybe round up.
		return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
	}

	if (type.basetype == SPIRType::Struct)
	{
		// Rule 9. A struct's alignment is the maximum alignment of its members.
		uint32_t alignment = 1;
		for (uint32_t i = 0; i < type.member_types.size(); i++)
		{
			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
			alignment =
			    max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
		}

		// In std140, struct alignment is rounded up to 16.
		if (packing_is_vec4_padded(packing))
			alignment = max<uint32_t>(alignment, 16u);

		return alignment;
	}
	else
	{
		const uint32_t base_alignment = type_to_packed_base_size(type, packing);

		// Alignment requirement for scalar block layout is always the alignment for the most basic component.
		if (packing_is_scalar(packing))
			return base_alignment;

		// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
		// a vec4, this is handled outside since that part knows our current offset.
		if (type.columns == 1 && packing_is_hlsl(packing))
			return base_alignment;

		// From 7.6.2.2 in GL 4.5 core spec.
		// Rule 1
		if (type.vecsize == 1 && type.columns == 1)
			return base_alignment;

		// Rule 2
		if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
			return type.vecsize * base_alignment;

		// Rule 3
		if (type.vecsize == 3 && type.columns == 1)
			return 4 * base_alignment;

		// Rule 4 implied. Alignment does not change in std430.

		// Rule 5. Column-major matrices are stored as arrays of
		// vectors.
		if (flags.get(DecorationColMajor) && type.columns > 1)
		{
			if (packing_is_vec4_padded(packing))
				return 4 * base_alignment;
			else if (type.vecsize == 3)
				return 4 * base_alignment;
			else
				return type.vecsize * base_alignment;
		}

		// Rule 6 implied.

		// Rule 7.
		if (flags.get(DecorationRowMajor) && type.vecsize > 1)
		{
			if (packing_is_vec4_padded(packing))
				return 4 * base_alignment;
			else if (type.columns == 3)
				return 4 * base_alignment;
			else
				return type.columns * base_alignment;
		}

		// Rule 8 implied.
	}

	SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
}
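// Worked example for std140 (vec4-padded) packing with the rules above: a float aligns to
// 4 bytes (rule 1), a vec3 aligns to 16 bytes (rule 3: 4 * base alignment), and a struct
// containing only a single float still gets 16-byte alignment (rule 9 plus the std140 rounding).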
1639
1640uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
1641 BufferPackingStandard packing)
1642{
1643 // Array stride is equal to aligned size of the underlying type.
1644 uint32_t parent = type.parent_type;
1645 assert(parent);
1646
1647 auto &tmp = get<SPIRType>(id: parent);
1648
1649 uint32_t size = type_to_packed_size(type: tmp, flags, packing);
1650 uint32_t alignment = type_to_packed_alignment(type, flags, packing);
1651 return (size + alignment - 1) & ~(alignment - 1);
1652}
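// Illustrative sketch (assumed member, not from this file): for 'vec2 v[8]' the aligned
// element size is 8, so the stride computed above is 8 under std430, while the std140
// minimum alignment of 16 bumps the stride to 16.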
1653
1654uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
1655{
1656 // If using PhysicalStorageBufferEXT storage class, this is a pointer,
1657 // and is 64-bit.
1658 if (is_physical_pointer(type))
1659 {
1660 if (!type.pointer)
1661 SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1662
1663 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1664 return 8;
1665 else
1666 SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1667 }
1668 else if (is_array(type))
1669 {
1670 uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
1671
1672 // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
1673 // so that it is possible to pack other vectors into the last element.
1674 if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
1675 packed_size -= (4 - type.vecsize) * (type.width / 8);
1676
1677 return packed_size;
1678 }
1679
1680 uint32_t size = 0;
1681
1682 if (type.basetype == SPIRType::Struct)
1683 {
1684 uint32_t pad_alignment = 1;
1685
1686 for (uint32_t i = 0; i < type.member_types.size(); i++)
1687 {
1688 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1689 auto &member_type = get<SPIRType>(id: type.member_types[i]);
1690
1691 uint32_t packed_alignment = type_to_packed_alignment(type: member_type, flags: member_flags, packing);
1692 uint32_t alignment = max(a: packed_alignment, b: pad_alignment);
1693
1694 // The next member following a struct member is aligned to the base alignment of the struct that came before.
1695 // GL 4.5 spec, 7.6.2.2.
1696 if (member_type.basetype == SPIRType::Struct)
1697 pad_alignment = packed_alignment;
1698 else
1699 pad_alignment = 1;
1700
1701 size = (size + alignment - 1) & ~(alignment - 1);
1702 size += type_to_packed_size(type: member_type, flags: member_flags, packing);
1703 }
1704 }
1705 else
1706 {
1707 const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1708
1709 if (packing_is_scalar(packing))
1710 {
1711 size = type.vecsize * type.columns * base_alignment;
1712 }
1713 else
1714 {
1715 if (type.columns == 1)
1716 size = type.vecsize * base_alignment;
1717
1718 if (flags.get(bit: DecorationColMajor) && type.columns > 1)
1719 {
1720 if (packing_is_vec4_padded(packing))
1721 size = type.columns * 4 * base_alignment;
1722 else if (type.vecsize == 3)
1723 size = type.columns * 4 * base_alignment;
1724 else
1725 size = type.columns * type.vecsize * base_alignment;
1726 }
1727
1728 if (flags.get(bit: DecorationRowMajor) && type.vecsize > 1)
1729 {
1730 if (packing_is_vec4_padded(packing))
1731 size = type.vecsize * 4 * base_alignment;
1732 else if (type.columns == 3)
1733 size = type.vecsize * 4 * base_alignment;
1734 else
1735 size = type.vecsize * type.columns * base_alignment;
1736 }
1737
1738 // For matrices in HLSL, the last element has a size which depends on its vector size,
1739 // so that it is possible to pack other vectors into the last element.
1740 if (packing_is_hlsl(packing) && type.columns > 1)
1741 size -= (4 - type.vecsize) * (type.width / 8);
1742 }
1743 }
1744
1745 return size;
1746}
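// Worked example (illustrative assumption): for 'float3 arr[4]' in an HLSL cbuffer the
// array stride is 16, but the trailing element only occupies its own 12 bytes, so the
// packed size computed above is 4 * 16 - (4 - 3) * (32 / 8) = 60 bytes.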
1747
1748bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
1749 uint32_t *failed_validation_index, uint32_t start_offset,
1750 uint32_t end_offset)
1751{
1752 	// This is very tricky and error-prone, but try to be exhaustive and correct here.
1753 // SPIR-V doesn't directly say if we're using std430 or std140.
1754 // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
1755 // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
1756 	// We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyway, and custom offsets would achieve the same thing).
1757 //
1758 // It is almost certain that we're using std430, but it gets tricky with arrays in particular.
1759 // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
1760 //
1761 // The only two differences between std140 and std430 are related to padding alignment/array stride
1762 // in arrays and structs. In std140 they take minimum vec4 alignment.
1763 // std430 only removes the vec4 requirement.
1764
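	// Sketch of the inference (the SPIR-V below is illustrative only): the same GLSL member
	//     float weights[4];
	// reaches us purely through decorations such as
	//     OpMemberDecorate %Block 0 Offset 0
	//     OpDecorate %_arr_float_4 ArrayStride 16   ; consistent with std140
	//     OpDecorate %_arr_float_4 ArrayStride 4    ; consistent with std430
	// so the loop below simply recomputes offsets/strides for the candidate packing and
	// checks whether the recorded values match.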
1765 uint32_t offset = 0;
1766 uint32_t pad_alignment = 1;
1767
1768 bool is_top_level_block =
1769 has_decoration(id: type.self, decoration: DecorationBlock) || has_decoration(id: type.self, decoration: DecorationBufferBlock);
1770
1771 for (uint32_t i = 0; i < type.member_types.size(); i++)
1772 {
1773 auto &memb_type = get<SPIRType>(id: type.member_types[i]);
1774
1775 auto *type_meta = ir.find_meta(id: type.self);
1776 auto member_flags = type_meta ? type_meta->members[i].decoration_flags : Bitset{};
1777
1778 // Verify alignment rules.
1779 uint32_t packed_alignment = type_to_packed_alignment(type: memb_type, flags: member_flags, packing);
1780
1781 // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
1782 // layout(constant_id = 0) const int s = 10;
1783 // const int S = s + 5; // SpecConstantOp
1784 // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
1785 // we would need full implementation of compile-time constant folding. :(
1786 // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
1787 // for our analysis (e.g. unsized arrays).
1788 // This lets us simply ignore that there are spec constant op sized arrays in our buffers.
1789 // Querying size of this member will fail, so just don't call it unless we have to.
1790 //
1791 		// This is likely the best effort we can support without going into unacceptably complicated workarounds.
1792 bool member_can_be_unsized =
1793 is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
1794
1795 uint32_t packed_size = 0;
1796 if (!member_can_be_unsized || packing_is_hlsl(packing))
1797 packed_size = type_to_packed_size(type: memb_type, flags: member_flags, packing);
1798
1799 // We only need to care about this if we have non-array types which can straddle the vec4 boundary.
1800 uint32_t actual_offset = type_struct_member_offset(type, index: i);
1801
1802 if (packing_is_hlsl(packing))
1803 {
1804 // If a member straddles across a vec4 boundary, alignment is actually vec4.
1805 uint32_t target_offset;
1806
1807 // If we intend to use explicit packing, we must check for improper straddle with that offset.
1808 // In implicit packing, we must check with implicit offset, since the explicit offset
1809 // might have already accounted for the straddle, and we'd miss the alignment promotion to vec4.
1810 // This is important when packing sub-structs that don't support packoffset().
1811 if (packing_has_flexible_offset(packing))
1812 target_offset = actual_offset;
1813 else
1814 target_offset = offset;
1815
1816 uint32_t begin_word = target_offset / 16;
1817 uint32_t end_word = (target_offset + packed_size - 1) / 16;
1818
1819 if (begin_word != end_word)
1820 packed_alignment = max<uint32_t>(a: packed_alignment, b: 16u);
1821 }
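		// Worked example (illustrative): a cbuffer member 'float2 v' at implicit offset 12 has
		// begin_word = 12 / 16 = 0 and end_word = (12 + 8 - 1) / 16 = 1, so it would straddle a
		// 16-byte register and its alignment is promoted to 16 (it ends up at offset 16 instead).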
1822
1823 		// The field is no longer in the specified range, so we can ignore any further fields.
1824 if (actual_offset >= end_offset)
1825 break;
1826
1827 uint32_t alignment = max(a: packed_alignment, b: pad_alignment);
1828 offset = (offset + alignment - 1) & ~(alignment - 1);
1829
1830 // The next member following a struct member is aligned to the base alignment of the struct that came before.
1831 // GL 4.5 spec, 7.6.2.2.
1832 if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
1833 pad_alignment = packed_alignment;
1834 else
1835 pad_alignment = 1;
1836
1837 // Only care about packing if we are in the given range
1838 if (actual_offset >= start_offset)
1839 {
1840 // We only care about offsets in std140, std430, etc ...
1841 // For EnhancedLayout variants, we have the flexibility to choose our own offsets.
1842 if (!packing_has_flexible_offset(packing))
1843 {
1844 if (actual_offset != offset) // This cannot be the packing we're looking for.
1845 {
1846 if (failed_validation_index)
1847 *failed_validation_index = i;
1848 return false;
1849 }
1850 }
1851 else if ((actual_offset & (alignment - 1)) != 0)
1852 {
1853 // We still need to verify that alignment rules are observed, even if we have explicit offset.
1854 if (failed_validation_index)
1855 *failed_validation_index = i;
1856 return false;
1857 }
1858
1859 // Verify array stride rules.
1860 if (is_array(type: memb_type) &&
1861 type_to_packed_array_stride(type: memb_type, flags: member_flags, packing) !=
1862 type_struct_member_array_stride(type, index: i))
1863 {
1864 if (failed_validation_index)
1865 *failed_validation_index = i;
1866 return false;
1867 }
1868
1869 // Verify that sub-structs also follow packing rules.
1870 // We cannot use enhanced layouts on substructs, so they better be up to spec.
1871 auto substruct_packing = packing_to_substruct_packing(packing);
1872
1873 if (!memb_type.pointer && !memb_type.member_types.empty() &&
1874 !buffer_is_packing_standard(type: memb_type, packing: substruct_packing))
1875 {
1876 if (failed_validation_index)
1877 *failed_validation_index = i;
1878 return false;
1879 }
1880 }
1881
1882 // Bump size.
1883 offset = actual_offset + packed_size;
1884 }
1885
1886 return true;
1887}
1888
1889bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
1890{
1891 	// Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
1892 // Be very explicit here about how to solve the issue.
1893 if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
1894 (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
1895 {
1896 uint32_t minimum_desktop_version = block ? 440 : 410;
1897 // ARB_enhanced_layouts vs ARB_separate_shader_objects ...
1898
1899 if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
1900 return false;
1901 else if (options.es && options.version < 310)
1902 return false;
1903 }
1904
1905 if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
1906 (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
1907 {
1908 if (options.es && options.version < 300)
1909 return false;
1910 else if (!options.es && options.version < 330)
1911 return false;
1912 }
1913
1914 if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
1915 {
1916 if (options.es && options.version < 310)
1917 return false;
1918 else if (!options.es && options.version < 430)
1919 return false;
1920 }
1921
1922 return true;
1923}
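// Illustrative summary (assumed, non-block variables): a vertex-shader *output* location
// needs desktop GLSL 4.10 (or ARB_separate_shader_objects) / ESSL 3.10, while a
// vertex-shader *input* location is already available from GLSL 3.30 / ESSL 3.00.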
1924
1925string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
1926{
1927 	// FIXME: Come up with a better solution for when to disable layouts.
1928 	// Layouts should depend on the extensions available as well as which kinds
1929 	// of layouts are actually used. For now, the simple solution is to just disable
1930 	// layouts for legacy versions.
1931 if (is_legacy())
1932 return "";
1933
1934 if (subpass_input_is_framebuffer_fetch(id: var.self))
1935 return "";
1936
1937 SmallVector<string> attr;
1938
1939 auto &type = get<SPIRType>(id: var.basetype);
1940 auto &flags = get_decoration_bitset(id: var.self);
1941 auto &typeflags = get_decoration_bitset(id: type.self);
1942
1943 if (flags.get(bit: DecorationPassthroughNV))
1944 attr.push_back(t: "passthrough");
1945
1946 if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
1947 attr.push_back(t: "push_constant");
1948 else if (var.storage == StorageClassShaderRecordBufferKHR)
1949 attr.push_back(t: ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");
1950
1951 if (flags.get(bit: DecorationRowMajor))
1952 attr.push_back(t: "row_major");
1953 if (flags.get(bit: DecorationColMajor))
1954 attr.push_back(t: "column_major");
1955
1956 if (options.vulkan_semantics)
1957 {
1958 if (flags.get(bit: DecorationInputAttachmentIndex))
1959 attr.push_back(t: join(ts: "input_attachment_index = ", ts: get_decoration(id: var.self, decoration: DecorationInputAttachmentIndex)));
1960 }
1961
1962 bool is_block = has_decoration(id: type.self, decoration: DecorationBlock);
1963 if (flags.get(bit: DecorationLocation) && can_use_io_location(storage: var.storage, block: is_block))
1964 {
1965 Bitset combined_decoration;
1966 for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
1967 combined_decoration.merge_or(other: combined_decoration_for_member(type, index: i));
1968
1969 // If our members have location decorations, we don't need to
1970 // emit location decorations at the top as well (looks weird).
1971 if (!combined_decoration.get(bit: DecorationLocation))
1972 attr.push_back(t: join(ts: "location = ", ts: get_decoration(id: var.self, decoration: DecorationLocation)));
1973 }
1974
1975 if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput &&
1976 location_is_non_coherent_framebuffer_fetch(location: get_decoration(id: var.self, decoration: DecorationLocation)))
1977 {
1978 attr.push_back(t: "noncoherent");
1979 }
1980
1981 // Transform feedback
1982 bool uses_enhanced_layouts = false;
1983 if (is_block && var.storage == StorageClassOutput)
1984 {
1985 // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself,
1986 // since all members must match the same xfb_buffer. The only thing we will declare for members of the block
1987 // is the xfb_offset.
1988 uint32_t member_count = uint32_t(type.member_types.size());
1989 bool have_xfb_buffer_stride = false;
1990 bool have_any_xfb_offset = false;
1991 bool have_geom_stream = false;
1992 uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
1993
1994 if (flags.get(bit: DecorationXfbBuffer) && flags.get(bit: DecorationXfbStride))
1995 {
1996 have_xfb_buffer_stride = true;
1997 xfb_buffer = get_decoration(id: var.self, decoration: DecorationXfbBuffer);
1998 xfb_stride = get_decoration(id: var.self, decoration: DecorationXfbStride);
1999 }
2000
2001 if (flags.get(bit: DecorationStream))
2002 {
2003 have_geom_stream = true;
2004 geom_stream = get_decoration(id: var.self, decoration: DecorationStream);
2005 }
2006
2007 // Verify that none of the members violate our assumption.
2008 for (uint32_t i = 0; i < member_count; i++)
2009 {
2010 if (has_member_decoration(id: type.self, index: i, decoration: DecorationStream))
2011 {
2012 uint32_t member_geom_stream = get_member_decoration(id: type.self, index: i, decoration: DecorationStream);
2013 if (have_geom_stream && member_geom_stream != geom_stream)
2014 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
2015 have_geom_stream = true;
2016 geom_stream = member_geom_stream;
2017 }
2018
2019 // Only members with an Offset decoration participate in XFB.
2020 if (!has_member_decoration(id: type.self, index: i, decoration: DecorationOffset))
2021 continue;
2022 have_any_xfb_offset = true;
2023
2024 if (has_member_decoration(id: type.self, index: i, decoration: DecorationXfbBuffer))
2025 {
2026 uint32_t buffer_index = get_member_decoration(id: type.self, index: i, decoration: DecorationXfbBuffer);
2027 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
2028 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
2029 have_xfb_buffer_stride = true;
2030 xfb_buffer = buffer_index;
2031 }
2032
2033 if (has_member_decoration(id: type.self, index: i, decoration: DecorationXfbStride))
2034 {
2035 uint32_t stride = get_member_decoration(id: type.self, index: i, decoration: DecorationXfbStride);
2036 if (have_xfb_buffer_stride && stride != xfb_stride)
2037 SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
2038 have_xfb_buffer_stride = true;
2039 xfb_stride = stride;
2040 }
2041 }
2042
2043 if (have_xfb_buffer_stride && have_any_xfb_offset)
2044 {
2045 attr.push_back(t: join(ts: "xfb_buffer = ", ts&: xfb_buffer));
2046 attr.push_back(t: join(ts: "xfb_stride = ", ts&: xfb_stride));
2047 uses_enhanced_layouts = true;
2048 }
2049
2050 if (have_geom_stream)
2051 {
2052 if (get_execution_model() != ExecutionModelGeometry)
2053 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
2054 if (options.es)
2055 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
2056 if (options.version < 400)
2057 require_extension_internal(ext: "GL_ARB_transform_feedback3");
2058 attr.push_back(t: join(ts: "stream = ", ts: get_decoration(id: var.self, decoration: DecorationStream)));
2059 }
2060 }
2061 else if (var.storage == StorageClassOutput)
2062 {
2063 if (flags.get(bit: DecorationXfbBuffer) && flags.get(bit: DecorationXfbStride) && flags.get(bit: DecorationOffset))
2064 {
2065 // XFB for standalone variables, we can emit all decorations.
2066 attr.push_back(t: join(ts: "xfb_buffer = ", ts: get_decoration(id: var.self, decoration: DecorationXfbBuffer)));
2067 attr.push_back(t: join(ts: "xfb_stride = ", ts: get_decoration(id: var.self, decoration: DecorationXfbStride)));
2068 attr.push_back(t: join(ts: "xfb_offset = ", ts: get_decoration(id: var.self, decoration: DecorationOffset)));
2069 uses_enhanced_layouts = true;
2070 }
2071
2072 if (flags.get(bit: DecorationStream))
2073 {
2074 if (get_execution_model() != ExecutionModelGeometry)
2075 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
2076 if (options.es)
2077 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
2078 if (options.version < 400)
2079 require_extension_internal(ext: "GL_ARB_transform_feedback3");
2080 attr.push_back(t: join(ts: "stream = ", ts: get_decoration(id: var.self, decoration: DecorationStream)));
2081 }
2082 }
2083
2084 // Can only declare Component if we can declare location.
2085 if (flags.get(bit: DecorationComponent) && can_use_io_location(storage: var.storage, block: is_block))
2086 {
2087 uses_enhanced_layouts = true;
2088 attr.push_back(t: join(ts: "component = ", ts: get_decoration(id: var.self, decoration: DecorationComponent)));
2089 }
2090
2091 if (uses_enhanced_layouts)
2092 {
2093 if (!options.es)
2094 {
2095 if (options.version < 440 && options.version >= 140)
2096 require_extension_internal(ext: "GL_ARB_enhanced_layouts");
2097 else if (options.version < 140)
2098 SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
2099 if (!options.es && options.version < 440)
2100 require_extension_internal(ext: "GL_ARB_enhanced_layouts");
2101 }
2102 else if (options.es)
2103 SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
2104 }
2105
2106 if (flags.get(bit: DecorationIndex))
2107 attr.push_back(t: join(ts: "index = ", ts: get_decoration(id: var.self, decoration: DecorationIndex)));
2108
2109 // Do not emit set = decoration in regular GLSL output, but
2110 // we need to preserve it in Vulkan GLSL mode.
2111 if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR)
2112 {
2113 if (flags.get(bit: DecorationDescriptorSet) && options.vulkan_semantics)
2114 attr.push_back(t: join(ts: "set = ", ts: get_decoration(id: var.self, decoration: DecorationDescriptorSet)));
2115 }
2116
2117 bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
2118 bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
2119 (var.storage == StorageClassUniform && typeflags.get(bit: DecorationBufferBlock));
2120 bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
2121 bool ubo_block = var.storage == StorageClassUniform && typeflags.get(bit: DecorationBlock);
2122
2123 // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
2124 bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
2125
2126 	// Pretend there are no UBOs when the options say so.
2127 if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
2128 can_use_buffer_blocks = false;
2129
2130 bool can_use_binding;
2131 if (options.es)
2132 can_use_binding = options.version >= 310;
2133 else
2134 can_use_binding = options.enable_420pack_extension || (options.version >= 420);
2135
2136 // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
2137 if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
2138 can_use_binding = false;
2139
2140 if (var.storage == StorageClassShaderRecordBufferKHR)
2141 can_use_binding = false;
2142
2143 if (can_use_binding && flags.get(bit: DecorationBinding))
2144 attr.push_back(t: join(ts: "binding = ", ts: get_decoration(id: var.self, decoration: DecorationBinding)));
2145
2146 if (var.storage != StorageClassOutput && flags.get(bit: DecorationOffset))
2147 attr.push_back(t: join(ts: "offset = ", ts: get_decoration(id: var.self, decoration: DecorationOffset)));
2148
2149 // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
2150 // If SPIR-V does not comply with either layout, we cannot really work around it.
2151 if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
2152 {
2153 attr.push_back(t: buffer_to_packing_standard(type, support_std430_without_scalar_layout: false, support_enhanced_layouts: true));
2154 }
2155 else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
2156 {
2157 attr.push_back(t: buffer_to_packing_standard(type, support_std430_without_scalar_layout: true, support_enhanced_layouts: true));
2158 }
2159
2160 	// For images, the type itself adds a layout qualifier.
2161 // Only emit the format for storage images.
2162 if (type.basetype == SPIRType::Image && type.image.sampled == 2)
2163 {
2164 const char *fmt = format_to_glsl(format: type.image.format);
2165 if (fmt)
2166 attr.push_back(t: fmt);
2167 }
2168
2169 if (attr.empty())
2170 return "";
2171
2172 string res = "layout(";
2173 res += merge(list: attr);
2174 res += ") ";
2175 return res;
2176}
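// Illustrative sketch (assumed decorations): for a Vulkan-GLSL UBO with DescriptorSet = 0,
// Binding = 2 and an std140-compatible layout, this returns something like
//     "layout(set = 0, binding = 2, std140) "
// which the caller prepends to the block declaration.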
2177
2178string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type,
2179 bool support_std430_without_scalar_layout,
2180 bool support_enhanced_layouts)
2181{
2182 if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, packing: BufferPackingStd430))
2183 return "std430";
2184 else if (buffer_is_packing_standard(type, packing: BufferPackingStd140))
2185 return "std140";
2186 else if (options.vulkan_semantics && buffer_is_packing_standard(type, packing: BufferPackingScalar))
2187 {
2188 require_extension_internal(ext: "GL_EXT_scalar_block_layout");
2189 return "scalar";
2190 }
2191 else if (support_std430_without_scalar_layout &&
2192 support_enhanced_layouts &&
2193 buffer_is_packing_standard(type, packing: BufferPackingStd430EnhancedLayout))
2194 {
2195 if (options.es && !options.vulkan_semantics)
2196 			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
2197 "not support GL_ARB_enhanced_layouts.");
2198 if (!options.es && !options.vulkan_semantics && options.version < 440)
2199 require_extension_internal(ext: "GL_ARB_enhanced_layouts");
2200
2201 set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset);
2202 return "std430";
2203 }
2204 else if (support_enhanced_layouts &&
2205 buffer_is_packing_standard(type, packing: BufferPackingStd140EnhancedLayout))
2206 {
2207 		// Fallback time. We might be able to use ARB_enhanced_layouts to deal with this difference,
2208 		// however, we can only use layout(offset) on the block itself, not any substructs, so the substructs had better already follow the appropriate layout.
2209 // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
2210 if (options.es && !options.vulkan_semantics)
2211 			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
2212 "not support GL_ARB_enhanced_layouts.");
2213 if (!options.es && !options.vulkan_semantics && options.version < 440)
2214 require_extension_internal(ext: "GL_ARB_enhanced_layouts");
2215
2216 set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset);
2217 return "std140";
2218 }
2219 else if (options.vulkan_semantics &&
2220 support_enhanced_layouts &&
2221 buffer_is_packing_standard(type, packing: BufferPackingScalarEnhancedLayout))
2222 {
2223 set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset);
2224 require_extension_internal(ext: "GL_EXT_scalar_block_layout");
2225 return "scalar";
2226 }
2227 else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
2228 buffer_is_packing_standard(type, packing: BufferPackingStd430))
2229 {
2230 // UBOs can support std430 with GL_EXT_scalar_block_layout.
2231 require_extension_internal(ext: "GL_EXT_scalar_block_layout");
2232 return "std430";
2233 }
2234 else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
2235 support_enhanced_layouts &&
2236 buffer_is_packing_standard(type, packing: BufferPackingStd430EnhancedLayout))
2237 {
2238 // UBOs can support std430 with GL_EXT_scalar_block_layout.
2239 set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset);
2240 require_extension_internal(ext: "GL_EXT_scalar_block_layout");
2241 return "std430";
2242 }
2243 else
2244 {
2245 SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced "
2246 "layouts. You can try flattening this block to support a more flexible layout.");
2247 }
2248}
2249
2250void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
2251{
2252 if (flattened_buffer_blocks.count(x: var.self))
2253 emit_buffer_block_flattened(type: var);
2254 else if (options.vulkan_semantics)
2255 emit_push_constant_block_vulkan(var);
2256 else if (options.emit_push_constant_as_uniform_buffer)
2257 emit_buffer_block_native(var);
2258 else
2259 emit_push_constant_block_glsl(var);
2260}
2261
2262void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
2263{
2264 emit_buffer_block(type: var);
2265}
2266
2267void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
2268{
2269 // OpenGL has no concept of push constant blocks, implement it as a uniform struct.
2270 auto &type = get<SPIRType>(id: var.basetype);
2271
2272 unset_decoration(id: var.self, decoration: DecorationBinding);
2273 unset_decoration(id: var.self, decoration: DecorationDescriptorSet);
2274
2275#if 0
2276 if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
2277 SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
2278 "Remap to location with reflection API first or disable these decorations.");
2279#endif
2280
2281 // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
2282 // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
2283 bool block_flag = has_decoration(id: type.self, decoration: DecorationBlock);
2284 unset_decoration(id: type.self, decoration: DecorationBlock);
2285
2286 emit_struct(type);
2287
2288 if (block_flag)
2289 set_decoration(id: type.self, decoration: DecorationBlock);
2290
2291 emit_uniform(var);
2292 statement(ts: "");
2293}
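// Illustrative sketch (assumed names): for a SPIR-V push constant block that would be
//     layout(push_constant) uniform Push { mat4 mvp; } registers;
// in Vulkan GLSL, the fallback above emits roughly
//     struct Push { mat4 mvp; };
//     uniform Push registers;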
2294
2295void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
2296{
2297 auto &type = get<SPIRType>(id: var.basetype);
2298 bool ubo_block = var.storage == StorageClassUniform && has_decoration(id: type.self, decoration: DecorationBlock);
2299
2300 if (flattened_buffer_blocks.count(x: var.self))
2301 emit_buffer_block_flattened(type: var);
2302 else if (is_legacy() || (!options.es && options.version == 130) ||
2303 (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
2304 emit_buffer_block_legacy(var);
2305 else
2306 emit_buffer_block_native(var);
2307}
2308
2309void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
2310{
2311 auto &type = get<SPIRType>(id: var.basetype);
2312 bool ssbo = var.storage == StorageClassStorageBuffer ||
2313 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
2314 if (ssbo)
2315 SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
2316
2317 	// We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
2318 // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
2319 auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
2320 bool block_flag = block_flags.get(bit: DecorationBlock);
2321 block_flags.clear(bit: DecorationBlock);
2322 emit_struct(type);
2323 if (block_flag)
2324 block_flags.set(DecorationBlock);
2325 emit_uniform(var);
2326 statement(ts: "");
2327}
2328
2329void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_declaration)
2330{
2331 auto &type = get<SPIRType>(id: type_id);
2332 string buffer_name;
2333
2334 if (forward_declaration && is_physical_pointer_to_buffer_block(type))
2335 {
2336 // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
2337 		// Allow the aliased name since we might be declaring the block twice: once as a buffer reference (forward declared) and once as the proper declaration.
2338 // The names must match up.
2339 buffer_name = to_name(id: type.self, allow_alias: false);
2340
2341 // Shaders never use the block by interface name, so we don't
2342 // have to track this other than updating name caches.
2343 // If we have a collision for any reason, just fallback immediately.
2344 if (ir.meta[type.self].decoration.alias.empty() ||
2345 block_ssbo_names.find(x: buffer_name) != end(cont&: block_ssbo_names) ||
2346 resource_names.find(x: buffer_name) != end(cont&: resource_names))
2347 {
2348 buffer_name = join(ts: "_", ts&: type.self);
2349 }
2350
2351 // Make sure we get something unique for both global name scope and block name scope.
2352 // See GLSL 4.5 spec: section 4.3.9 for details.
2353 add_variable(variables_primary&: block_ssbo_names, variables_secondary: resource_names, name&: buffer_name);
2354
2355 // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2356 // This cannot conflict with anything else, so we're safe now.
2357 		// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
2358 if (buffer_name.empty())
2359 buffer_name = join(ts: "_", ts&: type.self);
2360
2361 block_names.insert(x: buffer_name);
2362 block_ssbo_names.insert(x: buffer_name);
2363
2364 // Ensure we emit the correct name when emitting non-forward pointer type.
2365 ir.meta[type.self].decoration.alias = buffer_name;
2366 }
2367 else
2368 {
2369 buffer_name = type_to_glsl(type);
2370 }
2371
2372 if (!forward_declaration)
2373 {
2374 auto itr = physical_storage_type_to_alignment.find(x: type_id);
2375 uint32_t alignment = 0;
2376 if (itr != physical_storage_type_to_alignment.end())
2377 alignment = itr->second.alignment;
2378
2379 if (is_physical_pointer_to_buffer_block(type))
2380 {
2381 SmallVector<std::string> attributes;
2382 attributes.push_back(t: "buffer_reference");
2383 if (alignment)
2384 attributes.push_back(t: join(ts: "buffer_reference_align = ", ts&: alignment));
2385 attributes.push_back(t: buffer_to_packing_standard(type, support_std430_without_scalar_layout: true, support_enhanced_layouts: true));
2386
2387 auto flags = ir.get_buffer_block_type_flags(type);
2388 string decorations;
2389 if (flags.get(bit: DecorationRestrict))
2390 decorations += " restrict";
2391 if (flags.get(bit: DecorationCoherent))
2392 decorations += " coherent";
2393 if (flags.get(bit: DecorationNonReadable))
2394 decorations += " writeonly";
2395 if (flags.get(bit: DecorationNonWritable))
2396 decorations += " readonly";
2397
2398 statement(ts: "layout(", ts: merge(list: attributes), ts: ")", ts&: decorations, ts: " buffer ", ts&: buffer_name);
2399 }
2400 else
2401 {
2402 string packing_standard;
2403 if (type.basetype == SPIRType::Struct)
2404 {
2405 // The non-block type is embedded in a block, so we cannot use enhanced layouts :(
2406 packing_standard = buffer_to_packing_standard(type, support_std430_without_scalar_layout: true, support_enhanced_layouts: false) + ", ";
2407 }
2408 else if (is_array(type: get_pointee_type(type)))
2409 {
2410 SPIRType wrap_type{OpTypeStruct};
2411 wrap_type.self = ir.increase_bound_by(count: 1);
2412 wrap_type.member_types.push_back(t: get_pointee_type_id(type_id));
2413 ir.set_member_decoration(id: wrap_type.self, index: 0, decoration: DecorationOffset, argument: 0);
2414 packing_standard = buffer_to_packing_standard(type: wrap_type, support_std430_without_scalar_layout: true, support_enhanced_layouts: false) + ", ";
2415 }
2416
2417 if (alignment)
2418 statement(ts: "layout(", ts&: packing_standard, ts: "buffer_reference, buffer_reference_align = ", ts&: alignment, ts: ") buffer ", ts&: buffer_name);
2419 else
2420 statement(ts: "layout(", ts&: packing_standard, ts: "buffer_reference) buffer ", ts&: buffer_name);
2421 }
2422
2423 begin_scope();
2424
2425 if (is_physical_pointer_to_buffer_block(type))
2426 {
2427 type.member_name_cache.clear();
2428
2429 uint32_t i = 0;
2430 for (auto &member : type.member_types)
2431 {
2432 add_member_name(type, name: i);
2433 emit_struct_member(type, member_type_id: member, index: i);
2434 i++;
2435 }
2436 }
2437 else
2438 {
2439 auto &pointee_type = get_pointee_type(type);
2440 statement(ts: type_to_glsl(type: pointee_type), ts: " value", ts: type_to_array_glsl(type: pointee_type, variable_id: 0), ts: ";");
2441 }
2442
2443 end_scope_decl();
2444 statement(ts: "");
2445 }
2446 else
2447 {
2448 statement(ts: "layout(buffer_reference) buffer ", ts&: buffer_name, ts: ";");
2449 }
2450}
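// Illustrative sketch (assumed names and alignment): a physical-storage-buffer pointer to a
// block typically ends up as
//     layout(buffer_reference) buffer Refs;        // forward declaration
//     layout(buffer_reference, buffer_reference_align = 16, std430) restrict buffer Refs
//     {
//         vec4 values[];
//     };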
2451
2452void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
2453{
2454 auto &type = get<SPIRType>(id: var.basetype);
2455
2456 Bitset flags = ir.get_buffer_block_flags(var);
2457 bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
2458 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
2459 bool is_restrict = ssbo && flags.get(bit: DecorationRestrict);
2460 bool is_writeonly = ssbo && flags.get(bit: DecorationNonReadable);
2461 bool is_readonly = ssbo && flags.get(bit: DecorationNonWritable);
2462 bool is_coherent = ssbo && flags.get(bit: DecorationCoherent);
2463
2464 // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
2465 auto buffer_name = to_name(id: type.self, allow_alias: false);
2466
2467 auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
2468
2469 // Shaders never use the block by interface name, so we don't
2470 // have to track this other than updating name caches.
2471 // If we have a collision for any reason, just fallback immediately.
2472 if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(x: buffer_name) != end(cont&: block_namespace) ||
2473 resource_names.find(x: buffer_name) != end(cont&: resource_names))
2474 {
2475 buffer_name = get_block_fallback_name(id: var.self);
2476 }
2477
2478 // Make sure we get something unique for both global name scope and block name scope.
2479 // See GLSL 4.5 spec: section 4.3.9 for details.
2480 add_variable(variables_primary&: block_namespace, variables_secondary: resource_names, name&: buffer_name);
2481
2482 // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2483 // This cannot conflict with anything else, so we're safe now.
2484 	// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
2485 if (buffer_name.empty())
2486 buffer_name = join(ts: "_", ts&: get<SPIRType>(id: var.basetype).self, ts: "_", ts: var.self);
2487
2488 block_names.insert(x: buffer_name);
2489 block_namespace.insert(x: buffer_name);
2490
2491 // Save for post-reflection later.
2492 declared_block_names[var.self] = buffer_name;
2493
2494 statement(ts: layout_for_variable(var), ts: is_coherent ? "coherent " : "", ts: is_restrict ? "restrict " : "",
2495 ts: is_writeonly ? "writeonly " : "", ts: is_readonly ? "readonly " : "", ts: ssbo ? "buffer " : "uniform ",
2496 ts&: buffer_name);
2497
2498 begin_scope();
2499
2500 type.member_name_cache.clear();
2501
2502 uint32_t i = 0;
2503 for (auto &member : type.member_types)
2504 {
2505 add_member_name(type, name: i);
2506 emit_struct_member(type, member_type_id: member, index: i);
2507 i++;
2508 }
2509
2510 // Don't declare empty blocks in GLSL, this is not allowed.
2511 if (type_is_empty(type) && !backend.supports_empty_struct)
2512 statement(ts: "int empty_struct_member;");
2513
2514 // var.self can be used as a backup name for the block name,
2515 // so we need to make sure we don't disturb the name here on a recompile.
2516 // It will need to be reset if we have to recompile.
2517 preserve_alias_on_reset(id: var.self);
2518 add_resource_name(id: var.self);
2519 end_scope_decl(decl: to_name(id: var.self) + type_to_array_glsl(type, variable_id: var.self));
2520 statement(ts: "");
2521}
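// Illustrative sketch (assumed names/decorations): a NonWritable SSBO at binding 1 comes out
// roughly as
//     layout(binding = 1, std430) readonly buffer Lights
//     {
//         vec4 position[];
//     } lights;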
2522
2523void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
2524{
2525 auto &type = get<SPIRType>(id: var.basetype);
2526
2527 // Block names should never alias.
2528 auto buffer_name = to_name(id: type.self, allow_alias: false);
2529 size_t buffer_size = (get_declared_struct_size(struct_type: type) + 15) / 16;
2530
2531 SPIRType::BaseType basic_type;
2532 if (get_common_basic_type(type, base_type&: basic_type))
2533 {
2534 SPIRType tmp { OpTypeVector };
2535 tmp.basetype = basic_type;
2536 tmp.vecsize = 4;
2537 if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
2538 SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
2539
2540 auto flags = ir.get_buffer_block_flags(var);
2541 statement(ts: "uniform ", ts: flags_to_qualifiers_glsl(type: tmp, flags), ts: type_to_glsl(type: tmp), ts: " ", ts&: buffer_name, ts: "[",
2542 ts&: buffer_size, ts: "];");
2543 }
2544 else
2545 SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
2546}
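// Illustrative sketch (assumed size/name): a flattened UBO whose members share the basic type
// float and whose declared size is 128 bytes becomes
//     uniform vec4 UBO[8];
// with member accesses rewritten elsewhere into indexing/swizzling of that array.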
2547
2548const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
2549{
2550 auto &execution = get_entry_point();
2551
2552 if (subpass_input_is_framebuffer_fetch(id: var.self))
2553 return "";
2554
2555 if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
2556 {
2557 if (is_legacy() && execution.model == ExecutionModelVertex)
2558 return var.storage == StorageClassInput ? "attribute " : "varying ";
2559 else if (is_legacy() && execution.model == ExecutionModelFragment)
2560 return "varying "; // Fragment outputs are renamed so they never hit this case.
2561 else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
2562 {
2563 uint32_t loc = get_decoration(id: var.self, decoration: DecorationLocation);
2564 bool is_inout = location_is_framebuffer_fetch(location: loc);
2565 if (is_inout)
2566 return "inout ";
2567 else
2568 return "out ";
2569 }
2570 else
2571 return var.storage == StorageClassInput ? "in " : "out ";
2572 }
2573 else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
2574 var.storage == StorageClassPushConstant || var.storage == StorageClassAtomicCounter)
2575 {
2576 return "uniform ";
2577 }
2578 else if (var.storage == StorageClassRayPayloadKHR)
2579 {
2580 return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV ";
2581 }
2582 else if (var.storage == StorageClassIncomingRayPayloadKHR)
2583 {
2584 return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV ";
2585 }
2586 else if (var.storage == StorageClassHitAttributeKHR)
2587 {
2588 return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV ";
2589 }
2590 else if (var.storage == StorageClassCallableDataKHR)
2591 {
2592 return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV ";
2593 }
2594 else if (var.storage == StorageClassIncomingCallableDataKHR)
2595 {
2596 return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV ";
2597 }
2598
2599 return "";
2600}
2601
2602void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
2603 const SmallVector<uint32_t> &indices)
2604{
2605 uint32_t member_type_id = type.self;
2606 const SPIRType *member_type = &type;
2607 const SPIRType *parent_type = nullptr;
2608 auto flattened_name = basename;
2609 for (auto &index : indices)
2610 {
2611 flattened_name += "_";
2612 flattened_name += to_member_name(type: *member_type, index);
2613 parent_type = member_type;
2614 member_type_id = member_type->member_types[index];
2615 member_type = &get<SPIRType>(id: member_type_id);
2616 }
2617
2618 assert(member_type->basetype != SPIRType::Struct);
2619
2620 // We're overriding struct member names, so ensure we do so on the primary type.
2621 if (parent_type->type_alias)
2622 parent_type = &get<SPIRType>(id: parent_type->type_alias);
2623
2624 // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
2625 // which is not allowed.
2626 ParsedIR::sanitize_underscores(str&: flattened_name);
2627
2628 uint32_t last_index = indices.back();
2629
2630 // Pass in the varying qualifier here so it will appear in the correct declaration order.
2631 // Replace member name while emitting it so it encodes both struct name and member name.
2632 auto backup_name = get_member_name(id: parent_type->self, index: last_index);
2633 auto member_name = to_member_name(type: *parent_type, index: last_index);
2634 set_member_name(id: parent_type->self, index: last_index, name: flattened_name);
2635 emit_struct_member(type: *parent_type, member_type_id, index: last_index, qualifier: qual);
2636 // Restore member name.
2637 set_member_name(id: parent_type->self, index: last_index, name: member_name);
2638}
2639
2640void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual,
2641 const SmallVector<uint32_t> &indices)
2642{
2643 auto sub_indices = indices;
2644 sub_indices.push_back(t: 0);
2645
2646 const SPIRType *member_type = &type;
2647 for (auto &index : indices)
2648 member_type = &get<SPIRType>(id: member_type->member_types[index]);
2649
2650 assert(member_type->basetype == SPIRType::Struct);
2651
2652 if (!member_type->array.empty())
2653 SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks.");
2654
2655 for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
2656 {
2657 sub_indices.back() = i;
2658 if (get<SPIRType>(id: member_type->member_types[i]).basetype == SPIRType::Struct)
2659 emit_flattened_io_block_struct(basename, type, qual, indices: sub_indices);
2660 else
2661 emit_flattened_io_block_member(basename, type, qual, indices: sub_indices);
2662 }
2663}
2664
2665void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
2666{
2667 auto &var_type = get<SPIRType>(id: var.basetype);
2668 if (!var_type.array.empty())
2669 SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
2670
2671 // Emit flattened types based on the type alias. Normally, we are never supposed to emit
2672 // struct declarations for aliased types.
2673 auto &type = var_type.type_alias ? get<SPIRType>(id: var_type.type_alias) : var_type;
2674
2675 auto old_flags = ir.meta[type.self].decoration.decoration_flags;
2676 // Emit the members as if they are part of a block to get all qualifiers.
2677 ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
2678
2679 type.member_name_cache.clear();
2680
2681 SmallVector<uint32_t> member_indices;
2682 member_indices.push_back(t: 0);
2683 auto basename = to_name(id: var.self);
2684
2685 uint32_t i = 0;
2686 for (auto &member : type.member_types)
2687 {
2688 add_member_name(type, name: i);
2689 auto &membertype = get<SPIRType>(id: member);
2690
2691 member_indices.back() = i;
2692 if (membertype.basetype == SPIRType::Struct)
2693 emit_flattened_io_block_struct(basename, type, qual, indices: member_indices);
2694 else
2695 emit_flattened_io_block_member(basename, type, qual, indices: member_indices);
2696 i++;
2697 }
2698
2699 ir.meta[type.self].decoration.decoration_flags = old_flags;
2700
2701 // Treat this variable as fully flattened from now on.
2702 flattened_structs[var.self] = true;
2703}
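// Illustrative sketch (assumed block): flattening an output block such as
//     out VertexData { vec3 normal; vec2 uv; } vout;
// on a legacy target yields separate varyings named from the instance and member names,
// roughly
//     varying vec3 vout_normal;
//     varying vec2 vout_uv;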
2704
2705void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
2706{
2707 auto &type = get<SPIRType>(id: var.basetype);
2708
2709 if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
2710 !options.es && options.version < 410)
2711 {
2712 require_extension_internal(ext: "GL_ARB_vertex_attrib_64bit");
2713 }
2714
2715 // Either make it plain in/out or in/out blocks depending on what shader is doing ...
2716 bool block = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock);
2717 const char *qual = to_storage_qualifiers_glsl(var);
2718
2719 if (block)
2720 {
2721 // ESSL earlier than 310 and GLSL earlier than 150 did not support
2722 // I/O variables which are struct types.
2723 // To support this, flatten the struct into separate varyings instead.
2724 if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2725 (!options.es && options.version < 150))
2726 {
2727 // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
2728 // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
2729 emit_flattened_io_block(var, qual);
2730 }
2731 else
2732 {
2733 if (options.es && options.version < 320)
2734 {
2735 // Geometry and tessellation extensions imply this extension.
2736 if (!has_extension(ext: "GL_EXT_geometry_shader") && !has_extension(ext: "GL_EXT_tessellation_shader"))
2737 require_extension_internal(ext: "GL_EXT_shader_io_blocks");
2738 }
2739
2740 // Workaround to make sure we can emit "patch in/out" correctly.
2741 fixup_io_block_patch_primitive_qualifiers(var);
2742
2743 // Block names should never alias.
2744 auto block_name = to_name(id: type.self, allow_alias: false);
2745
2746 // The namespace for I/O blocks is separate from other variables in GLSL.
2747 auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
2748
2749 // Shaders never use the block by interface name, so we don't
2750 // have to track this other than updating name caches.
2751 if (block_name.empty() || block_namespace.find(x: block_name) != end(cont&: block_namespace))
2752 block_name = get_fallback_name(id: type.self);
2753 else
2754 block_namespace.insert(x: block_name);
2755
2756 			// If for some reason block_name is an illegal name, make a final fallback to a workaround name.
2757 // This cannot conflict with anything else, so we're safe now.
2758 if (block_name.empty())
2759 block_name = join(ts: "_", ts&: get<SPIRType>(id: var.basetype).self, ts: "_", ts: var.self);
2760
2761 // Instance names cannot alias block names.
2762 resource_names.insert(x: block_name);
2763
2764 const char *block_qualifier;
2765 if (has_decoration(id: var.self, decoration: DecorationPatch))
2766 block_qualifier = "patch ";
2767 else if (has_decoration(id: var.self, decoration: DecorationPerPrimitiveEXT))
2768 block_qualifier = "perprimitiveEXT ";
2769 else if (has_decoration(id: var.self, decoration: DecorationPerVertexKHR))
2770 block_qualifier = "pervertexEXT ";
2771 else
2772 block_qualifier = "";
2773
2774 statement(ts: layout_for_variable(var), ts&: block_qualifier, ts&: qual, ts&: block_name);
2775 begin_scope();
2776
2777 type.member_name_cache.clear();
2778
2779 uint32_t i = 0;
2780 for (auto &member : type.member_types)
2781 {
2782 add_member_name(type, name: i);
2783 emit_struct_member(type, member_type_id: member, index: i);
2784 i++;
2785 }
2786
2787 add_resource_name(id: var.self);
2788 end_scope_decl(decl: join(ts: to_name(id: var.self), ts: type_to_array_glsl(type, variable_id: var.self)));
2789 statement(ts: "");
2790 }
2791 }
2792 else
2793 {
2794 // ESSL earlier than 310 and GLSL earlier than 150 did not support
2795 // I/O variables which are struct types.
2796 // To support this, flatten the struct into separate varyings instead.
2797 if (type.basetype == SPIRType::Struct &&
2798 (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2799 (!options.es && options.version < 150)))
2800 {
2801 emit_flattened_io_block(var, qual);
2802 }
2803 else
2804 {
2805 add_resource_name(id: var.self);
2806
2807 			// Legacy GLSL did not support int attributes; we automatically
2808 			// declare them as float and cast them on load/store.
2809 SPIRType newtype = type;
2810 if (is_legacy() && var.storage == StorageClassInput && type.basetype == SPIRType::Int)
2811 newtype.basetype = SPIRType::Float;
2812
2813 // Tessellation control and evaluation shaders must have either
2814 // gl_MaxPatchVertices or unsized arrays for input arrays.
2815 // Opt for unsized as it's the more "correct" variant to use.
2816 if (type.storage == StorageClassInput && !type.array.empty() &&
2817 !has_decoration(id: var.self, decoration: DecorationPatch) &&
2818 (get_entry_point().model == ExecutionModelTessellationControl ||
2819 get_entry_point().model == ExecutionModelTessellationEvaluation))
2820 {
2821 newtype.array.back() = 0;
2822 newtype.array_size_literal.back() = true;
2823 }
2824
2825 statement(ts: layout_for_variable(var), ts: to_qualifiers_glsl(id: var.self),
2826 ts: variable_decl(type: newtype, name: to_name(id: var.self), id: var.self), ts: ";");
2827 }
2828 }
2829}
2830
2831void CompilerGLSL::emit_uniform(const SPIRVariable &var)
2832{
2833 auto &type = get<SPIRType>(id: var.basetype);
2834 if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
2835 {
2836 if (!options.es && options.version < 420)
2837 require_extension_internal(ext: "GL_ARB_shader_image_load_store");
2838 else if (options.es && options.version < 310)
2839 SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
2840 }
2841
2842 add_resource_name(id: var.self);
2843 statement(ts: layout_for_variable(var), ts: variable_decl(variable: var), ts: ";");
2844}
2845
2846string CompilerGLSL::constant_value_macro_name(uint32_t id)
2847{
2848 return join(ts: "SPIRV_CROSS_CONSTANT_ID_", ts&: id);
2849}
2850
2851void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
2852{
2853 auto &type = get<SPIRType>(id: constant.basetype);
2854 // This will break. It is bogus and should not be legal.
2855 if (type_is_top_level_block(type))
2856 return;
2857 add_resource_name(id: constant.self);
2858 auto name = to_name(id: constant.self);
2859 statement(ts: "const ", ts: variable_decl(type, name), ts: " = ", ts: constant_op_expression(cop: constant), ts: ";");
2860}
2861
2862int CompilerGLSL::get_constant_mapping_to_workgroup_component(const SPIRConstant &c) const
2863{
2864 auto &entry_point = get_entry_point();
2865 int index = -1;
2866
2867 	// Need to redirect specialization constants which are used as WorkGroupSize to the builtin,
2868 	// since the spec constants themselves are never explicitly declared.
2869 if (entry_point.workgroup_size.constant == 0 && entry_point.flags.get(bit: ExecutionModeLocalSizeId))
2870 {
2871 if (c.self == entry_point.workgroup_size.id_x)
2872 index = 0;
2873 else if (c.self == entry_point.workgroup_size.id_y)
2874 index = 1;
2875 else if (c.self == entry_point.workgroup_size.id_z)
2876 index = 2;
2877 }
2878
2879 return index;
2880}
2881
2882void CompilerGLSL::emit_constant(const SPIRConstant &constant)
2883{
2884 auto &type = get<SPIRType>(id: constant.constant_type);
2885
2886 // This will break. It is bogus and should not be legal.
2887 if (type_is_top_level_block(type))
2888 return;
2889
2890 SpecializationConstant wg_x, wg_y, wg_z;
2891 ID workgroup_size_id = get_work_group_size_specialization_constants(x&: wg_x, y&: wg_y, z&: wg_z);
2892
2893 // This specialization constant is implicitly declared by emitting layout() in;
2894 if (constant.self == workgroup_size_id)
2895 return;
2896
2897 // These specialization constants are implicitly declared by emitting layout() in;
2898 // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
2899 // later can use macro overrides for work group size.
2900 bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
2901 ConstantID(constant.self) == wg_z.id;
2902
2903 if (options.vulkan_semantics && is_workgroup_size_constant)
2904 {
2905 // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
2906 return;
2907 }
2908 else if (!options.vulkan_semantics && is_workgroup_size_constant &&
2909 !has_decoration(id: constant.self, decoration: DecorationSpecId))
2910 {
2911 // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
2912 return;
2913 }
2914
2915 add_resource_name(id: constant.self);
2916 auto name = to_name(id: constant.self);
2917
2918 // Only scalars have constant IDs.
2919 if (has_decoration(id: constant.self, decoration: DecorationSpecId))
2920 {
2921 if (options.vulkan_semantics)
2922 {
2923 statement(ts: "layout(constant_id = ", ts: get_decoration(id: constant.self, decoration: DecorationSpecId), ts: ") const ",
2924 ts: variable_decl(type, name), ts: " = ", ts: constant_expression(c: constant), ts: ";");
2925 }
2926 else
2927 {
2928 const string &macro_name = constant.specialization_constant_macro_name;
2929 statement(ts: "#ifndef ", ts: macro_name);
2930 statement(ts: "#define ", ts: macro_name, ts: " ", ts: constant_expression(c: constant));
2931 statement(ts: "#endif");
2932
2933 // For workgroup size constants, only emit the macros.
2934 if (!is_workgroup_size_constant)
2935 statement(ts: "const ", ts: variable_decl(type, name), ts: " = ", ts: macro_name, ts: ";");
2936 }
2937 }
2938 else
2939 {
2940 statement(ts: "const ", ts: variable_decl(type, name), ts: " = ", ts: constant_expression(c: constant), ts: ";");
2941 }
2942}
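// Illustrative sketch (assumed SpecId/value): for an int spec constant 'count' with
// SpecId = 3 and default 4, the non-Vulkan path typically emits
//     #ifndef SPIRV_CROSS_CONSTANT_ID_3
//     #define SPIRV_CROSS_CONSTANT_ID_3 4
//     #endif
//     const int count = SPIRV_CROSS_CONSTANT_ID_3;
// while Vulkan GLSL keeps layout(constant_id = 3) const int count = 4;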
2943
2944void CompilerGLSL::emit_entry_point_declarations()
2945{
2946}
2947
2948void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
2949{
2950 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
2951 if (is_hidden_variable(var))
2952 return;
2953
2954 auto *meta = ir.find_meta(id: var.self);
2955 if (!meta)
2956 return;
2957
2958 auto &m = meta->decoration;
2959 if (keywords.find(x: m.alias) != end(cont: keywords))
2960 m.alias = join(ts: "_", ts&: m.alias);
2961 });
2962
2963 ir.for_each_typed_id<SPIRFunction>(op: [&](uint32_t, const SPIRFunction &func) {
2964 auto *meta = ir.find_meta(id: func.self);
2965 if (!meta)
2966 return;
2967
2968 auto &m = meta->decoration;
2969 if (keywords.find(x: m.alias) != end(cont: keywords))
2970 m.alias = join(ts: "_", ts&: m.alias);
2971 });
2972
2973 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t, const SPIRType &type) {
2974 auto *meta = ir.find_meta(id: type.self);
2975 if (!meta)
2976 return;
2977
2978 auto &m = meta->decoration;
2979 if (keywords.find(x: m.alias) != end(cont: keywords))
2980 m.alias = join(ts: "_", ts&: m.alias);
2981
2982 for (auto &memb : meta->members)
2983 if (keywords.find(x: memb.alias) != end(cont: keywords))
2984 memb.alias = join(ts: "_", ts&: memb.alias);
2985 });
2986}
2987
2988void CompilerGLSL::replace_illegal_names()
2989{
2990 // clang-format off
2991 static const unordered_set<string> keywords = {
2992 "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
2993 "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
2994 "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
2995 "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
2996 "ceil", "cos", "cosh", "cross", "degrees",
2997 "dFdx", "dFdxCoarse", "dFdxFine",
2998 "dFdy", "dFdyCoarse", "dFdyFine",
2999 "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
3000 "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
3001 "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
3002 "greaterThan", "greaterThanEqual", "groupMemoryBarrier",
3003 "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
3004 "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
3005 "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
3006 "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
3007 "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
3008 "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
3009 "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
3010 "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
3011 "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
3012 "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
3013 "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
3014 "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
3015 "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
3016
3017 "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
3018 "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
3019 "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
3020 "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
3021 "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
3022 "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
3023 "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
3024 "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
3025 "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
3026 "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
3027 "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
3028 "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
3029 "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
3030 "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
3031 "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
3032 "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
3033 "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
3034 "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
3035 "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
3036 "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
3037 "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
3038 "while", "writeonly",
3039 };
3040 // clang-format on
3041
3042 replace_illegal_names(keywords);
3043}
3044
3045void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
3046{
3047 auto &m = ir.meta[var.self].decoration;
3048 uint32_t location = 0;
3049 if (m.decoration_flags.get(bit: DecorationLocation))
3050 location = m.location;
3051
3052 // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
3053 // do the access chain part of this for us.
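// Illustrative effect in legacy GLSL: an output previously declared at location 1 is simply renamed,
// so a write like "FragColor1 = v;" in the emitted shader body becomes "gl_FragData[1] = v;".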
3054 auto &type = get<SPIRType>(id: var.basetype);
3055
3056 if (type.array.empty())
3057 {
3058 // Redirect the write to a specific render target in legacy GLSL.
3059 m.alias = join(ts: "gl_FragData[", ts&: location, ts: "]");
3060
3061 if (is_legacy_es() && location != 0)
3062 require_extension_internal(ext: "GL_EXT_draw_buffers");
3063 }
3064 else if (type.array.size() == 1)
3065 {
3066 // If location is non-zero, we probably have to add an offset.
3067 // This gets really tricky since we'd have to inject an offset in the access chain.
3068 // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
3069 m.alias = "gl_FragData";
3070 if (location != 0)
3071 SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
3072 "This is unimplemented in SPIRV-Cross.");
3073
3074 if (is_legacy_es())
3075 require_extension_internal(ext: "GL_EXT_draw_buffers");
3076 }
3077 else
3078 SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
3079
3080 var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
3081}
3082
3083void CompilerGLSL::replace_fragment_outputs()
3084{
3085 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3086 auto &type = this->get<SPIRType>(id: var.basetype);
3087
3088 if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
3089 replace_fragment_output(var);
3090 });
3091}
3092
3093string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
3094{
3095 if (out_type.vecsize == input_components)
3096 return expr;
3097 else if (input_components == 1 && !backend.can_swizzle_scalar)
3098 return join(ts: type_to_glsl(type: out_type), ts: "(", ts: expr, ts: ")");
3099 else
3100 {
3101 // FIXME: This will not work with packed expressions.
3102 auto e = enclose_expression(expr) + ".";
3103 // Just clamp the swizzle index if we have more outputs than inputs.
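// For example, expanding a 2-component input into a vec4 output yields "expr.xyyy".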
3104 for (uint32_t c = 0; c < out_type.vecsize; c++)
3105 e += index_to_swizzle(index: min(a: c, b: input_components - 1));
3106 if (backend.swizzle_is_function && out_type.vecsize > 1)
3107 e += "()";
3108
3109 remove_duplicate_swizzle(op&: e);
3110 return e;
3111 }
3112}
3113
3114void CompilerGLSL::emit_pls()
3115{
3116 auto &execution = get_entry_point();
3117 if (execution.model != ExecutionModelFragment)
3118 SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
3119
3120 if (!options.es)
3121 SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
3122
3123 if (options.version < 300)
3124 SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
3125
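// Illustrative shape of the emitted blocks (member declarations come from pls_decl();
// the rgba8 color member shown here is hypothetical):
//   __pixel_local_inEXT _PLSIn
//   {
//       layout(rgba8) mediump vec4 color;
//   };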
3126 if (!pls_inputs.empty())
3127 {
3128 statement(ts: "__pixel_local_inEXT _PLSIn");
3129 begin_scope();
3130 for (auto &input : pls_inputs)
3131 statement(ts: pls_decl(variable: input), ts: ";");
3132 end_scope_decl();
3133 statement(ts: "");
3134 }
3135
3136 if (!pls_outputs.empty())
3137 {
3138 statement(ts: "__pixel_local_outEXT _PLSOut");
3139 begin_scope();
3140 for (auto &output : pls_outputs)
3141 statement(ts: pls_decl(variable: output), ts: ";");
3142 end_scope_decl();
3143 statement(ts: "");
3144 }
3145}
3146
3147void CompilerGLSL::fixup_image_load_store_access()
3148{
3149 if (!options.enable_storage_image_qualifier_deduction)
3150 return;
3151
3152 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t var, const SPIRVariable &) {
3153 auto &vartype = expression_type(id: var);
3154 if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
3155 {
3156 // Very old glslangValidator and HLSL compilers do not emit required qualifiers here.
3157 // Solve this by making the image access as restricted as possible and loosen up if we need to.
3158 // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
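// In emitted GLSL this surfaces as both "readonly" and "writeonly" qualifiers on the image;
// later passes loosen the access again if an actual load or store turns up.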
3159
3160 if (!has_decoration(id: var, decoration: DecorationNonWritable) && !has_decoration(id: var, decoration: DecorationNonReadable))
3161 {
3162 set_decoration(id: var, decoration: DecorationNonWritable);
3163 set_decoration(id: var, decoration: DecorationNonReadable);
3164 }
3165 }
3166 });
3167}
3168
3169static bool is_block_builtin(BuiltIn builtin)
3170{
3171 return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
3172 builtin == BuiltInCullDistance;
3173}
3174
3175bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
3176{
3177 // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.
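// Example trigger: a vertex shader that captures gl_Position with transform feedback gives the
// block member an Offset decoration, so the block must be redeclared with explicit xfb_offset
// qualifiers (emit_declared_builtin_block() takes care of the actual redeclaration).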
3178
3179 if (storage != StorageClassOutput)
3180 return false;
3181 bool should_force = false;
3182
3183 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3184 if (should_force)
3185 return;
3186
3187 auto &type = this->get<SPIRType>(id: var.basetype);
3188 bool block = has_decoration(id: type.self, decoration: DecorationBlock);
3189 if (var.storage == storage && block && is_builtin_variable(var))
3190 {
3191 uint32_t member_count = uint32_t(type.member_types.size());
3192 for (uint32_t i = 0; i < member_count; i++)
3193 {
3194 if (has_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn) &&
3195 is_block_builtin(builtin: BuiltIn(get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn))) &&
3196 has_member_decoration(id: type.self, index: i, decoration: DecorationOffset))
3197 {
3198 should_force = true;
3199 }
3200 }
3201 }
3202 else if (var.storage == storage && !block && is_builtin_variable(var))
3203 {
3204 if (is_block_builtin(builtin: BuiltIn(get_decoration(id: type.self, decoration: DecorationBuiltIn))) &&
3205 has_decoration(id: var.self, decoration: DecorationOffset))
3206 {
3207 should_force = true;
3208 }
3209 }
3210 });
3211
3212 // If we're declaring clip/cull planes with control points we need to force block declaration.
3213 if ((get_execution_model() == ExecutionModelTessellationControl ||
3214 get_execution_model() == ExecutionModelMeshEXT) &&
3215 (clip_distance_count || cull_distance_count))
3216 {
3217 should_force = true;
3218 }
3219
3220 // Either a glslang bug or an oversight: global invariant position does not work in mesh shaders.
3221 if (get_execution_model() == ExecutionModelMeshEXT && position_invariant)
3222 should_force = true;
3223
3224 return should_force;
3225}
3226
3227void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model)
3228{
3229 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3230 auto &type = this->get<SPIRType>(id: var.basetype);
3231 bool block = has_decoration(id: type.self, decoration: DecorationBlock);
3232 if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
3233 is_builtin_variable(var))
3234 {
3235 if (model != ExecutionModelMeshEXT)
3236 {
3237 // Make sure the array has a supported name in the code.
3238 if (var.storage == StorageClassOutput)
3239 set_name(id: var.self, name: "gl_out");
3240 else if (var.storage == StorageClassInput)
3241 set_name(id: var.self, name: "gl_in");
3242 }
3243 else
3244 {
3245 auto flags = get_buffer_block_flags(id: var.self);
3246 if (flags.get(bit: DecorationPerPrimitiveEXT))
3247 {
3248 set_name(id: var.self, name: "gl_MeshPrimitivesEXT");
3249 set_name(id: type.self, name: "gl_MeshPerPrimitiveEXT");
3250 }
3251 else
3252 {
3253 set_name(id: var.self, name: "gl_MeshVerticesEXT");
3254 set_name(id: type.self, name: "gl_MeshPerVertexEXT");
3255 }
3256 }
3257 }
3258
3259 if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block)
3260 {
3261 auto *m = ir.find_meta(id: var.self);
3262 if (m && m->decoration.builtin)
3263 {
3264 auto builtin_type = m->decoration.builtin_type;
3265 if (builtin_type == BuiltInPrimitivePointIndicesEXT)
3266 set_name(id: var.self, name: "gl_PrimitivePointIndicesEXT");
3267 else if (builtin_type == BuiltInPrimitiveLineIndicesEXT)
3268 set_name(id: var.self, name: "gl_PrimitiveLineIndicesEXT");
3269 else if (builtin_type == BuiltInPrimitiveTriangleIndicesEXT)
3270 set_name(id: var.self, name: "gl_PrimitiveTriangleIndicesEXT");
3271 }
3272 }
3273 });
3274}
3275
3276void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
3277{
3278 Bitset emitted_builtins;
3279 Bitset global_builtins;
3280 const SPIRVariable *block_var = nullptr;
3281 bool emitted_block = false;
3282
3283 // Need to use declared size in the type.
3284 // These variables might have been declared, but not statically used, so we haven't deduced their size yet.
3285 uint32_t cull_distance_size = 0;
3286 uint32_t clip_distance_size = 0;
3287
3288 bool have_xfb_buffer_stride = false;
3289 bool have_geom_stream = false;
3290 bool have_any_xfb_offset = false;
3291 uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
3292 std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
3293
3294 const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool {
3295 return builtin == BuiltInPosition || builtin == BuiltInPointSize ||
3296 builtin == BuiltInClipDistance || builtin == BuiltInCullDistance;
3297 };
3298
3299 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3300 auto &type = this->get<SPIRType>(id: var.basetype);
3301 bool block = has_decoration(id: type.self, decoration: DecorationBlock);
3302 Bitset builtins;
3303
3304 if (var.storage == storage && block && is_builtin_variable(var))
3305 {
3306 uint32_t index = 0;
3307 for (auto &m : ir.meta[type.self].members)
3308 {
3309 if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
3310 {
3311 builtins.set(m.builtin_type);
3312 if (m.builtin_type == BuiltInCullDistance)
3313 cull_distance_size = to_array_size_literal(type: this->get<SPIRType>(id: type.member_types[index]));
3314 else if (m.builtin_type == BuiltInClipDistance)
3315 clip_distance_size = to_array_size_literal(type: this->get<SPIRType>(id: type.member_types[index]));
3316
3317 if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationOffset))
3318 {
3319 have_any_xfb_offset = true;
3320 builtin_xfb_offsets[m.builtin_type] = m.offset;
3321 }
3322
3323 if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationStream))
3324 {
3325 uint32_t stream = m.stream;
3326 if (have_geom_stream && geom_stream != stream)
3327 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3328 have_geom_stream = true;
3329 geom_stream = stream;
3330 }
3331 }
3332 index++;
3333 }
3334
3335 if (storage == StorageClassOutput && has_decoration(id: var.self, decoration: DecorationXfbBuffer) &&
3336 has_decoration(id: var.self, decoration: DecorationXfbStride))
3337 {
3338 uint32_t buffer_index = get_decoration(id: var.self, decoration: DecorationXfbBuffer);
3339 uint32_t stride = get_decoration(id: var.self, decoration: DecorationXfbStride);
3340 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
3341 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3342 if (have_xfb_buffer_stride && stride != xfb_stride)
3343 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3344 have_xfb_buffer_stride = true;
3345 xfb_buffer = buffer_index;
3346 xfb_stride = stride;
3347 }
3348
3349 if (storage == StorageClassOutput && has_decoration(id: var.self, decoration: DecorationStream))
3350 {
3351 uint32_t stream = get_decoration(id: var.self, decoration: DecorationStream);
3352 if (have_geom_stream && geom_stream != stream)
3353 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3354 have_geom_stream = true;
3355 geom_stream = stream;
3356 }
3357 }
3358 else if (var.storage == storage && !block && is_builtin_variable(var))
3359 {
3360 // While we're at it, collect all declared global builtins (HLSL mostly ...).
3361 auto &m = ir.meta[var.self].decoration;
3362 if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
3363 {
3364 // For mesh/tesc output, Clip/Cull is an array-of-array. Look at innermost array type
3365 // for correct result.
3366 global_builtins.set(m.builtin_type);
3367 if (m.builtin_type == BuiltInCullDistance)
3368 cull_distance_size = to_array_size_literal(type, index: 0);
3369 else if (m.builtin_type == BuiltInClipDistance)
3370 clip_distance_size = to_array_size_literal(type, index: 0);
3371
3372 if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationXfbStride) &&
3373 m.decoration_flags.get(bit: DecorationXfbBuffer) && m.decoration_flags.get(bit: DecorationOffset))
3374 {
3375 have_any_xfb_offset = true;
3376 builtin_xfb_offsets[m.builtin_type] = m.offset;
3377 uint32_t buffer_index = m.xfb_buffer;
3378 uint32_t stride = m.xfb_stride;
3379 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
3380 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3381 if (have_xfb_buffer_stride && stride != xfb_stride)
3382 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3383 have_xfb_buffer_stride = true;
3384 xfb_buffer = buffer_index;
3385 xfb_stride = stride;
3386 }
3387
3388 if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationStream))
3389 {
3390 uint32_t stream = get_decoration(id: var.self, decoration: DecorationStream);
3391 if (have_geom_stream && geom_stream != stream)
3392 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3393 have_geom_stream = true;
3394 geom_stream = stream;
3395 }
3396 }
3397 }
3398
3399 if (builtins.empty())
3400 return;
3401
3402 if (emitted_block)
3403 SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
3404
3405 emitted_builtins = builtins;
3406 emitted_block = true;
3407 block_var = &var;
3408 });
3409
3410 global_builtins =
3411 Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
3412 (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
3413
3414 // Try to collect all other declared builtins.
3415 if (!emitted_block)
3416 emitted_builtins = global_builtins;
3417
3418 // Can't declare an empty interface block.
3419 if (emitted_builtins.empty())
3420 return;
3421
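// From here on the block is redeclared explicitly. Illustrative output for a vertex shader
// capturing position and point size via XFB (offsets and stride are hypothetical):
//   layout(xfb_buffer = 0, xfb_stride = 32) out gl_PerVertex
//   {
//       layout(xfb_offset = 0) vec4 gl_Position;
//       layout(xfb_offset = 16) float gl_PointSize;
//   };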
3422 if (storage == StorageClassOutput)
3423 {
3424 SmallVector<string> attr;
3425 if (have_xfb_buffer_stride && have_any_xfb_offset)
3426 {
3427 if (!options.es)
3428 {
3429 if (options.version < 440 && options.version >= 140)
3430 require_extension_internal(ext: "GL_ARB_enhanced_layouts");
3431 else if (options.version < 140)
3432 SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
3433 if (!options.es && options.version < 440)
3434 require_extension_internal(ext: "GL_ARB_enhanced_layouts");
3435 }
3436 else if (options.es)
3437 SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
3438 attr.push_back(t: join(ts: "xfb_buffer = ", ts&: xfb_buffer, ts: ", xfb_stride = ", ts&: xfb_stride));
3439 }
3440
3441 if (have_geom_stream)
3442 {
3443 if (get_execution_model() != ExecutionModelGeometry)
3444 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
3445 if (options.es)
3446 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
3447 if (options.version < 400)
3448 require_extension_internal(ext: "GL_ARB_transform_feedback3");
3449 attr.push_back(t: join(ts: "stream = ", ts&: geom_stream));
3450 }
3451
3452 if (model == ExecutionModelMeshEXT)
3453 statement(ts: "out gl_MeshPerVertexEXT");
3454 else if (!attr.empty())
3455 statement(ts: "layout(", ts: merge(list: attr), ts: ") out gl_PerVertex");
3456 else
3457 statement(ts: "out gl_PerVertex");
3458 }
3459 else
3460 {
3461 // If the geometry shader uses passthrough, the gl_PerVertex input block must be passthrough as well.
3462 if (get_entry_point().geometry_passthrough)
3463 statement(ts: "layout(passthrough) in gl_PerVertex");
3464 else
3465 statement(ts: "in gl_PerVertex");
3466 }
3467
3468 begin_scope();
3469 if (emitted_builtins.get(bit: BuiltInPosition))
3470 {
3471 auto itr = builtin_xfb_offsets.find(x: BuiltInPosition);
3472 if (itr != end(cont&: builtin_xfb_offsets))
3473 statement(ts: "layout(xfb_offset = ", ts&: itr->second, ts: ") vec4 gl_Position;");
3474 else if (position_invariant)
3475 statement(ts: "invariant vec4 gl_Position;");
3476 else
3477 statement(ts: "vec4 gl_Position;");
3478 }
3479
3480 if (emitted_builtins.get(bit: BuiltInPointSize))
3481 {
3482 auto itr = builtin_xfb_offsets.find(x: BuiltInPointSize);
3483 if (itr != end(cont&: builtin_xfb_offsets))
3484 statement(ts: "layout(xfb_offset = ", ts&: itr->second, ts: ") float gl_PointSize;");
3485 else
3486 statement(ts: "float gl_PointSize;");
3487 }
3488
3489 if (emitted_builtins.get(bit: BuiltInClipDistance))
3490 {
3491 auto itr = builtin_xfb_offsets.find(x: BuiltInClipDistance);
3492 if (itr != end(cont&: builtin_xfb_offsets))
3493 statement(ts: "layout(xfb_offset = ", ts&: itr->second, ts: ") float gl_ClipDistance[", ts&: clip_distance_size, ts: "];");
3494 else
3495 statement(ts: "float gl_ClipDistance[", ts&: clip_distance_size, ts: "];");
3496 }
3497
3498 if (emitted_builtins.get(bit: BuiltInCullDistance))
3499 {
3500 auto itr = builtin_xfb_offsets.find(x: BuiltInCullDistance);
3501 if (itr != end(cont&: builtin_xfb_offsets))
3502 statement(ts: "layout(xfb_offset = ", ts&: itr->second, ts: ") float gl_CullDistance[", ts&: cull_distance_size, ts: "];");
3503 else
3504 statement(ts: "float gl_CullDistance[", ts&: cull_distance_size, ts: "];");
3505 }
3506
3507 bool builtin_array = model == ExecutionModelTessellationControl ||
3508 (model == ExecutionModelMeshEXT && storage == StorageClassOutput) ||
3509 (model == ExecutionModelGeometry && storage == StorageClassInput) ||
3510 (model == ExecutionModelTessellationEvaluation && storage == StorageClassInput);
3511
3512 if (builtin_array)
3513 {
3514 const char *instance_name;
3515 if (model == ExecutionModelMeshEXT)
3516 instance_name = "gl_MeshVerticesEXT"; // Per primitive is never synthesized.
3517 else
3518 instance_name = storage == StorageClassInput ? "gl_in" : "gl_out";
3519
3520 if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
3521 end_scope_decl(decl: join(ts&: instance_name, ts: "[", ts&: get_entry_point().output_vertices, ts: "]"));
3522 else
3523 end_scope_decl(decl: join(ts&: instance_name, ts: "[]"));
3524 }
3525 else
3526 end_scope_decl();
3527 statement(ts: "");
3528}
3529
3530bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
3531{
3532 bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
3533
3534 if (statically_assigned)
3535 {
3536 auto *constant = maybe_get<SPIRConstant>(id: var.static_expression);
3537 if (constant && constant->is_used_as_lut)
3538 return true;
3539 }
3540
3541 return false;
3542}
3543
3544void CompilerGLSL::emit_resources()
3545{
3546 auto &execution = get_entry_point();
3547
3548 replace_illegal_names();
3549
3550 // Legacy GL uses gl_FragData[], redeclare all fragment outputs
3551 // with builtins.
3552 if (execution.model == ExecutionModelFragment && is_legacy())
3553 replace_fragment_outputs();
3554
3555 // Emit PLS blocks if we have such variables.
3556 if (!pls_inputs.empty() || !pls_outputs.empty())
3557 emit_pls();
3558
3559 switch (execution.model)
3560 {
3561 case ExecutionModelGeometry:
3562 case ExecutionModelTessellationControl:
3563 case ExecutionModelTessellationEvaluation:
3564 case ExecutionModelMeshEXT:
3565 fixup_implicit_builtin_block_names(model: execution.model);
3566 break;
3567
3568 default:
3569 break;
3570 }
3571
3572 bool global_invariant_position = position_invariant && (options.es || options.version >= 120);
3573
3574 // Emit custom gl_PerVertex for SSO compatibility.
3575 if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
3576 {
3577 switch (execution.model)
3578 {
3579 case ExecutionModelGeometry:
3580 case ExecutionModelTessellationControl:
3581 case ExecutionModelTessellationEvaluation:
3582 emit_declared_builtin_block(storage: StorageClassInput, model: execution.model);
3583 emit_declared_builtin_block(storage: StorageClassOutput, model: execution.model);
3584 global_invariant_position = false;
3585 break;
3586
3587 case ExecutionModelVertex:
3588 case ExecutionModelMeshEXT:
3589 emit_declared_builtin_block(storage: StorageClassOutput, model: execution.model);
3590 global_invariant_position = false;
3591 break;
3592
3593 default:
3594 break;
3595 }
3596 }
3597 else if (should_force_emit_builtin_block(storage: StorageClassOutput))
3598 {
3599 emit_declared_builtin_block(storage: StorageClassOutput, model: execution.model);
3600 global_invariant_position = false;
3601 }
3602 else if (execution.geometry_passthrough)
3603 {
3604 // Need to declare gl_in with Passthrough.
3605 // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
3606 emit_declared_builtin_block(storage: StorageClassInput, model: execution.model);
3607 }
3608 else
3609 {
3610 // Need to redeclare clip/cull distance with explicit size to use them.
3611 // SPIR-V mandates these builtins have a size declared.
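// For example, a vertex shader using two user clip planes gets:
//   out float gl_ClipDistance[2];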
3612 const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
3613 if (clip_distance_count != 0)
3614 statement(ts&: storage, ts: " float gl_ClipDistance[", ts&: clip_distance_count, ts: "];");
3615 if (cull_distance_count != 0)
3616 statement(ts&: storage, ts: " float gl_CullDistance[", ts&: cull_distance_count, ts: "];");
3617 if (clip_distance_count != 0 || cull_distance_count != 0)
3618 statement(ts: "");
3619 }
3620
3621 if (global_invariant_position)
3622 {
3623 statement(ts: "invariant gl_Position;");
3624 statement(ts: "");
3625 }
3626
3627 bool emitted = false;
3628
3629 // If emitting Vulkan GLSL, emit specialization constants as actual constant declarations;
3630 // spec op expressions will redirect to the constant name.
3631 //
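// Vulkan-semantics output for a hypothetical integer constant with SpecId 3 looks like:
//   layout(constant_id = 3) const int SOME_CONSTANT = 1;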
3633 {
3634 auto loop_lock = ir.create_loop_hard_lock();
3635 for (auto &id_ : ir.ids_for_constant_undef_or_type)
3636 {
3637 auto &id = ir.ids[id_];
3638
3639 // Skip declaring any bogus constants or undefs which use block types.
3640 // We don't declare block types directly, so this will never work.
3641 // Should not be legal SPIR-V, so this is considered a workaround.
3642
3643 if (id.get_type() == TypeConstant)
3644 {
3645 auto &c = id.get<SPIRConstant>();
3646
3647 bool needs_declaration = c.specialization || c.is_used_as_lut;
3648
3649 if (needs_declaration)
3650 {
3651 if (!options.vulkan_semantics && c.specialization)
3652 {
3653 c.specialization_constant_macro_name =
3654 constant_value_macro_name(id: get_decoration(id: c.self, decoration: DecorationSpecId));
3655 }
3656 emit_constant(constant: c);
3657 emitted = true;
3658 }
3659 }
3660 else if (id.get_type() == TypeConstantOp)
3661 {
3662 emit_specialization_constant_op(constant: id.get<SPIRConstantOp>());
3663 emitted = true;
3664 }
3665 else if (id.get_type() == TypeType)
3666 {
3667 auto *type = &id.get<SPIRType>();
3668
3669 bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
3670 (!has_decoration(id: type->self, decoration: DecorationBlock) &&
3671 !has_decoration(id: type->self, decoration: DecorationBufferBlock));
3672
3673 // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs.
3674 if (type->basetype == SPIRType::Struct && type->pointer &&
3675 has_decoration(id: type->self, decoration: DecorationBlock) &&
3676 (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
3677 type->storage == StorageClassHitAttributeKHR))
3678 {
3679 type = &get<SPIRType>(id: type->parent_type);
3680 is_natural_struct = true;
3681 }
3682
3683 if (is_natural_struct)
3684 {
3685 if (emitted)
3686 statement(ts: "");
3687 emitted = false;
3688
3689 emit_struct(type&: *type);
3690 }
3691 }
3692 else if (id.get_type() == TypeUndef)
3693 {
3694 auto &undef = id.get<SPIRUndef>();
3695 auto &type = this->get<SPIRType>(id: undef.basetype);
3696 // OpUndef can be void for some reason ...
3697 if (type.basetype == SPIRType::Void)
3698 continue;
3699
3700 // An undef of a block type will break; such SPIR-V is bogus and should not be legal.
3701 if (type_is_top_level_block(type))
3702 continue;
3703
3704 string initializer;
3705 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
3706 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: undef.basetype));
3707
3708 // FIXME: If used in a constant, we must declare it as one.
3709 statement(ts: variable_decl(type, name: to_name(id: undef.self), id: undef.self), ts&: initializer, ts: ";");
3710 emitted = true;
3711 }
3712 }
3713 }
3714
3715 if (emitted)
3716 statement(ts: "");
3717
3718 // If we needed to declare work group size late, check here.
3719 // If the work group size depends on a specialization constant, we need to declare the layout() block
3720 // after constants (and their macros) have been declared.
3721 if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
3722 (execution.workgroup_size.constant != 0 || execution.flags.get(bit: ExecutionModeLocalSizeId)))
3723 {
3724 SpecializationConstant wg_x, wg_y, wg_z;
3725 get_work_group_size_specialization_constants(x&: wg_x, y&: wg_y, z&: wg_z);
3726
3727 if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
3728 {
3729 SmallVector<string> inputs;
3730 build_workgroup_size(arguments&: inputs, wg_x, wg_y, wg_z);
3731 statement(ts: "layout(", ts: merge(list: inputs), ts: ") in;");
3732 statement(ts: "");
3733 }
3734 }
3735
3736 emitted = false;
3737
3738 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
3739 {
3740 // Output buffer reference blocks.
3741 // Do this in two stages, one with forward declaration,
3742 // and one without. Buffer reference blocks can reference themselves
3743 // to support things like linked lists.
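// Illustrative two-stage output for a self-referential node type (names are hypothetical):
//   layout(buffer_reference) buffer Node;
//   layout(buffer_reference, std430) buffer Node { Node next; int value; };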
3744 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t id, SPIRType &type) {
3745 if (is_physical_pointer(type))
3746 {
3747 bool emit_type = true;
3748 if (!is_physical_pointer_to_buffer_block(type))
3749 {
3750 // Only forward-declare if we intend to emit it in the non_block_pointer types.
3751 // Otherwise, these are just "benign" pointer types that exist as a result of access chains.
3752 emit_type = std::find(first: physical_storage_non_block_pointer_types.begin(),
3753 last: physical_storage_non_block_pointer_types.end(),
3754 val: id) != physical_storage_non_block_pointer_types.end();
3755 }
3756
3757 if (emit_type)
3758 emit_buffer_reference_block(type_id: id, forward_declaration: true);
3759 }
3760 });
3761
3762 for (auto type : physical_storage_non_block_pointer_types)
3763 emit_buffer_reference_block(type_id: type, forward_declaration: false);
3764
3765 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t id, SPIRType &type) {
3766 if (is_physical_pointer_to_buffer_block(type))
3767 emit_buffer_reference_block(type_id: id, forward_declaration: false);
3768 });
3769 }
3770
3771 // Output UBOs and SSBOs
3772 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3773 auto &type = this->get<SPIRType>(id: var.basetype);
3774
3775 bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
3776 type.storage == StorageClassShaderRecordBufferKHR;
3777 bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock) ||
3778 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
3779
3780 if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
3781 has_block_flags)
3782 {
3783 emit_buffer_block(var);
3784 }
3785 });
3786
3787 // Output push constant blocks
3788 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3789 auto &type = this->get<SPIRType>(id: var.basetype);
3790 if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
3791 !is_hidden_variable(var))
3792 {
3793 emit_push_constant_block(var);
3794 }
3795 });
3796
3797 bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
3798
3799 // Output Uniform Constants (values, samplers, images, etc).
3800 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3801 auto &type = this->get<SPIRType>(id: var.basetype);
3802
3803 // If we're remapping separate samplers and images, only emit the combined samplers.
3804 if (skip_separate_image_sampler)
3805 {
3806 // Sampler buffers are always used without a sampler, and they will also work in regular GL.
3807 bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
3808 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
3809 bool separate_sampler = type.basetype == SPIRType::Sampler;
3810 if (!sampler_buffer && (separate_image || separate_sampler))
3811 return;
3812 }
3813
3814 if (var.storage != StorageClassFunction && type.pointer &&
3815 (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
3816 type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
3817 type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
3818 type.storage == StorageClassHitAttributeKHR) &&
3819 !is_hidden_variable(var))
3820 {
3821 emit_uniform(var);
3822 emitted = true;
3823 }
3824 });
3825
3826 if (emitted)
3827 statement(ts: "");
3828 emitted = false;
3829
3830 bool emitted_base_instance = false;
3831
3832 // Output in/out interfaces.
3833 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3834 auto &type = this->get<SPIRType>(id: var.basetype);
3835
3836 bool is_hidden = is_hidden_variable(var);
3837
3838 // Unused output I/O variables might still be required to implement framebuffer fetch.
3839 if (var.storage == StorageClassOutput && !is_legacy() &&
3840 location_is_framebuffer_fetch(location: get_decoration(id: var.self, decoration: DecorationLocation)) != 0)
3841 {
3842 is_hidden = false;
3843 }
3844
3845 if (var.storage != StorageClassFunction && type.pointer &&
3846 (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
3847 interface_variable_exists_in_entry_point(id: var.self) && !is_hidden)
3848 {
3849 if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput &&
3850 type.array.size() == 1)
3851 {
3852 SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader.");
3853 }
3854 emit_interface_block(var);
3855 emitted = true;
3856 }
3857 else if (is_builtin_variable(var))
3858 {
3859 auto builtin = BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn));
3860 // For gl_InstanceIndex emulation on GLES, the API user needs to
3861 // supply this uniform.
3862
3863 // The draw parameter extension is soft-enabled on GL with some fallbacks.
3864 if (!options.vulkan_semantics)
3865 {
3866 if (!emitted_base_instance &&
3867 ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
3868 (builtin == BuiltInBaseInstance)))
3869 {
3870 statement(ts: "#ifdef GL_ARB_shader_draw_parameters");
3871 statement(ts: "#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
3872 statement(ts: "#else");
3873 // A crude, but simple workaround which should be good enough for non-indirect draws.
3874 statement(ts: "uniform int SPIRV_Cross_BaseInstance;");
3875 statement(ts: "#endif");
3876 emitted = true;
3877 emitted_base_instance = true;
3878 }
3879 else if (builtin == BuiltInBaseVertex)
3880 {
3881 statement(ts: "#ifdef GL_ARB_shader_draw_parameters");
3882 statement(ts: "#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
3883 statement(ts: "#else");
3884 // A crude, but simple workaround which should be good enough for non-indirect draws.
3885 statement(ts: "uniform int SPIRV_Cross_BaseVertex;");
3886 statement(ts: "#endif");
3887 }
3888 else if (builtin == BuiltInDrawIndex)
3889 {
3890 statement(ts: "#ifndef GL_ARB_shader_draw_parameters");
3891 // Cannot really be worked around.
3892 statement(ts: "#error GL_ARB_shader_draw_parameters is not supported.");
3893 statement(ts: "#endif");
3894 }
3895 }
3896 }
3897 });
3898
3899 // Global variables.
3900 for (auto global : global_variables)
3901 {
3902 auto &var = get<SPIRVariable>(id: global);
3903 if (is_hidden_variable(var, include_builtins: true))
3904 continue;
3905
3906 if (var.storage != StorageClassOutput)
3907 {
3908 if (!variable_is_lut(var))
3909 {
3910 add_resource_name(id: var.self);
3911
3912 string initializer;
3913 if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
3914 !var.initializer && !var.static_expression && type_can_zero_initialize(type: get_variable_data_type(var)))
3915 {
3916 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var)));
3917 }
3918
3919 statement(ts: variable_decl(variable: var), ts&: initializer, ts: ";");
3920 emitted = true;
3921 }
3922 }
3923 else if (var.initializer && maybe_get<SPIRConstant>(id: var.initializer) != nullptr)
3924 {
3925 emit_output_variable_initializer(var);
3926 }
3927 }
3928
3929 if (emitted)
3930 statement(ts: "");
3931}
3932
3933void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
3934{
3935 // If a StorageClassOutput variable has an initializer, we need to initialize it in main().
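// For a plain (non-block, non-control-point) output this boils down to hoisting the initializer
// into a const LUT and assigning it at the top of main(), roughly:
//   const vec4 _42_init = vec4(0.0);
//   void main() { FragColor = _42_init; ... }
// where the variable name and ID are, of course, hypothetical.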
3936 auto &entry_func = this->get<SPIRFunction>(id: ir.default_entry_point);
3937 auto &type = get<SPIRType>(id: var.basetype);
3938 bool is_patch = has_decoration(id: var.self, decoration: DecorationPatch);
3939 bool is_block = has_decoration(id: type.self, decoration: DecorationBlock);
3940 bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch;
3941
3942 if (is_block)
3943 {
3944 uint32_t member_count = uint32_t(type.member_types.size());
3945 bool type_is_array = type.array.size() == 1;
3946 uint32_t array_size = 1;
3947 if (type_is_array)
3948 array_size = to_array_size_literal(type);
3949 uint32_t iteration_count = is_control_point ? 1 : array_size;
3950
3951 // If the initializer is a block, we must initialize each block member one at a time.
3952 for (uint32_t i = 0; i < member_count; i++)
3953 {
3954 // These outputs might not have been properly declared, so don't initialize them in that case.
3955 if (has_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn))
3956 {
3957 if (get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn) == BuiltInCullDistance &&
3958 !cull_distance_count)
3959 continue;
3960
3961 if (get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn) == BuiltInClipDistance &&
3962 !clip_distance_count)
3963 continue;
3964 }
3965
3966 // We need to build a per-member array first, essentially transposing from AoS to SoA.
3967 // This code path hits when we have an array of blocks.
3968 string lut_name;
3969 if (type_is_array)
3970 {
3971 lut_name = join(ts: "_", ts: var.self, ts: "_", ts&: i, ts: "_init");
3972 uint32_t member_type_id = get<SPIRType>(id: var.basetype).member_types[i];
3973 auto &member_type = get<SPIRType>(id: member_type_id);
3974 auto array_type = member_type;
3975 array_type.parent_type = member_type_id;
3976 array_type.op = OpTypeArray;
3977 array_type.array.push_back(t: array_size);
3978 array_type.array_size_literal.push_back(t: true);
3979
3980 SmallVector<string> exprs;
3981 exprs.reserve(count: array_size);
3982 auto &c = get<SPIRConstant>(id: var.initializer);
3983 for (uint32_t j = 0; j < array_size; j++)
3984 exprs.push_back(t: to_expression(id: get<SPIRConstant>(id: c.subconstants[j]).subconstants[i]));
3985 statement(ts: "const ", ts: type_to_glsl(type: array_type), ts: " ", ts&: lut_name, ts: type_to_array_glsl(type: array_type, variable_id: 0), ts: " = ",
3986 ts: type_to_glsl_constructor(type: array_type), ts: "(", ts: merge(list: exprs, between: ", "), ts: ");");
3987 }
3988
3989 for (uint32_t j = 0; j < iteration_count; j++)
3990 {
3991 entry_func.fixup_hooks_in.push_back(t: [=, &var]() {
3992 AccessChainMeta meta;
3993 auto &c = this->get<SPIRConstant>(id: var.initializer);
3994
3995 uint32_t invocation_id = 0;
3996 uint32_t member_index_id = 0;
3997 if (is_control_point)
3998 {
3999 uint32_t ids = ir.increase_bound_by(count: 3);
4000 auto &uint_type = set<SPIRType>(id: ids, args: OpTypeInt);
4001 uint_type.basetype = SPIRType::UInt;
4002 uint_type.width = 32;
4003 set<SPIRExpression>(id: ids + 1, args: builtin_to_glsl(builtin: BuiltInInvocationId, storage: StorageClassInput), args&: ids, args: true);
4004 set<SPIRConstant>(id: ids + 2, args&: ids, args: i, args: false);
4005 invocation_id = ids + 1;
4006 member_index_id = ids + 2;
4007 }
4008
4009 if (is_patch)
4010 {
4011 statement(ts: "if (gl_InvocationID == 0)");
4012 begin_scope();
4013 }
4014
4015 if (type_is_array && !is_control_point)
4016 {
4017 uint32_t indices[2] = { j, i };
4018 auto chain = access_chain_internal(base: var.self, indices, count: 2, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &meta);
4019 statement(ts&: chain, ts: " = ", ts: lut_name, ts: "[", ts: j, ts: "];");
4020 }
4021 else if (is_control_point)
4022 {
4023 uint32_t indices[2] = { invocation_id, member_index_id };
4024 auto chain = access_chain_internal(base: var.self, indices, count: 2, flags: 0, meta: &meta);
4025 statement(ts&: chain, ts: " = ", ts: lut_name, ts: "[", ts: builtin_to_glsl(builtin: BuiltInInvocationId, storage: StorageClassInput), ts: "];");
4026 }
4027 else
4028 {
4029 auto chain =
4030 access_chain_internal(base: var.self, indices: &i, count: 1, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &meta);
4031 statement(ts&: chain, ts: " = ", ts: to_expression(id: c.subconstants[i]), ts: ";");
4032 }
4033
4034 if (is_patch)
4035 end_scope();
4036 });
4037 }
4038 }
4039 }
4040 else if (is_control_point)
4041 {
4042 auto lut_name = join(ts: "_", ts: var.self, ts: "_init");
4043 statement(ts: "const ", ts: type_to_glsl(type), ts: " ", ts&: lut_name, ts: type_to_array_glsl(type, variable_id: 0),
4044 ts: " = ", ts: to_expression(id: var.initializer), ts: ";");
4045 entry_func.fixup_hooks_in.push_back(t: [&, lut_name]() {
4046 statement(ts: to_expression(id: var.self), ts: "[gl_InvocationID] = ", ts: lut_name, ts: "[gl_InvocationID];");
4047 });
4048 }
4049 else if (has_decoration(id: var.self, decoration: DecorationBuiltIn) &&
4050 BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn)) == BuiltInSampleMask)
4051 {
4052 // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_<
4053 entry_func.fixup_hooks_in.push_back(t: [&] {
4054 auto &c = this->get<SPIRConstant>(id: var.initializer);
4055 uint32_t num_constants = uint32_t(c.subconstants.size());
4056 for (uint32_t i = 0; i < num_constants; i++)
4057 {
4058 // Don't use to_expression on constant since it might be uint, just fish out the raw int.
4059 statement(ts: to_expression(id: var.self), ts: "[", ts&: i, ts: "] = ",
4060 ts: convert_to_string(value: this->get<SPIRConstant>(id: c.subconstants[i]).scalar_i32()), ts: ";");
4061 }
4062 });
4063 }
4064 else
4065 {
4066 auto lut_name = join(ts: "_", ts: var.self, ts: "_init");
4067 statement(ts: "const ", ts: type_to_glsl(type), ts: " ", ts&: lut_name,
4068 ts: type_to_array_glsl(type, variable_id: var.self), ts: " = ", ts: to_expression(id: var.initializer), ts: ";");
4069 entry_func.fixup_hooks_in.push_back(t: [&, lut_name, is_patch]() {
4070 if (is_patch)
4071 {
4072 statement(ts: "if (gl_InvocationID == 0)");
4073 begin_scope();
4074 }
4075 statement(ts: to_expression(id: var.self), ts: " = ", ts: lut_name, ts: ";");
4076 if (is_patch)
4077 end_scope();
4078 });
4079 }
4080}
4081
4082void CompilerGLSL::emit_subgroup_arithmetic_workaround(const std::string &func, Op op, GroupOperation group_op)
4083{
4084 std::string result;
4085 switch (group_op)
4086 {
4087 case GroupOperationReduce:
4088 result = "reduction";
4089 break;
4090
4091 case GroupOperationExclusiveScan:
4092 result = "excl_scan";
4093 break;
4094
4095 case GroupOperationInclusiveScan:
4096 result = "incl_scan";
4097 break;
4098
4099 default:
4100 SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
4101 }
4102
4103 struct TypeInfo
4104 {
4105 std::string type;
4106 std::string identity;
4107 };
4108
4109 std::vector<TypeInfo> type_infos;
4110 switch (op)
4111 {
4112 case OpGroupNonUniformIAdd:
4113 {
4114 type_infos.emplace_back(args: TypeInfo{ .type: "uint", .identity: "0u" });
4115 type_infos.emplace_back(args: TypeInfo{ .type: "uvec2", .identity: "uvec2(0u)" });
4116 type_infos.emplace_back(args: TypeInfo{ .type: "uvec3", .identity: "uvec3(0u)" });
4117 type_infos.emplace_back(args: TypeInfo{ .type: "uvec4", .identity: "uvec4(0u)" });
4118 type_infos.emplace_back(args: TypeInfo{ .type: "int", .identity: "0" });
4119 type_infos.emplace_back(args: TypeInfo{ .type: "ivec2", .identity: "ivec2(0)" });
4120 type_infos.emplace_back(args: TypeInfo{ .type: "ivec3", .identity: "ivec3(0)" });
4121 type_infos.emplace_back(args: TypeInfo{ .type: "ivec4", .identity: "ivec4(0)" });
4122 break;
4123 }
4124
4125 case OpGroupNonUniformFAdd:
4126 {
4127 type_infos.emplace_back(args: TypeInfo{ .type: "float", .identity: "0.0f" });
4128 type_infos.emplace_back(args: TypeInfo{ .type: "vec2", .identity: "vec2(0.0f)" });
4129 type_infos.emplace_back(args: TypeInfo{ .type: "vec3", .identity: "vec3(0.0f)" });
4130 type_infos.emplace_back(args: TypeInfo{ .type: "vec4", .identity: "vec4(0.0f)" });
4131 // ARB_gpu_shader_fp64 is core in GL 4.0, which in turn is required by NV_shader_thread_shuffle.
4132 type_infos.emplace_back(args: TypeInfo{ .type: "double", .identity: "0.0LF" });
4133 type_infos.emplace_back(args: TypeInfo{ .type: "dvec2", .identity: "dvec2(0.0LF)" });
4134 type_infos.emplace_back(args: TypeInfo{ .type: "dvec3", .identity: "dvec3(0.0LF)" });
4135 type_infos.emplace_back(args: TypeInfo{ .type: "dvec4", .identity: "dvec4(0.0LF)" });
4136 break;
4137 }
4138
4139 case OpGroupNonUniformIMul:
4140 {
4141 type_infos.emplace_back(args: TypeInfo{ .type: "uint", .identity: "1u" });
4142 type_infos.emplace_back(args: TypeInfo{ .type: "uvec2", .identity: "uvec2(1u)" });
4143 type_infos.emplace_back(args: TypeInfo{ .type: "uvec3", .identity: "uvec3(1u)" });
4144 type_infos.emplace_back(args: TypeInfo{ .type: "uvec4", .identity: "uvec4(1u)" });
4145 type_infos.emplace_back(args: TypeInfo{ .type: "int", .identity: "1" });
4146 type_infos.emplace_back(args: TypeInfo{ .type: "ivec2", .identity: "ivec2(1)" });
4147 type_infos.emplace_back(args: TypeInfo{ .type: "ivec3", .identity: "ivec3(1)" });
4148 type_infos.emplace_back(args: TypeInfo{ .type: "ivec4", .identity: "ivec4(1)" });
4149 break;
4150 }
4151
4152 case OpGroupNonUniformFMul:
4153 {
4154 type_infos.emplace_back(args: TypeInfo{ .type: "float", .identity: "1.0f" });
4155 type_infos.emplace_back(args: TypeInfo{ .type: "vec2", .identity: "vec2(1.0f)" });
4156 type_infos.emplace_back(args: TypeInfo{ .type: "vec3", .identity: "vec3(1.0f)" });
4157 type_infos.emplace_back(args: TypeInfo{ .type: "vec4", .identity: "vec4(1.0f)" });
4158 type_infos.emplace_back(args: TypeInfo{ .type: "double", .identity: "0.0LF" });
4159 type_infos.emplace_back(args: TypeInfo{ .type: "dvec2", .identity: "dvec2(1.0LF)" });
4160 type_infos.emplace_back(args: TypeInfo{ .type: "dvec3", .identity: "dvec3(1.0LF)" });
4161 type_infos.emplace_back(args: TypeInfo{ .type: "dvec4", .identity: "dvec4(1.0LF)" });
4162 break;
4163 }
4164
4165 default:
4166 SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
4167 }
4168
4169 const bool op_is_addition = op == OpGroupNonUniformIAdd || op == OpGroupNonUniformFAdd;
4170 const bool op_is_multiplication = op == OpGroupNonUniformIMul || op == OpGroupNonUniformFMul;
4171 std::string op_symbol;
4172 if (op_is_addition)
4173 {
4174 op_symbol = "+=";
4175 }
4176 else if (op_is_multiplication)
4177 {
4178 op_symbol = "*=";
4179 }
4180
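// Each pass through the loop below emits one helper overload per type, roughly shaped like
// (reduction over uint shown; the actual function name comes from the caller):
//   uint spvSubgroupAdd(uint v)
//   {
//       uint reduction = 0u;
//       uvec4 active_threads = subgroupBallot(true);
//       if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize) { /* shuffleXorNV/shuffleUpNV combine */ }
//       else { /* shuffleNV over the active invocations only */ }
//       return reduction;
//   }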
4181 for (const TypeInfo &t : type_infos)
4182 {
4183 statement(ts: t.type, ts: " ", ts: func, ts: "(", ts: t.type, ts: " v)");
4184 begin_scope();
4185 statement(ts: t.type, ts: " ", ts&: result, ts: " = ", ts: t.identity, ts: ";");
4186 statement(ts: "uvec4 active_threads = subgroupBallot(true);");
4187 statement(ts: "if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)");
4188 begin_scope();
4189 statement(ts: "uint total = gl_SubgroupSize / 2u;");
4190 statement(ts&: result, ts: " = v;");
4191 statement(ts: "for (uint i = 1u; i <= total; i <<= 1u)");
4192 begin_scope();
4193 statement(ts: "bool valid;");
4194 if (group_op == GroupOperationReduce)
4195 {
4196 statement(ts: t.type, ts: " s = shuffleXorNV(", ts&: result, ts: ", i, gl_SubgroupSize, valid);");
4197 }
4198 else if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
4199 {
4200 statement(ts: t.type, ts: " s = shuffleUpNV(", ts&: result, ts: ", i, gl_SubgroupSize, valid);");
4201 }
4202 if (op_is_addition || op_is_multiplication)
4203 {
4204 statement(ts&: result, ts: " ", ts&: op_symbol, ts: " valid ? s : ", ts: t.identity, ts: ";");
4205 }
4206 end_scope();
4207 if (group_op == GroupOperationExclusiveScan)
4208 {
4209 statement(ts&: result, ts: " = shuffleUpNV(", ts&: result, ts: ", 1u, gl_SubgroupSize);");
4210 statement(ts: "if (subgroupElect())");
4211 begin_scope();
4212 statement(ts&: result, ts: " = ", ts: t.identity, ts: ";");
4213 end_scope();
4214 }
4215 end_scope();
4216 statement(ts: "else");
4217 begin_scope();
4218 if (group_op == GroupOperationExclusiveScan)
4219 {
4220 statement(ts: "uint total = subgroupBallotBitCount(gl_SubgroupLtMask);");
4221 }
4222 else if (group_op == GroupOperationInclusiveScan)
4223 {
4224 statement(ts: "uint total = subgroupBallotBitCount(gl_SubgroupLeMask);");
4225 }
4226 statement(ts: "for (uint i = 0u; i < gl_SubgroupSize; ++i)");
4227 begin_scope();
4228 statement(ts: "bool valid = subgroupBallotBitExtract(active_threads, i);");
4229 statement(ts: t.type, ts: " s = shuffleNV(v, i, gl_SubgroupSize);");
4230 if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
4231 {
4232 statement(ts: "valid = valid && (i < total);");
4233 }
4234 if (op_is_addition || op_is_multiplication)
4235 {
4236 statement(ts&: result, ts: " ", ts&: op_symbol, ts: " valid ? s : ", ts: t.identity, ts: ";");
4237 }
4238 end_scope();
4239 end_scope();
4240 statement(ts: "return ", ts&: result, ts: ";");
4241 end_scope();
4242 }
4243}
4244
4245void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
4246{
4247 static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
4248 "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" };
4249
4250 if (!options.vulkan_semantics)
4251 {
4252 using Supp = ShaderSubgroupSupportHelper;
4253 auto result = shader_subgroup_supporter.resolve();
4254
4255 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupMask))
4256 {
4257 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupMask, r: result);
4258
4259 for (auto &e : exts)
4260 {
4261 const char *name = Supp::get_extension_name(c: e);
4262 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4263
4264 switch (e)
4265 {
4266 case Supp::NV_shader_thread_group:
4267 statement(ts: "#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)");
4268 statement(ts: "#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)");
4269 statement(ts: "#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)");
4270 statement(ts: "#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)");
4271 statement(ts: "#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)");
4272 break;
4273 case Supp::ARB_shader_ballot:
4274 statement(ts: "#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)");
4275 statement(ts: "#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)");
4276 statement(ts: "#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)");
4277 statement(ts: "#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)");
4278 statement(ts: "#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)");
4279 break;
4280 default:
4281 break;
4282 }
4283 }
4284 statement(ts: "#endif");
4285 statement(ts: "");
4286 }
4287
4288 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupSize))
4289 {
4290 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupSize, r: result);
4291
4292 for (auto &e : exts)
4293 {
4294 const char *name = Supp::get_extension_name(c: e);
4295 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4296
4297 switch (e)
4298 {
4299 case Supp::NV_shader_thread_group:
4300 statement(ts: "#define gl_SubgroupSize gl_WarpSizeNV");
4301 break;
4302 case Supp::ARB_shader_ballot:
4303 statement(ts: "#define gl_SubgroupSize gl_SubGroupSizeARB");
4304 break;
4305 case Supp::AMD_gcn_shader:
4306 statement(ts: "#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
4307 break;
4308 default:
4309 break;
4310 }
4311 }
4312 statement(ts: "#endif");
4313 statement(ts: "");
4314 }
4315
4316 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupInvocationID))
4317 {
4318 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupInvocationID, r: result);
4319
4320 for (auto &e : exts)
4321 {
4322 const char *name = Supp::get_extension_name(c: e);
4323 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4324
4325 switch (e)
4326 {
4327 case Supp::NV_shader_thread_group:
4328 statement(ts: "#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
4329 break;
4330 case Supp::ARB_shader_ballot:
4331 statement(ts: "#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
4332 break;
4333 default:
4334 break;
4335 }
4336 }
4337 statement(ts: "#endif");
4338 statement(ts: "");
4339 }
4340
4341 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupID))
4342 {
4343 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupID, r: result);
4344
4345 for (auto &e : exts)
4346 {
4347 const char *name = Supp::get_extension_name(c: e);
4348 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4349
4350 switch (e)
4351 {
4352 case Supp::NV_shader_thread_group:
4353 statement(ts: "#define gl_SubgroupID gl_WarpIDNV");
4354 break;
4355 default:
4356 break;
4357 }
4358 }
4359 statement(ts: "#endif");
4360 statement(ts: "");
4361 }
4362
4363 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::NumSubgroups))
4364 {
4365 auto exts = Supp::get_candidates_for_feature(ft: Supp::NumSubgroups, r: result);
4366
4367 for (auto &e : exts)
4368 {
4369 const char *name = Supp::get_extension_name(c: e);
4370 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4371
4372 switch (e)
4373 {
4374 case Supp::NV_shader_thread_group:
4375 statement(ts: "#define gl_NumSubgroups gl_WarpsPerSMNV");
4376 break;
4377 default:
4378 break;
4379 }
4380 }
4381 statement(ts: "#endif");
4382 statement(ts: "");
4383 }
4384
4385 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBroadcast_First))
4386 {
4387 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBroadcast_First, r: result);
4388
4389 for (auto &e : exts)
4390 {
4391 const char *name = Supp::get_extension_name(c: e);
4392 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4393
4394 switch (e)
4395 {
4396 case Supp::NV_shader_thread_shuffle:
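					// Emulate broadcast-first by shuffling from the lowest active lane
					// (findLSB of the active-thread ballot); plain broadcast maps directly to shuffleNV.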
4397 for (const char *t : workaround_types)
4398 {
4399 statement(ts&: t, ts: " subgroupBroadcastFirst(", ts&: t,
4400 ts: " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
4401 }
4402 for (const char *t : workaround_types)
4403 {
4404 statement(ts&: t, ts: " subgroupBroadcast(", ts&: t,
4405 ts: " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
4406 }
4407 break;
4408 case Supp::ARB_shader_ballot:
4409 for (const char *t : workaround_types)
4410 {
4411 statement(ts&: t, ts: " subgroupBroadcastFirst(", ts&: t,
4412 ts: " value) { return readFirstInvocationARB(value); }");
4413 }
4414 for (const char *t : workaround_types)
4415 {
4416 statement(ts&: t, ts: " subgroupBroadcast(", ts&: t,
4417 ts: " value, uint id) { return readInvocationARB(value, id); }");
4418 }
4419 break;
4420 default:
4421 break;
4422 }
4423 }
4424 statement(ts: "#endif");
4425 statement(ts: "");
4426 }
4427
4428 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotFindLSB_MSB))
4429 {
4430 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBallotFindLSB_MSB, r: result);
4431
4432 for (auto &e : exts)
4433 {
4434 const char *name = Supp::get_extension_name(c: e);
4435 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4436
4437 switch (e)
4438 {
4439 case Supp::NV_shader_thread_group:
4440 statement(ts: "uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
4441 statement(ts: "uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
4442 break;
4443 default:
4444 break;
4445 }
4446 }
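			// Generic fallback when no vendor extension applies: check the low 32 invocations first
			// for FindLSB (adding 32 when falling through to the high word), and the high word first
			// for FindMSB, mirroring a ballot laid out in value.xy.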
4447 statement(ts: "#else");
4448 statement(ts: "uint subgroupBallotFindLSB(uvec4 value)");
4449 begin_scope();
4450 statement(ts: "int firstLive = findLSB(value.x);");
4451 statement(ts: "return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
4452 end_scope();
4453 statement(ts: "uint subgroupBallotFindMSB(uvec4 value)");
4454 begin_scope();
4455 statement(ts: "int firstLive = findMSB(value.y);");
4456 statement(ts: "return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
4457 end_scope();
4458 statement(ts: "#endif");
4459 statement(ts: "");
4460 }
4461
4462 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupAll_Any_AllEqualBool))
4463 {
4464 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupAll_Any_AllEqualBool, r: result);
4465
4466 for (auto &e : exts)
4467 {
4468 const char *name = Supp::get_extension_name(c: e);
4469 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4470
4471 switch (e)
4472 {
4473 case Supp::NV_gpu_shader_5:
4474 statement(ts: "bool subgroupAll(bool value) { return allThreadsNV(value); }");
4475 statement(ts: "bool subgroupAny(bool value) { return anyThreadNV(value); }");
4476 statement(ts: "bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
4477 break;
4478 case Supp::ARB_shader_group_vote:
4479 statement(ts: "bool subgroupAll(bool v) { return allInvocationsARB(v); }");
4480 statement(ts: "bool subgroupAny(bool v) { return anyInvocationARB(v); }");
4481 statement(ts: "bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
4482 break;
4483 case Supp::AMD_gcn_shader:
4484 statement(ts: "bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
4485 statement(ts: "bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
4486 statement(ts: "bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
4487 "b == ballotAMD(true); }");
4488 break;
4489 default:
4490 break;
4491 }
4492 }
4493 statement(ts: "#endif");
4494 statement(ts: "");
4495 }
4496
4497 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupAllEqualT))
4498 {
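			// Emulate the typed subgroupAllEqual(T) overloads by comparing each lane's value against
			// a broadcast of the first lane's value, stamped out for each workaround type via a
			// temporary helper macro.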
4499 statement(ts: "#ifndef GL_KHR_shader_subgroup_vote");
4500 statement(
4501 ts: "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
4502 "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
4503 for (const char *t : workaround_types)
4504 statement(ts: "_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", ts&: t, ts: ")");
4505 statement(ts: "#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
4506 statement(ts: "#endif");
4507 statement(ts: "");
4508 }
4509
4510 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallot))
4511 {
4512 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBallot, r: result);
4513
4514 for (auto &e : exts)
4515 {
4516 const char *name = Supp::get_extension_name(c: e);
4517 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4518
4519 switch (e)
4520 {
4521 case Supp::NV_shader_thread_group:
4522 statement(ts: "uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
4523 break;
4524 case Supp::ARB_shader_ballot:
4525 statement(ts: "uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
4526 break;
4527 default:
4528 break;
4529 }
4530 }
4531 statement(ts: "#endif");
4532 statement(ts: "");
4533 }
4534
4535 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupElect))
4536 {
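			// Emulated as "this invocation is the lowest-numbered active lane",
			// expressed in terms of subgroupBallot() and subgroupBallotFindLSB().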
4537 statement(ts: "#ifndef GL_KHR_shader_subgroup_basic");
4538 statement(ts: "bool subgroupElect()");
4539 begin_scope();
4540 statement(ts: "uvec4 activeMask = subgroupBallot(true);");
4541 statement(ts: "uint firstLive = subgroupBallotFindLSB(activeMask);");
4542 statement(ts: "return gl_SubgroupInvocationID == firstLive;");
4543 end_scope();
4544 statement(ts: "#endif");
4545 statement(ts: "");
4546 }
4547
4548 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBarrier))
4549 {
		// The extensions we're using in place of GL_KHR_shader_subgroup_basic state
		// that subgroups execute in lockstep, so this barrier is implicit.
		// However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
		// and a specific test of optimizing scans by leveraging lock-step invocation execution
		// has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
4555 // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
4556 statement(ts: "#ifndef GL_KHR_shader_subgroup_basic");
4557 statement(ts: "void subgroupBarrier() { memoryBarrierShared(); }");
4558 statement(ts: "#endif");
4559 statement(ts: "");
4560 }
4561
4562 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupMemBarrier))
4563 {
4564 if (model == spv::ExecutionModelGLCompute)
4565 {
4566 statement(ts: "#ifndef GL_KHR_shader_subgroup_basic");
4567 statement(ts: "void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
4568 statement(ts: "void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
4569 statement(ts: "void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
4570 statement(ts: "void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
4571 statement(ts: "#endif");
4572 }
4573 else
4574 {
4575 statement(ts: "#ifndef GL_KHR_shader_subgroup_basic");
4576 statement(ts: "void subgroupMemoryBarrier() { memoryBarrier(); }");
4577 statement(ts: "void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
4578 statement(ts: "void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
4579 statement(ts: "#endif");
4580 }
4581 statement(ts: "");
4582 }
4583
4584 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
4585 {
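			// Inclusive/exclusive bit counts mask the ballot with gl_SubgroupLeMask/gl_SubgroupLtMask
			// and popcount the result; the NV path only needs the low word since NV warps never
			// exceed 32 invocations.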
4586 statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot");
4587 statement(ts: "bool subgroupInverseBallot(uvec4 value)");
4588 begin_scope();
4589 statement(ts: "return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
4590 end_scope();
4591
4592 statement(ts: "uint subgroupBallotInclusiveBitCount(uvec4 value)");
4593 begin_scope();
4594 statement(ts: "uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
4595 statement(ts: "ivec2 c = bitCount(v);");
4596 statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group");
4597 statement(ts: "return uint(c.x);");
4598 statement_no_indent(ts: "#else");
4599 statement(ts: "return uint(c.x + c.y);");
4600 statement_no_indent(ts: "#endif");
4601 end_scope();
4602
4603 statement(ts: "uint subgroupBallotExclusiveBitCount(uvec4 value)");
4604 begin_scope();
4605 statement(ts: "uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
4606 statement(ts: "ivec2 c = bitCount(v);");
4607 statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group");
4608 statement(ts: "return uint(c.x);");
4609 statement_no_indent(ts: "#else");
4610 statement(ts: "return uint(c.x + c.y);");
4611 statement_no_indent(ts: "#endif");
4612 end_scope();
4613 statement(ts: "#endif");
4614 statement(ts: "");
4615 }
4616
4617 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotBitCount))
4618 {
4619 statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot");
4620 statement(ts: "uint subgroupBallotBitCount(uvec4 value)");
4621 begin_scope();
4622 statement(ts: "ivec2 c = bitCount(value.xy);");
4623 statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group");
4624 statement(ts: "return uint(c.x);");
4625 statement_no_indent(ts: "#else");
4626 statement(ts: "return uint(c.x + c.y);");
4627 statement_no_indent(ts: "#endif");
4628 end_scope();
4629 statement(ts: "#endif");
4630 statement(ts: "");
4631 }
4632
4633 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotBitExtract))
4634 {
4635 statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot");
4636 statement(ts: "bool subgroupBallotBitExtract(uvec4 value, uint index)");
4637 begin_scope();
4638 statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group");
4639 statement(ts: "uint shifted = value.x >> index;");
4640 statement_no_indent(ts: "#else");
4641 statement(ts: "uint shifted = value[index >> 5u] >> (index & 0x1fu);");
4642 statement_no_indent(ts: "#endif");
4643 statement(ts: "return (shifted & 1u) != 0u;");
4644 end_scope();
4645 statement(ts: "#endif");
4646 statement(ts: "");
4647 }
4648
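		// Helper which, for a requested subgroup arithmetic feature, emits the
		// NV_shader_thread_shuffle-based workaround for the corresponding GLSL function,
		// wrapped in "#if defined(GL_NV_shader_thread_shuffle)" ... "#endif".
		// For example, the first call below emits subgroupAdd() overloads implementing a full reduction.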
4649 auto arithmetic_feature_helper =
4650 [&](Supp::Feature feat, std::string func_name, spv::Op op, spv::GroupOperation group_op)
4651 {
4652 if (shader_subgroup_supporter.is_feature_requested(feature: feat))
4653 {
4654 auto exts = Supp::get_candidates_for_feature(ft: feat, r: result);
4655 for (auto &e : exts)
4656 {
4657 const char *name = Supp::get_extension_name(c: e);
4658 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4659
4660 switch (e)
4661 {
4662 case Supp::NV_shader_thread_shuffle:
4663 emit_subgroup_arithmetic_workaround(func: func_name, op, group_op);
4664 break;
4665 default:
4666 break;
4667 }
4668 }
4669 statement(ts: "#endif");
4670 statement(ts: "");
4671 }
4672 };
4673
4674 arithmetic_feature_helper(Supp::SubgroupArithmeticIAddReduce, "subgroupAdd", OpGroupNonUniformIAdd,
4675 GroupOperationReduce);
4676 arithmetic_feature_helper(Supp::SubgroupArithmeticIAddExclusiveScan, "subgroupExclusiveAdd",
4677 OpGroupNonUniformIAdd, GroupOperationExclusiveScan);
4678 arithmetic_feature_helper(Supp::SubgroupArithmeticIAddInclusiveScan, "subgroupInclusiveAdd",
4679 OpGroupNonUniformIAdd, GroupOperationInclusiveScan);
4680 arithmetic_feature_helper(Supp::SubgroupArithmeticFAddReduce, "subgroupAdd", OpGroupNonUniformFAdd,
4681 GroupOperationReduce);
4682 arithmetic_feature_helper(Supp::SubgroupArithmeticFAddExclusiveScan, "subgroupExclusiveAdd",
4683 OpGroupNonUniformFAdd, GroupOperationExclusiveScan);
4684 arithmetic_feature_helper(Supp::SubgroupArithmeticFAddInclusiveScan, "subgroupInclusiveAdd",
4685 OpGroupNonUniformFAdd, GroupOperationInclusiveScan);
4686
4687 arithmetic_feature_helper(Supp::SubgroupArithmeticIMulReduce, "subgroupMul", OpGroupNonUniformIMul,
4688 GroupOperationReduce);
4689 arithmetic_feature_helper(Supp::SubgroupArithmeticIMulExclusiveScan, "subgroupExclusiveMul",
4690 OpGroupNonUniformIMul, GroupOperationExclusiveScan);
4691 arithmetic_feature_helper(Supp::SubgroupArithmeticIMulInclusiveScan, "subgroupInclusiveMul",
4692 OpGroupNonUniformIMul, GroupOperationInclusiveScan);
4693 arithmetic_feature_helper(Supp::SubgroupArithmeticFMulReduce, "subgroupMul", OpGroupNonUniformFMul,
4694 GroupOperationReduce);
4695 arithmetic_feature_helper(Supp::SubgroupArithmeticFMulExclusiveScan, "subgroupExclusiveMul",
4696 OpGroupNonUniformFMul, GroupOperationExclusiveScan);
4697 arithmetic_feature_helper(Supp::SubgroupArithmeticFMulInclusiveScan, "subgroupInclusiveMul",
4698 OpGroupNonUniformFMul, GroupOperationInclusiveScan);
4699 }
4700
4701 if (!workaround_ubo_load_overload_types.empty())
4702 {
4703 for (auto &type_id : workaround_ubo_load_overload_types)
4704 {
4705 auto &type = get<SPIRType>(id: type_id);
4706
4707 if (options.es && is_matrix(type))
4708 {
				// Need both precision variants.
				// GLSL cannot overload on precision, so we need to dispatch appropriately.
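				// For example, for a mat4 member this emits roughly:
				//   highp mat4 spvWorkaroundRowMajor(highp mat4 wrap) { return wrap; }
				//   mediump mat4 spvWorkaroundRowMajorMP(mediump mat4 wrap) { return wrap; }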
4711 statement(ts: "highp ", ts: type_to_glsl(type), ts: " spvWorkaroundRowMajor(highp ", ts: type_to_glsl(type), ts: " wrap) { return wrap; }");
4712 statement(ts: "mediump ", ts: type_to_glsl(type), ts: " spvWorkaroundRowMajorMP(mediump ", ts: type_to_glsl(type), ts: " wrap) { return wrap; }");
4713 }
4714 else
4715 {
4716 statement(ts: type_to_glsl(type), ts: " spvWorkaroundRowMajor(", ts: type_to_glsl(type), ts: " wrap) { return wrap; }");
4717 }
4718 }
4719 statement(ts: "");
4720 }
4721}
4722
4723void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed)
4724{
4725 const char *qual = "";
4726 const char *suffix = (options.es && relaxed) ? "MP" : "";
4727 if (options.es)
4728 qual = relaxed ? "mediump " : "highp ";
4729
4730 if (polyfills & PolyfillTranspose2x2)
4731 {
4732 statement(ts&: qual, ts: "mat2 spvTranspose", ts&: suffix, ts: "(", ts&: qual, ts: "mat2 m)");
4733 begin_scope();
4734 statement(ts: "return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
4735 end_scope();
4736 statement(ts: "");
4737 }
4738
4739 if (polyfills & PolyfillTranspose3x3)
4740 {
4741 statement(ts&: qual, ts: "mat3 spvTranspose", ts&: suffix, ts: "(", ts&: qual, ts: "mat3 m)");
4742 begin_scope();
4743 statement(ts: "return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
4744 end_scope();
4745 statement(ts: "");
4746 }
4747
4748 if (polyfills & PolyfillTranspose4x4)
4749 {
4750 statement(ts&: qual, ts: "mat4 spvTranspose", ts&: suffix, ts: "(", ts&: qual, ts: "mat4 m)");
4751 begin_scope();
4752 statement(ts: "return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
4753 "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
4754 end_scope();
4755 statement(ts: "");
4756 }
4757
4758 if (polyfills & PolyfillDeterminant2x2)
4759 {
4760 statement(ts&: qual, ts: "float spvDeterminant", ts&: suffix, ts: "(", ts&: qual, ts: "mat2 m)");
4761 begin_scope();
4762 statement(ts: "return m[0][0] * m[1][1] - m[0][1] * m[1][0];");
4763 end_scope();
4764 statement(ts: "");
4765 }
4766
4767 if (polyfills & PolyfillDeterminant3x3)
4768 {
4769 statement(ts&: qual, ts: "float spvDeterminant", ts&: suffix, ts: "(", ts&: qual, ts: "mat3 m)");
4770 begin_scope();
4771 statement(ts: "return dot(m[0], vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], "
4772 "m[1][2] * m[2][0] - m[1][0] * m[2][2], "
4773 "m[1][0] * m[2][1] - m[1][1] * m[2][0]));");
4774 end_scope();
4775 statement(ts: "");
4776 }
4777
4778 if (polyfills & PolyfillDeterminant4x4)
4779 {
4780 statement(ts&: qual, ts: "float spvDeterminant", ts&: suffix, ts: "(", ts&: qual, ts: "mat4 m)");
4781 begin_scope();
4782 statement(ts: "return dot(m[0], vec4("
4783 "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], "
4784 "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], "
4785 "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], "
4786 "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]));");
4787 end_scope();
4788 statement(ts: "");
4789 }
4790
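	// The matrix inverse polyfills below implement the classical adjugate divided by the determinant.
	// The temporary vector t caches cofactors that are reused both inside the constructed matrix and
	// in dot(m[0], t), which evaluates to the determinant.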
4791 if (polyfills & PolyfillMatrixInverse2x2)
4792 {
4793 statement(ts&: qual, ts: "mat2 spvInverse", ts&: suffix, ts: "(", ts&: qual, ts: "mat2 m)");
4794 begin_scope();
4795 statement(ts: "return mat2(m[1][1], -m[0][1], -m[1][0], m[0][0]) "
4796 "* (1.0 / (m[0][0] * m[1][1] - m[1][0] * m[0][1]));");
4797 end_scope();
4798 statement(ts: "");
4799 }
4800
4801 if (polyfills & PolyfillMatrixInverse3x3)
4802 {
4803 statement(ts&: qual, ts: "mat3 spvInverse", ts&: suffix, ts: "(", ts&: qual, ts: "mat3 m)");
4804 begin_scope();
4805 statement(ts&: qual, ts: "vec3 t = vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], m[1][2] * m[2][0] - m[1][0] * m[2][2], m[1][0] * m[2][1] - m[1][1] * m[2][0]);");
4806 statement(ts: "return mat3(t[0], "
4807 "m[0][2] * m[2][1] - m[0][1] * m[2][2], "
4808 "m[0][1] * m[1][2] - m[0][2] * m[1][1], "
4809 "t[1], "
4810 "m[0][0] * m[2][2] - m[0][2] * m[2][0], "
4811 "m[0][2] * m[1][0] - m[0][0] * m[1][2], "
4812 "t[2], "
4813 "m[0][1] * m[2][0] - m[0][0] * m[2][1], "
4814 "m[0][0] * m[1][1] - m[0][1] * m[1][0]) "
4815 "* (1.0 / dot(m[0], t));");
4816 end_scope();
4817 statement(ts: "");
4818 }
4819
4820 if (polyfills & PolyfillMatrixInverse4x4)
4821 {
4822 statement(ts&: qual, ts: "mat4 spvInverse", ts&: suffix, ts: "(", ts&: qual, ts: "mat4 m)");
4823 begin_scope();
4824 statement(ts&: qual, ts: "vec4 t = vec4("
4825 "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], "
4826 "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], "
4827 "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], "
4828 "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]);");
4829 statement(ts: "return mat4("
4830 "t[0], "
4831 "m[3][1] * m[2][2] * m[0][3] - m[2][1] * m[3][2] * m[0][3] - m[3][1] * m[0][2] * m[2][3] + m[0][1] * m[3][2] * m[2][3] + m[2][1] * m[0][2] * m[3][3] - m[0][1] * m[2][2] * m[3][3], "
4832 "m[1][1] * m[3][2] * m[0][3] - m[3][1] * m[1][2] * m[0][3] + m[3][1] * m[0][2] * m[1][3] - m[0][1] * m[3][2] * m[1][3] - m[1][1] * m[0][2] * m[3][3] + m[0][1] * m[1][2] * m[3][3], "
4833 "m[2][1] * m[1][2] * m[0][3] - m[1][1] * m[2][2] * m[0][3] - m[2][1] * m[0][2] * m[1][3] + m[0][1] * m[2][2] * m[1][3] + m[1][1] * m[0][2] * m[2][3] - m[0][1] * m[1][2] * m[2][3], "
4834 "t[1], "
4835 "m[2][0] * m[3][2] * m[0][3] - m[3][0] * m[2][2] * m[0][3] + m[3][0] * m[0][2] * m[2][3] - m[0][0] * m[3][2] * m[2][3] - m[2][0] * m[0][2] * m[3][3] + m[0][0] * m[2][2] * m[3][3], "
4836 "m[3][0] * m[1][2] * m[0][3] - m[1][0] * m[3][2] * m[0][3] - m[3][0] * m[0][2] * m[1][3] + m[0][0] * m[3][2] * m[1][3] + m[1][0] * m[0][2] * m[3][3] - m[0][0] * m[1][2] * m[3][3], "
4837 "m[1][0] * m[2][2] * m[0][3] - m[2][0] * m[1][2] * m[0][3] + m[2][0] * m[0][2] * m[1][3] - m[0][0] * m[2][2] * m[1][3] - m[1][0] * m[0][2] * m[2][3] + m[0][0] * m[1][2] * m[2][3], "
4838 "t[2], "
4839 "m[3][0] * m[2][1] * m[0][3] - m[2][0] * m[3][1] * m[0][3] - m[3][0] * m[0][1] * m[2][3] + m[0][0] * m[3][1] * m[2][3] + m[2][0] * m[0][1] * m[3][3] - m[0][0] * m[2][1] * m[3][3], "
4840 "m[1][0] * m[3][1] * m[0][3] - m[3][0] * m[1][1] * m[0][3] + m[3][0] * m[0][1] * m[1][3] - m[0][0] * m[3][1] * m[1][3] - m[1][0] * m[0][1] * m[3][3] + m[0][0] * m[1][1] * m[3][3], "
4841 "m[2][0] * m[1][1] * m[0][3] - m[1][0] * m[2][1] * m[0][3] - m[2][0] * m[0][1] * m[1][3] + m[0][0] * m[2][1] * m[1][3] + m[1][0] * m[0][1] * m[2][3] - m[0][0] * m[1][1] * m[2][3], "
4842 "t[3], "
4843 "m[2][0] * m[3][1] * m[0][2] - m[3][0] * m[2][1] * m[0][2] + m[3][0] * m[0][1] * m[2][2] - m[0][0] * m[3][1] * m[2][2] - m[2][0] * m[0][1] * m[3][2] + m[0][0] * m[2][1] * m[3][2], "
4844 "m[3][0] * m[1][1] * m[0][2] - m[1][0] * m[3][1] * m[0][2] - m[3][0] * m[0][1] * m[1][2] + m[0][0] * m[3][1] * m[1][2] + m[1][0] * m[0][1] * m[3][2] - m[0][0] * m[1][1] * m[3][2], "
4845 "m[1][0] * m[2][1] * m[0][2] - m[2][0] * m[1][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] - m[0][0] * m[2][1] * m[1][2] - m[1][0] * m[0][1] * m[2][2] + m[0][0] * m[1][1] * m[2][2]) "
4846 "* (1.0 / dot(m[0], t));");
4847 end_scope();
4848 statement(ts: "");
4849 }
4850
4851 if (!relaxed)
4852 {
4853 static const Polyfill polys[3][3] = {
4854 { PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 },
4855 { PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 },
4856 { PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 },
4857 };
4858
4859 static const GLSLstd450 glsl_ops[] = { GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp };
4860 static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp" };
4861 bool has_poly = false;
4862
4863 for (uint32_t i = 0; i < 3; i++)
4864 {
4865 for (uint32_t j = 0; j < 3; j++)
4866 {
4867 if ((polyfills & polys[i][j]) == 0)
4868 continue;
4869
4870 const char *types[3][4] = {
4871 { "float16_t", "f16vec2", "f16vec3", "f16vec4" },
4872 { "float", "vec2", "vec3", "vec4" },
4873 { "double", "dvec2", "dvec3", "dvec4" },
4874 };
4875
4876 for (uint32_t k = 0; k < 4; k++)
4877 {
4878 auto *type = types[j][k];
4879
4880 if (i < 2)
4881 {
4882 statement(ts: "spirv_instruction(set = \"GLSL.std.450\", id = ", ts: glsl_ops[i], ts: ") ",
4883 ts&: type, ts: " ", ts&: spv_ops[i], ts: "(", ts&: type, ts: ", ", ts&: type, ts: ");");
4884 }
4885 else
4886 {
4887 statement(ts: "spirv_instruction(set = \"GLSL.std.450\", id = ", ts: glsl_ops[i], ts: ") ",
4888 ts&: type, ts: " ", ts&: spv_ops[i], ts: "(", ts&: type, ts: ", ", ts&: type, ts: ", ", ts&: type, ts: ");");
4889 }
4890
4891 has_poly = true;
4892 }
4893 }
4894 }
4895
4896 if (has_poly)
4897 statement(ts: "");
4898 }
4899 else
4900 {
4901 // Mediump intrinsics don't work correctly, so wrap the intrinsic in an outer shell that ensures mediump
4902 // propagation.
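		// For example, for NMin on vec2 the emitted wrapper looks roughly like:
		//   mediump vec2 spvNMinRelaxed(mediump vec2 a, mediump vec2 b)
		//   {
		//       mediump vec2 res = spvNMin(a, b);
		//       return res;
		//   }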
4903
4904 static const Polyfill polys[3][3] = {
4905 { PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 },
4906 { PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 },
4907 { PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 },
4908 };
4909
4910 static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp" };
4911
4912 for (uint32_t i = 0; i < 3; i++)
4913 {
4914 for (uint32_t j = 0; j < 3; j++)
4915 {
4916 if ((polyfills & polys[i][j]) == 0)
4917 continue;
4918
4919 const char *types[3][4] = {
4920 { "float16_t", "f16vec2", "f16vec3", "f16vec4" },
4921 { "float", "vec2", "vec3", "vec4" },
4922 { "double", "dvec2", "dvec3", "dvec4" },
4923 };
4924
4925 for (uint32_t k = 0; k < 4; k++)
4926 {
4927 auto *type = types[j][k];
4928
4929 if (i < 2)
4930 {
4931 statement(ts: "mediump ", ts&: type, ts: " ", ts&: spv_ops[i], ts: "Relaxed(",
4932 ts: "mediump ", ts&: type, ts: " a, mediump ", ts&: type, ts: " b)");
4933 begin_scope();
4934 statement(ts: "mediump ", ts&: type, ts: " res = ", ts&: spv_ops[i], ts: "(a, b);");
4935 statement(ts: "return res;");
4936 end_scope();
4937 statement(ts: "");
4938 }
4939 else
4940 {
4941 statement(ts: "mediump ", ts&: type, ts: " ", ts&: spv_ops[i], ts: "Relaxed(",
4942 ts: "mediump ", ts&: type, ts: " a, mediump ", ts&: type, ts: " b, mediump ", ts&: type, ts: " c)");
4943 begin_scope();
4944 statement(ts: "mediump ", ts&: type, ts: " res = ", ts&: spv_ops[i], ts: "(a, b, c);");
4945 statement(ts: "return res;");
4946 end_scope();
4947 statement(ts: "");
4948 }
4949 }
4950 }
4951 }
4952 }
4953}
4954
4955// Returns a string representation of the ID, usable as a function arg.
// Default is to simply return the expression representation of the arg ID.
4957// Subclasses may override to modify the return value.
4958string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
4959{
4960 // Make sure that we use the name of the original variable, and not the parameter alias.
4961 uint32_t name_id = id;
4962 auto *var = maybe_get<SPIRVariable>(id);
4963 if (var && var->basevariable)
4964 name_id = var->basevariable;
4965 return to_expression(id: name_id);
4966}
4967
4968void CompilerGLSL::force_temporary_and_recompile(uint32_t id)
4969{
4970 auto res = forced_temporaries.insert(x: id);
4971
4972 // Forcing new temporaries guarantees forward progress.
4973 if (res.second)
4974 force_recompile_guarantee_forward_progress();
4975 else
4976 force_recompile();
4977}
4978
4979uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision)
4980{
4981 // Constants do not have innate precision.
4982 auto handle_type = ir.ids[id].get_type();
4983 if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef)
4984 return id;
4985
	// Ignore anything that isn't a 32-bit value.
4987 auto &type = get<SPIRType>(id: type_id);
4988 if (type.pointer)
4989 return id;
4990 if (type.basetype != SPIRType::Float && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int)
4991 return id;
4992
4993 if (precision == Options::DontCare)
4994 {
4995 // If precision is consumed as don't care (operations only consisting of constants),
4996 // we need to bind the expression to a temporary,
4997 // otherwise we have no way of controlling the precision later.
4998 auto itr = forced_temporaries.insert(x: id);
4999 if (itr.second)
5000 force_recompile_guarantee_forward_progress();
5001 return id;
5002 }
5003
5004 auto current_precision = has_decoration(id, decoration: DecorationRelaxedPrecision) ? Options::Mediump : Options::Highp;
5005 if (current_precision == precision)
5006 return id;
5007
5008 auto itr = temporary_to_mirror_precision_alias.find(x: id);
5009 if (itr == temporary_to_mirror_precision_alias.end())
5010 {
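		// First time this temporary is consumed in a different precision context:
		// create a mirror alias with the opposite precision decoration, named e.g. "mp_copy_foo"
		// or "hp_copy_foo" ("foo" being the original name), and force both IDs to be materialized
		// as temporaries so the precision of each copy stays under our control.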
5011 uint32_t alias_id = ir.increase_bound_by(count: 1);
5012 auto &m = ir.meta[alias_id];
5013 if (auto *input_m = ir.find_meta(id))
5014 m = *input_m;
5015
5016 const char *prefix;
5017 if (precision == Options::Mediump)
5018 {
5019 set_decoration(id: alias_id, decoration: DecorationRelaxedPrecision);
5020 prefix = "mp_copy_";
5021 }
5022 else
5023 {
5024 unset_decoration(id: alias_id, decoration: DecorationRelaxedPrecision);
5025 prefix = "hp_copy_";
5026 }
5027
5028 auto alias_name = join(ts&: prefix, ts: to_name(id));
5029 ParsedIR::sanitize_underscores(str&: alias_name);
5030 set_name(id: alias_id, name: alias_name);
5031
5032 emit_op(result_type: type_id, result_id: alias_id, rhs: to_expression(id), forward_rhs: true);
5033 temporary_to_mirror_precision_alias[id] = alias_id;
5034 forced_temporaries.insert(x: id);
5035 forced_temporaries.insert(x: alias_id);
5036 force_recompile_guarantee_forward_progress();
5037 id = alias_id;
5038 }
5039 else
5040 {
5041 id = itr->second;
5042 }
5043
5044 return id;
5045}
5046
5047void CompilerGLSL::handle_invalid_expression(uint32_t id)
5048{
5049 // We tried to read an invalidated expression.
5050 // This means we need another pass at compilation, but next time,
5051 // force temporary variables so that they cannot be invalidated.
5052 force_temporary_and_recompile(id);
5053
5054 // If the invalid expression happened as a result of a CompositeInsert
5055 // overwrite, we must block this from happening next iteration.
5056 if (composite_insert_overwritten.count(x: id))
5057 block_composite_insert_overwrite.insert(x: id);
5058}
5059
5060// Converts the format of the current expression from packed to unpacked,
5061// by wrapping the expression in a constructor of the appropriate type.
5062// GLSL does not support packed formats, so simply return the expression.
// Subclasses that do support packed formats will override this method.
5064string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
5065{
5066 return expr_str;
5067}
5068
// Sometimes we proactively enclose an expression where it turns out we might not have needed it after all.
5070void CompilerGLSL::strip_enclosed_expression(string &expr)
5071{
5072 if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
5073 return;
5074
5075 // Have to make sure that our first and last parens actually enclose everything inside it.
5076 uint32_t paren_count = 0;
5077 for (auto &c : expr)
5078 {
5079 if (c == '(')
5080 paren_count++;
5081 else if (c == ')')
5082 {
5083 paren_count--;
5084
5085 // If we hit 0 and this is not the final char, our first and final parens actually don't
5086 // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
5087 if (paren_count == 0 && &c != &expr.back())
5088 return;
5089 }
5090 }
5091 expr.erase(pos: expr.size() - 1, n: 1);
5092 expr.erase(position: begin(cont&: expr));
5093}
5094
5095bool CompilerGLSL::needs_enclose_expression(const std::string &expr)
5096{
5097 bool need_parens = false;
5098
5099 // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back
5100 // unary expressions.
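	// For example, "-x" must become "(-x)" so that prefixing another unary minus
	// does not produce "--x".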
5101 if (!expr.empty())
5102 {
5103 auto c = expr.front();
5104 if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
5105 need_parens = true;
5106 }
5107
5108 if (!need_parens)
5109 {
5110 uint32_t paren_count = 0;
5111 for (auto c : expr)
5112 {
5113 if (c == '(' || c == '[')
5114 paren_count++;
5115 else if (c == ')' || c == ']')
5116 {
5117 assert(paren_count);
5118 paren_count--;
5119 }
5120 else if (c == ' ' && paren_count == 0)
5121 {
5122 need_parens = true;
5123 break;
5124 }
5125 }
5126 assert(paren_count == 0);
5127 }
5128
5129 return need_parens;
5130}
5131
5132string CompilerGLSL::enclose_expression(const string &expr)
5133{
5134 // If this expression contains any spaces which are not enclosed by parentheses,
5135 // we need to enclose it so we can treat the whole string as an expression.
5136 // This happens when two expressions have been part of a binary op earlier.
5137 if (needs_enclose_expression(expr))
5138 return join(ts: '(', ts: expr, ts: ')');
5139 else
5140 return expr;
5141}
5142
5143string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
5144{
5145 // If this expression starts with an address-of operator ('&'), then
5146 // just return the part after the operator.
5147 // TODO: Strip parens if unnecessary?
5148 if (expr.front() == '&')
5149 return expr.substr(pos: 1);
5150 else if (backend.native_pointers)
5151 return join(ts: '*', ts: expr);
5152 else if (is_physical_pointer(type: expr_type) && !is_physical_pointer_to_buffer_block(type: expr_type))
5153 return join(ts: enclose_expression(expr), ts: ".value");
5154 else
5155 return expr;
5156}
5157
5158string CompilerGLSL::address_of_expression(const std::string &expr)
5159{
5160 if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
5161 {
5162 // If we have an expression which looks like (*foo), taking the address of it is the same as stripping
5163 // the first two and last characters. We might have to enclose the expression.
5164 // This doesn't work for cases like (*foo + 10),
5165 // but this is an r-value expression which we cannot take the address of anyways.
5166 return enclose_expression(expr: expr.substr(pos: 2, n: expr.size() - 3));
5167 }
5168 else if (expr.front() == '*')
5169 {
5170 // If this expression starts with a dereference operator ('*'), then
5171 // just return the part after the operator.
5172 return expr.substr(pos: 1);
5173 }
5174 else
5175 return join(ts: '&', ts: enclose_expression(expr));
5176}
5177
5178// Just like to_expression except that we enclose the expression inside parentheses if needed.
5179string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
5180{
5181 return enclose_expression(expr: to_expression(id, register_expression_read));
5182}
5183
5184// Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
5185// need_transpose must be forced to false.
5186string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
5187{
5188 return unpack_expression_type(expr_str: to_expression(id), expression_type(id),
5189 get_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID),
5190 has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked), true);
5191}
5192
5193string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
5194{
5195 // If we need to transpose, it will also take care of unpacking rules.
5196 auto *e = maybe_get<SPIRExpression>(id);
5197 bool need_transpose = e && e->need_transpose;
5198 bool is_remapped = has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID);
5199 bool is_packed = has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
5200
5201 if (!need_transpose && (is_remapped || is_packed))
5202 {
5203 return unpack_expression_type(expr_str: to_expression(id, register_expression_read),
5204 get_pointee_type(type_id: expression_type_id(id)),
5205 get_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID),
5206 has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked), false);
5207 }
5208 else
5209 return to_expression(id, register_expression_read);
5210}
5211
5212string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
5213{
5214 return enclose_expression(expr: to_unpacked_expression(id, register_expression_read));
5215}
5216
5217string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
5218{
5219 auto &type = expression_type(id);
5220
5221 if (is_pointer(type) && should_dereference(id))
5222 return dereference_expression(expr_type: type, expr: to_enclosed_expression(id, register_expression_read));
5223 else
5224 return to_expression(id, register_expression_read);
5225}
5226
5227string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
5228{
5229 auto &type = expression_type(id);
5230 if (is_pointer(type) && expression_is_lvalue(id) && !should_dereference(id))
5231 return address_of_expression(expr: to_enclosed_expression(id, register_expression_read));
5232 else
5233 return to_unpacked_expression(id, register_expression_read);
5234}
5235
5236string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
5237{
5238 auto &type = expression_type(id);
5239 if (is_pointer(type) && expression_is_lvalue(id) && !should_dereference(id))
5240 return address_of_expression(expr: to_enclosed_expression(id, register_expression_read));
5241 else
5242 return to_enclosed_unpacked_expression(id, register_expression_read);
5243}
5244
5245string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
5246{
5247 auto expr = to_enclosed_expression(id);
5248 if (has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked))
5249 return join(ts&: expr, ts: "[", ts&: index, ts: "]");
5250 else
5251 return join(ts&: expr, ts: ".", ts: index_to_swizzle(index));
5252}
5253
5254string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c,
5255 const uint32_t *chain, uint32_t length)
5256{
	// It is somewhat silly if the application actually enters this path, since it knows the constant up front.
5258 // It is useful here to extract the plain constant directly.
5259 SPIRConstant tmp;
5260 tmp.constant_type = result_type;
5261 auto &composite_type = get<SPIRType>(id: c.constant_type);
5262 assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty());
5263 assert(!c.specialization);
5264
5265 if (is_matrix(type: composite_type))
5266 {
5267 if (length == 2)
5268 {
5269 tmp.m.c[0].vecsize = 1;
5270 tmp.m.columns = 1;
5271 tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]];
5272 }
5273 else
5274 {
5275 assert(length == 1);
5276 tmp.m.c[0].vecsize = composite_type.vecsize;
5277 tmp.m.columns = 1;
5278 tmp.m.c[0] = c.m.c[chain[0]];
5279 }
5280 }
5281 else
5282 {
5283 assert(length == 1);
5284 tmp.m.c[0].vecsize = 1;
5285 tmp.m.columns = 1;
5286 tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]];
5287 }
5288
5289 return constant_expression(c: tmp);
5290}
5291
5292string CompilerGLSL::to_rerolled_array_expression(const SPIRType &parent_type,
5293 const string &base_expr, const SPIRType &type)
5294{
5295 bool remapped_boolean = parent_type.basetype == SPIRType::Struct &&
5296 type.basetype == SPIRType::Boolean &&
5297 backend.boolean_in_struct_remapped_type != SPIRType::Boolean;
5298
5299 SPIRType tmp_type { OpNop };
5300 if (remapped_boolean)
5301 {
5302 tmp_type = get<SPIRType>(id: type.parent_type);
5303 tmp_type.basetype = backend.boolean_in_struct_remapped_type;
5304 }
5305 else if (type.basetype == SPIRType::Boolean && backend.boolean_in_struct_remapped_type != SPIRType::Boolean)
5306 {
5307 // It's possible that we have an r-value expression that was OpLoaded from a struct.
		// We have to reroll this and explicitly cast the input to bool, because the r-value uses the remapped type (e.g. short).
5309 tmp_type = get<SPIRType>(id: type.parent_type);
5310 remapped_boolean = true;
5311 }
5312
5313 uint32_t size = to_array_size_literal(type);
5314 auto &parent = get<SPIRType>(id: type.parent_type);
5315 string expr = "{ ";
5316
5317 for (uint32_t i = 0; i < size; i++)
5318 {
5319 auto subexpr = join(ts: base_expr, ts: "[", ts: convert_to_string(t: i), ts: "]");
5320 if (!is_array(type: parent))
5321 {
5322 if (remapped_boolean)
5323 subexpr = join(ts: type_to_glsl(type: tmp_type), ts: "(", ts&: subexpr, ts: ")");
5324 expr += subexpr;
5325 }
5326 else
5327 expr += to_rerolled_array_expression(parent_type, base_expr: subexpr, type: parent);
5328
5329 if (i + 1 < size)
5330 expr += ", ";
5331 }
5332
5333 expr += " }";
5334 return expr;
5335}
5336
5337string CompilerGLSL::to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type)
5338{
5339 auto &type = expression_type(id);
5340
5341 bool reroll_array = false;
5342 bool remapped_boolean = parent_type.basetype == SPIRType::Struct &&
5343 type.basetype == SPIRType::Boolean &&
5344 backend.boolean_in_struct_remapped_type != SPIRType::Boolean;
5345
5346 if (is_array(type))
5347 {
5348 reroll_array = !backend.array_is_value_type ||
5349 (block_like_type && !backend.array_is_value_type_in_buffer_blocks);
5350
5351 if (remapped_boolean)
5352 {
5353 // Forced to reroll if we have to change bool[] to short[].
5354 reroll_array = true;
5355 }
5356 }
5357
5358 if (reroll_array)
5359 {
5360 // For this case, we need to "re-roll" an array initializer from a temporary.
5361 // We cannot simply pass the array directly, since it decays to a pointer and it cannot
5362 // participate in a struct initializer. E.g.
5363 // float arr[2] = { 1.0, 2.0 };
5364 // Foo foo = { arr }; must be transformed to
5365 // Foo foo = { { arr[0], arr[1] } };
5366 // The array sizes cannot be deduced from specialization constants since we cannot use any loops.
5367
5368 // We're only triggering one read of the array expression, but this is fine since arrays have to be declared
5369 // as temporaries anyways.
5370 return to_rerolled_array_expression(parent_type, base_expr: to_enclosed_expression(id), type);
5371 }
5372 else
5373 {
5374 auto expr = to_unpacked_expression(id);
5375 if (remapped_boolean)
5376 {
5377 auto tmp_type = type;
5378 tmp_type.basetype = backend.boolean_in_struct_remapped_type;
5379 expr = join(ts: type_to_glsl(type: tmp_type), ts: "(", ts&: expr, ts: ")");
5380 }
5381
5382 return expr;
5383 }
5384}
5385
5386string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id)
5387{
5388 string expr = to_expression(id);
5389
5390 if (has_decoration(id, decoration: DecorationNonUniform))
5391 convert_non_uniform_expression(expr, ptr_id: id);
5392
5393 return expr;
5394}
5395
5396string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
5397{
5398 auto itr = invalid_expressions.find(x: id);
5399 if (itr != end(cont&: invalid_expressions))
5400 handle_invalid_expression(id);
5401
5402 if (ir.ids[id].get_type() == TypeExpression)
5403 {
5404 // We might have a more complex chain of dependencies.
5405 // A possible scenario is that we
5406 //
5407 // %1 = OpLoad
	// %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
5409 // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
5410 // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
5411 // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before.
5412 //
5413 // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
5414 // and see that we should not forward reads of the original variable.
5415 auto &expr = get<SPIRExpression>(id);
5416 for (uint32_t dep : expr.expression_dependencies)
5417 if (invalid_expressions.find(x: dep) != end(cont&: invalid_expressions))
5418 handle_invalid_expression(id: dep);
5419 }
5420
5421 if (register_expression_read)
5422 track_expression_read(id);
5423
5424 switch (ir.ids[id].get_type())
5425 {
5426 case TypeExpression:
5427 {
5428 auto &e = get<SPIRExpression>(id);
5429 if (e.base_expression)
5430 return to_enclosed_expression(id: e.base_expression) + e.expression;
5431 else if (e.need_transpose)
5432 {
5433 // This should not be reached for access chains, since we always deal explicitly with transpose state
5434 // when consuming an access chain expression.
5435 uint32_t physical_type_id = get_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID);
5436 bool is_packed = has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
5437 bool relaxed = has_decoration(id, decoration: DecorationRelaxedPrecision);
5438 return convert_row_major_matrix(exp_str: e.expression, exp_type: get<SPIRType>(id: e.expression_type), physical_type_id,
5439 is_packed, relaxed);
5440 }
5441 else if (flattened_structs.count(x: id))
5442 {
5443 return load_flattened_struct(basename: e.expression, type: get<SPIRType>(id: e.expression_type));
5444 }
5445 else
5446 {
5447 if (is_forcing_recompilation())
5448 {
5449 // During first compilation phase, certain expression patterns can trigger exponential growth of memory.
5450 // Avoid this by returning dummy expressions during this phase.
5451 // Do not use empty expressions here, because those are sentinels for other cases.
5452 return "_";
5453 }
5454 else
5455 return e.expression;
5456 }
5457 }
5458
5459 case TypeConstant:
5460 {
5461 auto &c = get<SPIRConstant>(id);
5462 auto &type = get<SPIRType>(id: c.constant_type);
5463
5464 // WorkGroupSize may be a constant.
5465 if (has_decoration(id: c.self, decoration: DecorationBuiltIn))
5466 return builtin_to_glsl(builtin: BuiltIn(get_decoration(id: c.self, decoration: DecorationBuiltIn)), storage: StorageClassGeneric);
5467 else if (c.specialization)
5468 {
5469 if (backend.workgroup_size_is_hidden)
5470 {
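				// If this spec constant maps to a component of the workgroup size and the backend
				// hides gl_WorkGroupSize, emit the builtin's component directly (bitcast if the
				// declared type is not uint) instead of referring to the constant by name.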
5471 int wg_index = get_constant_mapping_to_workgroup_component(c);
5472 if (wg_index >= 0)
5473 {
5474 auto wg_size = join(ts: builtin_to_glsl(builtin: BuiltInWorkgroupSize, storage: StorageClassInput), ts: vector_swizzle(vecsize: 1, index: wg_index));
5475 if (type.basetype != SPIRType::UInt)
5476 wg_size = bitcast_expression(target_type: type, expr_type: SPIRType::UInt, expr: wg_size);
5477 return wg_size;
5478 }
5479 }
5480
5481 if (expression_is_forwarded(id))
5482 return constant_expression(c);
5483
5484 return to_name(id);
5485 }
5486 else if (c.is_used_as_lut)
5487 return to_name(id);
5488 else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
5489 return to_name(id);
5490 else if (!type.array.empty() && !backend.can_declare_arrays_inline)
5491 return to_name(id);
5492 else
5493 return constant_expression(c);
5494 }
5495
5496 case TypeConstantOp:
5497 return to_name(id);
5498
5499 case TypeVariable:
5500 {
5501 auto &var = get<SPIRVariable>(id);
		// If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
		// since the variable has not been declared yet.
5504 if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
5505 {
5506 // We might try to load from a loop variable before it has been initialized.
			// Prefer the static expression and fall back to the initializer.
5508 if (var.static_expression)
5509 return to_expression(id: var.static_expression);
5510 else if (var.initializer)
5511 return to_expression(id: var.initializer);
5512 else
5513 {
5514 // We cannot declare the variable yet, so have to fake it.
5515 uint32_t undef_id = ir.increase_bound_by(count: 1);
5516 return emit_uninitialized_temporary_expression(type: get_variable_data_type_id(var), id: undef_id).expression;
5517 }
5518 }
5519 else if (var.deferred_declaration)
5520 {
5521 var.deferred_declaration = false;
5522 return variable_decl(variable: var);
5523 }
5524 else if (flattened_structs.count(x: id))
5525 {
5526 return load_flattened_struct(basename: to_name(id), type: get<SPIRType>(id: var.basetype));
5527 }
5528 else
5529 {
5530 auto &dec = ir.meta[var.self].decoration;
5531 if (dec.builtin)
5532 return builtin_to_glsl(builtin: dec.builtin_type, storage: var.storage);
5533 else
5534 return to_name(id);
5535 }
5536 }
5537
5538 case TypeCombinedImageSampler:
	// We should never take the expression of this type directly.
5540 // The intention is that texture sampling functions will extract the image and samplers
5541 // separately and take their expressions as needed.
5542 // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
5543 // expression ala sampler2D(texture, sampler).
5544 SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
5545
5546 case TypeAccessChain:
	// We cannot express this type. It only has meaning in other OpAccessChains, OpStore or OpLoad.
5548 SPIRV_CROSS_THROW("Access chains have no default expression representation.");
5549
5550 default:
5551 return to_name(id);
5552 }
5553}
5554
5555SmallVector<ConstantID> CompilerGLSL::get_composite_constant_ids(ConstantID const_id)
5556{
5557 if (auto *constant = maybe_get<SPIRConstant>(id: const_id))
5558 {
5559 const auto &type = get<SPIRType>(id: constant->constant_type);
5560 if (is_array(type) || type.basetype == SPIRType::Struct)
5561 return constant->subconstants;
5562 if (is_matrix(type))
5563 return SmallVector<ConstantID>(constant->m.id);
5564 if (is_vector(type))
5565 return SmallVector<ConstantID>(constant->m.c[0].id);
5566 SPIRV_CROSS_THROW("Unexpected scalar constant!");
5567 }
5568 if (!const_composite_insert_ids.count(x: const_id))
5569 SPIRV_CROSS_THROW("Unimplemented for this OpSpecConstantOp!");
5570 return const_composite_insert_ids[const_id];
5571}
5572
5573void CompilerGLSL::fill_composite_constant(SPIRConstant &constant, TypeID type_id,
5574 const SmallVector<ConstantID> &initializers)
5575{
5576 auto &type = get<SPIRType>(id: type_id);
5577 constant.specialization = true;
5578 if (is_array(type) || type.basetype == SPIRType::Struct)
5579 {
5580 constant.subconstants = initializers;
5581 }
5582 else if (is_matrix(type))
5583 {
5584 constant.m.columns = type.columns;
5585 for (uint32_t i = 0; i < type.columns; ++i)
5586 {
5587 constant.m.id[i] = initializers[i];
5588 constant.m.c[i].vecsize = type.vecsize;
5589 }
5590 }
5591 else if (is_vector(type))
5592 {
5593 constant.m.c[0].vecsize = type.vecsize;
5594 for (uint32_t i = 0; i < type.vecsize; ++i)
5595 constant.m.c[0].id[i] = initializers[i];
5596 }
5597 else
5598 SPIRV_CROSS_THROW("Unexpected scalar in SpecConstantOp CompositeInsert!");
5599}
5600
5601void CompilerGLSL::set_composite_constant(ConstantID const_id, TypeID type_id,
5602 const SmallVector<ConstantID> &initializers)
5603{
5604 if (maybe_get<SPIRConstantOp>(id: const_id))
5605 {
5606 const_composite_insert_ids[const_id] = initializers;
5607 return;
5608 }
5609
5610 auto &constant = set<SPIRConstant>(id: const_id, args&: type_id);
5611 fill_composite_constant(constant, type_id, initializers);
5612 forwarded_temporaries.insert(x: const_id);
5613}
5614
5615TypeID CompilerGLSL::get_composite_member_type(TypeID type_id, uint32_t member_idx)
5616{
5617 auto &type = get<SPIRType>(id: type_id);
5618 if (is_array(type))
5619 return type.parent_type;
5620 if (type.basetype == SPIRType::Struct)
5621 return type.member_types[member_idx];
5622 if (is_matrix(type))
5623 return type.parent_type;
5624 if (is_vector(type))
5625 return type.parent_type;
5626 SPIRV_CROSS_THROW("Shouldn't reach lower than vector handling OpSpecConstantOp CompositeInsert!");
5627}
5628
5629string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
5630{
5631 auto &type = get<SPIRType>(id: cop.basetype);
5632 bool binary = false;
5633 bool unary = false;
5634 string op;
5635
5636 if (is_legacy() && is_unsigned_opcode(op: cop.opcode))
5637 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
5638
5639 // TODO: Find a clean way to reuse emit_instruction.
5640 switch (cop.opcode)
5641 {
5642 case OpSConvert:
5643 case OpUConvert:
5644 case OpFConvert:
5645 op = type_to_glsl_constructor(type);
5646 break;
5647
5648#define GLSL_BOP(opname, x) \
5649 case Op##opname: \
5650 binary = true; \
5651 op = x; \
5652 break
5653
5654#define GLSL_UOP(opname, x) \
5655 case Op##opname: \
5656 unary = true; \
5657 op = x; \
5658 break
5659
5660 GLSL_UOP(SNegate, "-");
5661 GLSL_UOP(Not, "~");
5662 GLSL_BOP(IAdd, "+");
5663 GLSL_BOP(ISub, "-");
5664 GLSL_BOP(IMul, "*");
5665 GLSL_BOP(SDiv, "/");
5666 GLSL_BOP(UDiv, "/");
5667 GLSL_BOP(UMod, "%");
5668 GLSL_BOP(SMod, "%");
5669 GLSL_BOP(ShiftRightLogical, ">>");
5670 GLSL_BOP(ShiftRightArithmetic, ">>");
5671 GLSL_BOP(ShiftLeftLogical, "<<");
5672 GLSL_BOP(BitwiseOr, "|");
5673 GLSL_BOP(BitwiseXor, "^");
5674 GLSL_BOP(BitwiseAnd, "&");
5675 GLSL_BOP(LogicalOr, "||");
5676 GLSL_BOP(LogicalAnd, "&&");
5677 GLSL_UOP(LogicalNot, "!");
5678 GLSL_BOP(LogicalEqual, "==");
5679 GLSL_BOP(LogicalNotEqual, "!=");
5680 GLSL_BOP(IEqual, "==");
5681 GLSL_BOP(INotEqual, "!=");
5682 GLSL_BOP(ULessThan, "<");
5683 GLSL_BOP(SLessThan, "<");
5684 GLSL_BOP(ULessThanEqual, "<=");
5685 GLSL_BOP(SLessThanEqual, "<=");
5686 GLSL_BOP(UGreaterThan, ">");
5687 GLSL_BOP(SGreaterThan, ">");
5688 GLSL_BOP(UGreaterThanEqual, ">=");
5689 GLSL_BOP(SGreaterThanEqual, ">=");
5690
5691 case OpSRem:
5692 {
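		// Express signed remainder as a - b * (a / b); with GLSL's truncating integer division
		// this matches OpSRem semantics (the sign of a non-zero result follows the first operand).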
5693 uint32_t op0 = cop.arguments[0];
5694 uint32_t op1 = cop.arguments[1];
5695 return join(ts: to_enclosed_expression(id: op0), ts: " - ", ts: to_enclosed_expression(id: op1), ts: " * ", ts: "(",
5696 ts: to_enclosed_expression(id: op0), ts: " / ", ts: to_enclosed_expression(id: op1), ts: ")");
5697 }
5698
5699 case OpSelect:
5700 {
5701 if (cop.arguments.size() < 3)
5702 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
5703
		// This one is pretty annoying. It's triggered by
		// uint(bool) and int(bool) casts on spec constants.
5706 // In order to preserve its compile-time constness in Vulkan GLSL,
5707 // we need to reduce the OpSelect expression back to this simplified model.
5708 // If we cannot, fail.
5709 if (to_trivial_mix_op(type, op, left: cop.arguments[2], right: cop.arguments[1], lerp: cop.arguments[0]))
5710 {
5711 // Implement as a simple cast down below.
5712 }
5713 else
5714 {
5715 // Implement a ternary and pray the compiler understands it :)
5716 return to_ternary_expression(result_type: type, select: cop.arguments[0], true_value: cop.arguments[1], false_value: cop.arguments[2]);
5717 }
5718 break;
5719 }
5720
5721 case OpVectorShuffle:
5722 {
5723 string expr = type_to_glsl_constructor(type);
5724 expr += "(";
5725
5726 uint32_t left_components = expression_type(id: cop.arguments[0]).vecsize;
5727 string left_arg = to_enclosed_expression(id: cop.arguments[0]);
5728 string right_arg = to_enclosed_expression(id: cop.arguments[1]);
5729
5730 for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
5731 {
5732 uint32_t index = cop.arguments[i];
5733 if (index == 0xFFFFFFFF)
5734 {
5735 SPIRConstant c;
5736 c.constant_type = type.parent_type;
5737 assert(type.parent_type != ID(0));
5738 expr += constant_expression(c);
5739 }
5740 else if (index >= left_components)
5741 {
5742 expr += right_arg + "." + "xyzw"[index - left_components];
5743 }
5744 else
5745 {
5746 expr += left_arg + "." + "xyzw"[index];
5747 }
5748
5749 if (i + 1 < uint32_t(cop.arguments.size()))
5750 expr += ", ";
5751 }
5752
5753 expr += ")";
5754 return expr;
5755 }
5756
5757 case OpCompositeExtract:
5758 {
5759 auto expr = access_chain_internal(base: cop.arguments[0], indices: &cop.arguments[1], count: uint32_t(cop.arguments.size() - 1),
5760 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr);
5761 return expr;
5762 }
5763
5764 case OpCompositeInsert:
5765 {
5766 SmallVector<ConstantID> new_init = get_composite_constant_ids(const_id: cop.arguments[1]);
5767 uint32_t idx;
5768 uint32_t target_id = cop.self;
5769 uint32_t target_type_id = cop.basetype;
5770 // We have to drill down to the part we want to modify, and create new
5771 // constants for each containing part.
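		// For example, with access chain { 2, 1 } we clone the sub-constant at index 2 of the
		// composite, then replace element 1 of that clone with the inserted object, rebuilding
		// new constants for every containing level along the way.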
5772 for (idx = 2; idx < cop.arguments.size() - 1; ++idx)
5773 {
5774 uint32_t new_const = ir.increase_bound_by(count: 1);
5775 uint32_t old_const = new_init[cop.arguments[idx]];
5776 new_init[cop.arguments[idx]] = new_const;
5777 set_composite_constant(const_id: target_id, type_id: target_type_id, initializers: new_init);
5778 new_init = get_composite_constant_ids(const_id: old_const);
5779 target_id = new_const;
5780 target_type_id = get_composite_member_type(type_id: target_type_id, member_idx: cop.arguments[idx]);
5781 }
5782 // Now replace the initializer with the one from this instruction.
5783 new_init[cop.arguments[idx]] = cop.arguments[0];
5784 set_composite_constant(const_id: target_id, type_id: target_type_id, initializers: new_init);
5785 SPIRConstant tmp_const(cop.basetype);
5786 fill_composite_constant(constant&: tmp_const, type_id: cop.basetype, initializers: const_composite_insert_ids[cop.self]);
5787 return constant_expression(c: tmp_const);
5788 }
5789
5790 default:
	// Some opcodes are unimplemented here; they are currently not possible to test from glslang.
5792 SPIRV_CROSS_THROW("Unimplemented spec constant op.");
5793 }
5794
5795 uint32_t bit_width = 0;
5796 if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
5797 bit_width = expression_type(id: cop.arguments[0]).width;
5798
5799 SPIRType::BaseType input_type;
5800 bool skip_cast_if_equal_type = opcode_is_sign_invariant(opcode: cop.opcode);
5801
5802 switch (cop.opcode)
5803 {
5804 case OpIEqual:
5805 case OpINotEqual:
5806 input_type = to_signed_basetype(width: bit_width);
5807 break;
5808
5809 case OpSLessThan:
5810 case OpSLessThanEqual:
5811 case OpSGreaterThan:
5812 case OpSGreaterThanEqual:
5813 case OpSMod:
5814 case OpSDiv:
5815 case OpShiftRightArithmetic:
5816 case OpSConvert:
5817 case OpSNegate:
5818 input_type = to_signed_basetype(width: bit_width);
5819 break;
5820
5821 case OpULessThan:
5822 case OpULessThanEqual:
5823 case OpUGreaterThan:
5824 case OpUGreaterThanEqual:
5825 case OpUMod:
5826 case OpUDiv:
5827 case OpShiftRightLogical:
5828 case OpUConvert:
5829 input_type = to_unsigned_basetype(width: bit_width);
5830 break;
5831
5832 default:
5833 input_type = type.basetype;
5834 break;
5835 }
5836
5837#undef GLSL_BOP
5838#undef GLSL_UOP
5839 if (binary)
5840 {
5841 if (cop.arguments.size() < 2)
5842 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
5843
5844 string cast_op0;
5845 string cast_op1;
5846 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0: cop.arguments[0],
5847 op1: cop.arguments[1], skip_cast_if_equal_type);
5848
5849 if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
5850 {
5851 expected_type.basetype = input_type;
5852 auto expr = bitcast_glsl_op(result_type: type, argument_type: expected_type);
5853 expr += '(';
5854 expr += join(ts&: cast_op0, ts: " ", ts&: op, ts: " ", ts&: cast_op1);
5855 expr += ')';
5856 return expr;
5857 }
5858 else
5859 return join(ts: "(", ts&: cast_op0, ts: " ", ts&: op, ts: " ", ts&: cast_op1, ts: ")");
5860 }
5861 else if (unary)
5862 {
5863 if (cop.arguments.size() < 1)
5864 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
5865
5866 // Auto-bitcast to result type as needed.
5867 // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
5868 return join(ts: "(", ts&: op, ts: bitcast_glsl(result_type: type, arg: cop.arguments[0]), ts: ")");
5869 }
5870 else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
5871 {
5872 if (cop.arguments.size() < 1)
5873 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
5874
5875 auto &arg_type = expression_type(id: cop.arguments[0]);
5876 if (arg_type.width < type.width && input_type != arg_type.basetype)
5877 {
5878 auto expected = arg_type;
5879 expected.basetype = input_type;
5880 return join(ts&: op, ts: "(", ts: bitcast_glsl(result_type: expected, arg: cop.arguments[0]), ts: ")");
5881 }
5882 else
5883 return join(ts&: op, ts: "(", ts: to_expression(id: cop.arguments[0]), ts: ")");
5884 }
5885 else
5886 {
5887 if (cop.arguments.size() < 1)
5888 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
5889 return join(ts&: op, ts: "(", ts: to_expression(id: cop.arguments[0]), ts: ")");
5890 }
5891}
5892
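// Emits a SPIR-V constant as a target-language expression.
// Pointers become the backend's null-pointer literal, composites become initializer lists or
// constructor calls, and scalar/vector/matrix constants go through constant_expression_vector().
// Illustrative example (assuming a float[2] constant in plain GLSL): roughly "float[2](1.0, 2.0)",
// or "{ 1.0, 2.0 }" for backends that use initializer lists.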
5893string CompilerGLSL::constant_expression(const SPIRConstant &c,
5894 bool inside_block_like_struct_scope,
5895 bool inside_struct_scope)
5896{
5897 auto &type = get<SPIRType>(id: c.constant_type);
5898
5899 if (is_pointer(type))
5900 {
5901 return backend.null_pointer_literal;
5902 }
5903 else if (!c.subconstants.empty())
5904 {
5905 // Handles Arrays and structures.
5906 string res;
5907
5908 // Only consider the decay if we are inside a struct scope where we are emitting a member with Offset decoration.
5909 // Outside a block-like struct declaration, we can always bind to a constant array with templated type.
5910 // Should look at ArrayStride here as well, but it's possible to declare a constant struct
5911 // with Offset = 0, using no ArrayStride on the enclosed array type.
5912 // A particular CTS test hits this scenario.
5913 bool array_type_decays = inside_block_like_struct_scope &&
5914 is_array(type) &&
5915 !backend.array_is_value_type_in_buffer_blocks;
5916
5917 // Allow Metal to use the array<T> template to make arrays a value type
5918		bool needs_trailing_bracket = false;
5919 if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
5920 !is_array(type))
5921 {
5922 res = type_to_glsl_constructor(type) + "{ ";
5923 }
5924 else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
5925 is_array(type) && !array_type_decays)
5926 {
5927 const auto *p_type = &type;
5928 SPIRType tmp_type { OpNop };
5929
5930 if (inside_struct_scope &&
5931 backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
5932 type.basetype == SPIRType::Boolean)
5933 {
5934 tmp_type = type;
5935 tmp_type.basetype = backend.boolean_in_struct_remapped_type;
5936 p_type = &tmp_type;
5937 }
5938
5939 res = type_to_glsl_constructor(type: *p_type) + "({ ";
5940			needs_trailing_bracket = true;
5941 }
5942 else if (backend.use_initializer_list)
5943 {
5944 res = "{ ";
5945 }
5946 else
5947 {
5948 res = type_to_glsl_constructor(type) + "(";
5949 }
5950
5951 uint32_t subconstant_index = 0;
5952 for (auto &elem : c.subconstants)
5953 {
5954 if (auto *op = maybe_get<SPIRConstantOp>(id: elem))
5955 {
5956 res += constant_op_expression(cop: *op);
5957 }
5958 else if (maybe_get<SPIRUndef>(id: elem) != nullptr)
5959 {
5960 res += to_name(id: elem);
5961 }
5962 else
5963 {
5964 auto &subc = get<SPIRConstant>(id: elem);
5965 if (subc.specialization && !expression_is_forwarded(id: elem))
5966 res += to_name(id: elem);
5967 else
5968 {
5969 if (!is_array(type) && type.basetype == SPIRType::Struct)
5970 {
5971 // When we get down to emitting struct members, override the block-like information.
5972 // For constants, we can freely mix and match block-like state.
5973 inside_block_like_struct_scope =
5974 has_member_decoration(id: type.self, index: subconstant_index, decoration: DecorationOffset);
5975 }
5976
5977 if (type.basetype == SPIRType::Struct)
5978 inside_struct_scope = true;
5979
5980 res += constant_expression(c: subc, inside_block_like_struct_scope, inside_struct_scope);
5981 }
5982 }
5983
5984 if (&elem != &c.subconstants.back())
5985 res += ", ";
5986
5987 subconstant_index++;
5988 }
5989
5990 res += backend.use_initializer_list ? " }" : ")";
5991		if (needs_trailing_bracket)
5992 res += ")";
5993
5994 return res;
5995 }
5996 else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
5997 {
5998 // Metal tessellation likes empty structs which are then constant expressions.
5999 if (backend.supports_empty_struct)
6000 return "{ }";
6001 else if (backend.use_typed_initializer_list)
6002 return join(ts: type_to_glsl(type), ts: "{ 0 }");
6003 else if (backend.use_initializer_list)
6004 return "{ 0 }";
6005 else
6006 return join(ts: type_to_glsl(type), ts: "(0)");
6007 }
6008 else if (c.columns() == 1)
6009 {
6010 auto res = constant_expression_vector(c, vector: 0);
6011
6012 if (inside_struct_scope &&
6013 backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
6014 type.basetype == SPIRType::Boolean)
6015 {
6016 SPIRType tmp_type = type;
6017 tmp_type.basetype = backend.boolean_in_struct_remapped_type;
6018 res = join(ts: type_to_glsl(type: tmp_type), ts: "(", ts&: res, ts: ")");
6019 }
6020
6021 return res;
6022 }
6023 else
6024 {
6025 string res = type_to_glsl(type) + "(";
6026 for (uint32_t col = 0; col < c.columns(); col++)
6027 {
6028 if (c.specialization_constant_id(col) != 0)
6029 res += to_name(id: c.specialization_constant_id(col));
6030 else
6031 res += constant_expression_vector(c, vector: col);
6032
6033 if (col + 1 < c.columns())
6034 res += ", ";
6035 }
6036 res += ")";
6037
6038 if (inside_struct_scope &&
6039 backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
6040 type.basetype == SPIRType::Boolean)
6041 {
6042 SPIRType tmp_type = type;
6043 tmp_type.basetype = backend.boolean_in_struct_remapped_type;
6044 res = join(ts: type_to_glsl(type: tmp_type), ts: "(", ts&: res, ts: ")");
6045 }
6046
6047 return res;
6048 }
6049}
6050
6051#ifdef _MSC_VER
6052// snprintf does not exist or is buggy on older MSVC versions and on the old MSVC runtimes
6053// used by MinGW. Use sprintf instead and disable the corresponding warning.
6054#pragma warning(push)
6055#pragma warning(disable : 4996)
6056#endif
6057
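// Converts one half-precision scalar of a constant to a string.
// Since there is no portable half literal suffix, the value is always wrapped in a value cast,
// e.g. roughly "float16_t(0.5)" (illustrative; the exact type name comes from type_to_glsl()).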
6058string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
6059{
6060 string res;
6061 float float_value = c.scalar_f16(col, row);
6062
6063	// There is no literal "hf" suffix in GL_NV_gpu_shader5, so to avoid lots
6064	// of complicated workarounds, always value-cast to the half type.
6065 if (std::isnan(x: float_value) || std::isinf(x: float_value))
6066 {
6067 SPIRType type { OpTypeFloat };
6068 type.basetype = SPIRType::Half;
6069 type.vecsize = 1;
6070 type.columns = 1;
6071
6072 if (float_value == numeric_limits<float>::infinity())
6073 res = join(ts: type_to_glsl(type), ts: "(1.0 / 0.0)");
6074 else if (float_value == -numeric_limits<float>::infinity())
6075 res = join(ts: type_to_glsl(type), ts: "(-1.0 / 0.0)");
6076 else if (std::isnan(x: float_value))
6077 res = join(ts: type_to_glsl(type), ts: "(0.0 / 0.0)");
6078 else
6079 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
6080 }
6081 else
6082 {
6083 SPIRType type { OpTypeFloat };
6084 type.basetype = SPIRType::Half;
6085 type.vecsize = 1;
6086 type.columns = 1;
6087 res = join(ts: type_to_glsl(type), ts: "(", ts: format_float(value: float_value), ts: ")");
6088 }
6089
6090 return res;
6091}
6092
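// Converts one 32-bit float scalar of a constant to a string.
// Non-finite values are emitted by bitcasting their bit pattern with an explanatory comment,
// e.g. roughly "uintBitsToFloat(0x7f800000u /* inf */)" (illustrative), while legacy targets
// fall back to expressions such as "(1.0 / 0.0)".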
6093string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
6094{
6095 string res;
6096 float float_value = c.scalar_f32(col, row);
6097
6098 if (std::isnan(x: float_value) || std::isinf(x: float_value))
6099 {
6100 // Use special representation.
6101 if (!is_legacy())
6102 {
6103 SPIRType out_type { OpTypeFloat };
6104 SPIRType in_type { OpTypeInt };
6105 out_type.basetype = SPIRType::Float;
6106 in_type.basetype = SPIRType::UInt;
6107 out_type.vecsize = 1;
6108 in_type.vecsize = 1;
6109 out_type.width = 32;
6110 in_type.width = 32;
6111
6112 char print_buffer[32];
6113#ifdef _WIN32
6114 sprintf(print_buffer, "0x%xu", c.scalar(col, row));
6115#else
6116 snprintf(s: print_buffer, maxlen: sizeof(print_buffer), format: "0x%xu", c.scalar(col, row));
6117#endif
6118
6119 const char *comment = "inf";
6120 if (float_value == -numeric_limits<float>::infinity())
6121 comment = "-inf";
6122 else if (std::isnan(x: float_value))
6123 comment = "nan";
6124 res = join(ts: bitcast_glsl_op(result_type: out_type, argument_type: in_type), ts: "(", ts&: print_buffer, ts: " /* ", ts&: comment, ts: " */)");
6125 }
6126 else
6127 {
6128 if (float_value == numeric_limits<float>::infinity())
6129 {
6130 if (backend.float_literal_suffix)
6131 res = "(1.0f / 0.0f)";
6132 else
6133 res = "(1.0 / 0.0)";
6134 }
6135 else if (float_value == -numeric_limits<float>::infinity())
6136 {
6137 if (backend.float_literal_suffix)
6138 res = "(-1.0f / 0.0f)";
6139 else
6140 res = "(-1.0 / 0.0)";
6141 }
6142 else if (std::isnan(x: float_value))
6143 {
6144 if (backend.float_literal_suffix)
6145 res = "(0.0f / 0.0f)";
6146 else
6147 res = "(0.0 / 0.0)";
6148 }
6149 else
6150 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
6151 }
6152 }
6153 else
6154 {
6155 res = format_float(value: float_value);
6156 if (backend.float_literal_suffix)
6157 res += "f";
6158 }
6159
6160 return res;
6161}
6162
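// Converts one 64-bit double scalar of a constant to a string.
// Non-finite values are emitted by bitcasting the 64-bit pattern, e.g. roughly
// "uint64BitsToDouble(0x7ff0000000000000ull /* inf */)" (illustrative), which requires
// GL_ARB_gpu_shader_int64; legacy targets fall back to "(1.0lf / 0.0lf)"-style expressions.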
6163std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
6164{
6165 string res;
6166 double double_value = c.scalar_f64(col, row);
6167
6168 if (std::isnan(x: double_value) || std::isinf(x: double_value))
6169 {
6170 // Use special representation.
6171 if (!is_legacy())
6172 {
6173 SPIRType out_type { OpTypeFloat };
6174 SPIRType in_type { OpTypeInt };
6175 out_type.basetype = SPIRType::Double;
6176 in_type.basetype = SPIRType::UInt64;
6177 out_type.vecsize = 1;
6178 in_type.vecsize = 1;
6179 out_type.width = 64;
6180 in_type.width = 64;
6181
6182 uint64_t u64_value = c.scalar_u64(col, row);
6183
6184 if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310.
6185 SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310.");
6186 require_extension_internal(ext: "GL_ARB_gpu_shader_int64");
6187
6188 char print_buffer[64];
6189#ifdef _WIN32
6190 sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
6191 backend.long_long_literal_suffix ? "ull" : "ul");
6192#else
6193 snprintf(s: print_buffer, maxlen: sizeof(print_buffer), format: "0x%llx%s", static_cast<unsigned long long>(u64_value),
6194 backend.long_long_literal_suffix ? "ull" : "ul");
6195#endif
6196
6197 const char *comment = "inf";
6198 if (double_value == -numeric_limits<double>::infinity())
6199 comment = "-inf";
6200 else if (std::isnan(x: double_value))
6201 comment = "nan";
6202 res = join(ts: bitcast_glsl_op(result_type: out_type, argument_type: in_type), ts: "(", ts&: print_buffer, ts: " /* ", ts&: comment, ts: " */)");
6203 }
6204 else
6205 {
6206 if (options.es)
6207 SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
6208 if (options.version < 400)
6209 require_extension_internal(ext: "GL_ARB_gpu_shader_fp64");
6210
6211 if (double_value == numeric_limits<double>::infinity())
6212 {
6213 if (backend.double_literal_suffix)
6214 res = "(1.0lf / 0.0lf)";
6215 else
6216 res = "(1.0 / 0.0)";
6217 }
6218 else if (double_value == -numeric_limits<double>::infinity())
6219 {
6220 if (backend.double_literal_suffix)
6221 res = "(-1.0lf / 0.0lf)";
6222 else
6223 res = "(-1.0 / 0.0)";
6224 }
6225 else if (std::isnan(x: double_value))
6226 {
6227 if (backend.double_literal_suffix)
6228 res = "(0.0lf / 0.0lf)";
6229 else
6230 res = "(0.0 / 0.0)";
6231 }
6232 else
6233 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
6234 }
6235 }
6236 else
6237 {
6238 res = format_double(value: double_value);
6239 if (backend.double_literal_suffix)
6240 res += "lf";
6241 }
6242
6243 return res;
6244}
6245
6246#ifdef _MSC_VER
6247#pragma warning(pop)
6248#endif
6249
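// Emits a single column of a constant as a vector (or scalar) expression.
// If all components are equal and the backend allows it, the value is "splatted",
// e.g. roughly "vec3(0.5)" instead of "vec3(0.5, 0.5, 0.5)", or swizzle-splatted
// (roughly "0.5.xxx") on backends that can swizzle scalars (illustrative examples).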
6250string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
6251{
6252 auto type = get<SPIRType>(id: c.constant_type);
6253 type.columns = 1;
6254
6255 auto scalar_type = type;
6256 scalar_type.vecsize = 1;
6257
6258 string res;
6259 bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
6260 bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
6261
6262 if (!type_is_floating_point(type))
6263 {
6264		// As a special case, literal integers cannot be swizzle-splatted.
6265 swizzle_splat = false;
6266 }
6267
6268 if (splat || swizzle_splat)
6269 {
6270 // Cannot use constant splatting if we have specialization constants somewhere in the vector.
6271 for (uint32_t i = 0; i < c.vector_size(); i++)
6272 {
6273 if (c.specialization_constant_id(col: vector, row: i) != 0)
6274 {
6275 splat = false;
6276 swizzle_splat = false;
6277 break;
6278 }
6279 }
6280 }
6281
6282 if (splat || swizzle_splat)
6283 {
6284 if (type.width == 64)
6285 {
6286 uint64_t ident = c.scalar_u64(col: vector, row: 0);
6287 for (uint32_t i = 1; i < c.vector_size(); i++)
6288 {
6289 if (ident != c.scalar_u64(col: vector, row: i))
6290 {
6291 splat = false;
6292 swizzle_splat = false;
6293 break;
6294 }
6295 }
6296 }
6297 else
6298 {
6299 uint32_t ident = c.scalar(col: vector, row: 0);
6300 for (uint32_t i = 1; i < c.vector_size(); i++)
6301 {
6302 if (ident != c.scalar(col: vector, row: i))
6303 {
6304 splat = false;
6305 swizzle_splat = false;
6306 }
6307 }
6308 }
6309 }
6310
6311 if (c.vector_size() > 1 && !swizzle_splat)
6312 res += type_to_glsl(type) + "(";
6313
6314 switch (type.basetype)
6315 {
6316 case SPIRType::Half:
6317 if (splat || swizzle_splat)
6318 {
6319 res += convert_half_to_string(c, col: vector, row: 0);
6320 if (swizzle_splat)
6321 res = remap_swizzle(out_type: get<SPIRType>(id: c.constant_type), input_components: 1, expr: res);
6322 }
6323 else
6324 {
6325 for (uint32_t i = 0; i < c.vector_size(); i++)
6326 {
6327 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6328 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6329 else
6330 res += convert_half_to_string(c, col: vector, row: i);
6331
6332 if (i + 1 < c.vector_size())
6333 res += ", ";
6334 }
6335 }
6336 break;
6337
6338 case SPIRType::Float:
6339 if (splat || swizzle_splat)
6340 {
6341 res += convert_float_to_string(c, col: vector, row: 0);
6342 if (swizzle_splat)
6343 res = remap_swizzle(out_type: get<SPIRType>(id: c.constant_type), input_components: 1, expr: res);
6344 }
6345 else
6346 {
6347 for (uint32_t i = 0; i < c.vector_size(); i++)
6348 {
6349 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6350 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6351 else
6352 res += convert_float_to_string(c, col: vector, row: i);
6353
6354 if (i + 1 < c.vector_size())
6355 res += ", ";
6356 }
6357 }
6358 break;
6359
6360 case SPIRType::Double:
6361 if (splat || swizzle_splat)
6362 {
6363 res += convert_double_to_string(c, col: vector, row: 0);
6364 if (swizzle_splat)
6365 res = remap_swizzle(out_type: get<SPIRType>(id: c.constant_type), input_components: 1, expr: res);
6366 }
6367 else
6368 {
6369 for (uint32_t i = 0; i < c.vector_size(); i++)
6370 {
6371 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6372 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6373 else
6374 res += convert_double_to_string(c, col: vector, row: i);
6375
6376 if (i + 1 < c.vector_size())
6377 res += ", ";
6378 }
6379 }
6380 break;
6381
6382 case SPIRType::Int64:
6383 {
6384 auto tmp = type;
6385 tmp.vecsize = 1;
6386 tmp.columns = 1;
6387 auto int64_type = type_to_glsl(type: tmp);
6388
6389 if (splat)
6390 {
6391 res += convert_to_string(value: c.scalar_i64(col: vector, row: 0), int64_type, long_long_literal_suffix: backend.long_long_literal_suffix);
6392 }
6393 else
6394 {
6395 for (uint32_t i = 0; i < c.vector_size(); i++)
6396 {
6397 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6398 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6399 else
6400 res += convert_to_string(value: c.scalar_i64(col: vector, row: i), int64_type, long_long_literal_suffix: backend.long_long_literal_suffix);
6401
6402 if (i + 1 < c.vector_size())
6403 res += ", ";
6404 }
6405 }
6406 break;
6407 }
6408
6409 case SPIRType::UInt64:
6410 if (splat)
6411 {
6412 res += convert_to_string(t: c.scalar_u64(col: vector, row: 0));
6413 if (backend.long_long_literal_suffix)
6414 res += "ull";
6415 else
6416 res += "ul";
6417 }
6418 else
6419 {
6420 for (uint32_t i = 0; i < c.vector_size(); i++)
6421 {
6422 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6423 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6424 else
6425 {
6426 res += convert_to_string(t: c.scalar_u64(col: vector, row: i));
6427 if (backend.long_long_literal_suffix)
6428 res += "ull";
6429 else
6430 res += "ul";
6431 }
6432
6433 if (i + 1 < c.vector_size())
6434 res += ", ";
6435 }
6436 }
6437 break;
6438
6439 case SPIRType::UInt:
6440 if (splat)
6441 {
6442 res += convert_to_string(t: c.scalar(col: vector, row: 0));
6443 if (is_legacy() && !has_extension(ext: "GL_EXT_gpu_shader4"))
6444 {
6445 // Fake unsigned constant literals with signed ones if possible.
6446 // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
6447 if (c.scalar_i32(col: vector, row: 0) < 0)
6448 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
6449 }
6450 else if (backend.uint32_t_literal_suffix)
6451 res += "u";
6452 }
6453 else
6454 {
6455 for (uint32_t i = 0; i < c.vector_size(); i++)
6456 {
6457 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6458 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6459 else
6460 {
6461 res += convert_to_string(t: c.scalar(col: vector, row: i));
6462 if (is_legacy() && !has_extension(ext: "GL_EXT_gpu_shader4"))
6463 {
6464 // Fake unsigned constant literals with signed ones if possible.
6465 // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
6466 if (c.scalar_i32(col: vector, row: i) < 0)
6467 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
6468 "the literal negative.");
6469 }
6470 else if (backend.uint32_t_literal_suffix)
6471 res += "u";
6472 }
6473
6474 if (i + 1 < c.vector_size())
6475 res += ", ";
6476 }
6477 }
6478 break;
6479
6480 case SPIRType::Int:
6481 if (splat)
6482 res += convert_to_string(value: c.scalar_i32(col: vector, row: 0));
6483 else
6484 {
6485 for (uint32_t i = 0; i < c.vector_size(); i++)
6486 {
6487 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6488 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6489 else
6490 res += convert_to_string(value: c.scalar_i32(col: vector, row: i));
6491 if (i + 1 < c.vector_size())
6492 res += ", ";
6493 }
6494 }
6495 break;
6496
6497 case SPIRType::UShort:
6498 if (splat)
6499 {
6500 res += convert_to_string(t: c.scalar(col: vector, row: 0));
6501 }
6502 else
6503 {
6504 for (uint32_t i = 0; i < c.vector_size(); i++)
6505 {
6506 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6507 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6508 else
6509 {
6510 if (*backend.uint16_t_literal_suffix)
6511 {
6512 res += convert_to_string(t: c.scalar_u16(col: vector, row: i));
6513 res += backend.uint16_t_literal_suffix;
6514 }
6515 else
6516 {
6517 // If backend doesn't have a literal suffix, we need to value cast.
6518 res += type_to_glsl(type: scalar_type);
6519 res += "(";
6520 res += convert_to_string(t: c.scalar_u16(col: vector, row: i));
6521 res += ")";
6522 }
6523 }
6524
6525 if (i + 1 < c.vector_size())
6526 res += ", ";
6527 }
6528 }
6529 break;
6530
6531 case SPIRType::Short:
6532 if (splat)
6533 {
6534 res += convert_to_string(t: c.scalar_i16(col: vector, row: 0));
6535 }
6536 else
6537 {
6538 for (uint32_t i = 0; i < c.vector_size(); i++)
6539 {
6540 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6541 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6542 else
6543 {
6544 if (*backend.int16_t_literal_suffix)
6545 {
6546 res += convert_to_string(t: c.scalar_i16(col: vector, row: i));
6547 res += backend.int16_t_literal_suffix;
6548 }
6549 else
6550 {
6551 // If backend doesn't have a literal suffix, we need to value cast.
6552 res += type_to_glsl(type: scalar_type);
6553 res += "(";
6554 res += convert_to_string(t: c.scalar_i16(col: vector, row: i));
6555 res += ")";
6556 }
6557 }
6558
6559 if (i + 1 < c.vector_size())
6560 res += ", ";
6561 }
6562 }
6563 break;
6564
6565 case SPIRType::UByte:
6566 if (splat)
6567 {
6568 res += convert_to_string(t: c.scalar_u8(col: vector, row: 0));
6569 }
6570 else
6571 {
6572 for (uint32_t i = 0; i < c.vector_size(); i++)
6573 {
6574 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6575 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6576 else
6577 {
6578 res += type_to_glsl(type: scalar_type);
6579 res += "(";
6580 res += convert_to_string(t: c.scalar_u8(col: vector, row: i));
6581 res += ")";
6582 }
6583
6584 if (i + 1 < c.vector_size())
6585 res += ", ";
6586 }
6587 }
6588 break;
6589
6590 case SPIRType::SByte:
6591 if (splat)
6592 {
6593 res += convert_to_string(t: c.scalar_i8(col: vector, row: 0));
6594 }
6595 else
6596 {
6597 for (uint32_t i = 0; i < c.vector_size(); i++)
6598 {
6599 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6600 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6601 else
6602 {
6603 res += type_to_glsl(type: scalar_type);
6604 res += "(";
6605 res += convert_to_string(t: c.scalar_i8(col: vector, row: i));
6606 res += ")";
6607 }
6608
6609 if (i + 1 < c.vector_size())
6610 res += ", ";
6611 }
6612 }
6613 break;
6614
6615 case SPIRType::Boolean:
6616 if (splat)
6617 res += c.scalar(col: vector, row: 0) ? "true" : "false";
6618 else
6619 {
6620 for (uint32_t i = 0; i < c.vector_size(); i++)
6621 {
6622 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6623 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6624 else
6625 res += c.scalar(col: vector, row: i) ? "true" : "false";
6626
6627 if (i + 1 < c.vector_size())
6628 res += ", ";
6629 }
6630 }
6631 break;
6632
6633 default:
6634 SPIRV_CROSS_THROW("Invalid constant expression basetype.");
6635 }
6636
6637 if (c.vector_size() > 1 && !swizzle_splat)
6638 res += ")";
6639
6640 return res;
6641}
6642
6643SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
6644{
6645 forced_temporaries.insert(x: id);
6646 emit_uninitialized_temporary(type, id);
6647 return set<SPIRExpression>(id, args: to_name(id), args&: type, args: true);
6648}
6649
6650void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
6651{
6652 // If we're declaring temporaries inside continue blocks,
6653 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
6654 if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(x: result_id))
6655 {
6656 auto &header = get<SPIRBlock>(id: current_continue_block->loop_dominator);
6657 if (find_if(first: begin(cont&: header.declare_temporary), last: end(cont&: header.declare_temporary),
6658 pred: [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
6659 return tmp.first == result_type && tmp.second == result_id;
6660 }) == end(cont&: header.declare_temporary))
6661 {
6662 header.declare_temporary.emplace_back(ts&: result_type, ts&: result_id);
6663 hoisted_temporaries.insert(x: result_id);
6664 force_recompile();
6665 }
6666 }
6667 else if (hoisted_temporaries.count(x: result_id) == 0)
6668 {
6669 auto &type = get<SPIRType>(id: result_type);
6670 auto &flags = get_decoration_bitset(id: result_id);
6671
6672 // The result_id has not been made into an expression yet, so use flags interface.
6673 add_local_variable_name(id: result_id);
6674
6675 string initializer;
6676 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
6677 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: result_type));
6678
6679 statement(ts: flags_to_qualifiers_glsl(type, flags), ts: variable_decl(type, name: to_name(id: result_id)), ts&: initializer, ts: ";");
6680 }
6681}
6682
6683string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
6684{
6685 auto &type = get<SPIRType>(id: result_type);
6686
6687 // If we're declaring temporaries inside continue blocks,
6688 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
6689 if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(x: result_id))
6690 {
6691 auto &header = get<SPIRBlock>(id: current_continue_block->loop_dominator);
6692 if (find_if(first: begin(cont&: header.declare_temporary), last: end(cont&: header.declare_temporary),
6693 pred: [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
6694 return tmp.first == result_type && tmp.second == result_id;
6695 }) == end(cont&: header.declare_temporary))
6696 {
6697 header.declare_temporary.emplace_back(ts&: result_type, ts&: result_id);
6698 hoisted_temporaries.insert(x: result_id);
6699 force_recompile_guarantee_forward_progress();
6700 }
6701
6702 return join(ts: to_name(id: result_id), ts: " = ");
6703 }
6704 else if (hoisted_temporaries.count(x: result_id))
6705 {
6706 // The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
6707 return join(ts: to_name(id: result_id), ts: " = ");
6708 }
6709 else
6710 {
6711 // The result_id has not been made into an expression yet, so use flags interface.
6712 add_local_variable_name(id: result_id);
6713 auto &flags = get_decoration_bitset(id: result_id);
6714 return join(ts: flags_to_qualifiers_glsl(type, flags), ts: variable_decl(type, name: to_name(id: result_id)), ts: " = ");
6715 }
6716}
6717
6718bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
6719{
6720 return forwarded_temporaries.count(x: id) != 0;
6721}
6722
6723bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
6724{
6725 return suppressed_usage_tracking.count(x: id) != 0;
6726}
6727
6728bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
6729{
6730 auto *expr = maybe_get<SPIRExpression>(id);
6731 if (!expr)
6732 return false;
6733
6734 // If we're emitting code at a deeper loop level than when we emitted the expression,
6735 // we're probably reading the same expression over and over.
6736 return current_loop_level > expr->emitted_loop_level;
6737}
6738
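// Central helper for emitting an instruction result.
// When forwarding is allowed, the RHS string itself becomes the expression and no statement is
// emitted yet (it gets inlined into later uses); otherwise a temporary is declared and assigned
// immediately, e.g. roughly "int _42 = a + b;" (illustrative name).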
6739SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
6740 bool suppress_usage_tracking)
6741{
6742 if (forwarding && (forced_temporaries.find(x: result_id) == end(cont&: forced_temporaries)))
6743 {
6744		// Just forward it without a temporary.
6745 // If the forward is trivial, we do not force flushing to temporary for this expression.
6746 forwarded_temporaries.insert(x: result_id);
6747 if (suppress_usage_tracking)
6748 suppressed_usage_tracking.insert(x: result_id);
6749
6750 return set<SPIRExpression>(id: result_id, args: rhs, args&: result_type, args: true);
6751 }
6752 else
6753 {
6754 // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
6755 statement(ts: declare_temporary(result_type, result_id), ts: rhs, ts: ";");
6756 return set<SPIRExpression>(id: result_id, args: to_name(id: result_id), args&: result_type, args: true);
6757 }
6758}
6759
6760void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
6761{
6762 bool forward = should_forward(id: op0);
6763 emit_op(result_type, result_id, rhs: join(ts&: op, ts: to_enclosed_unpacked_expression(id: op0)), forwarding: forward);
6764 inherit_expression_dependencies(dst: result_id, source: op0);
6765}
6766
6767void CompilerGLSL::emit_unary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
6768{
6769 auto &type = get<SPIRType>(id: result_type);
6770 bool forward = should_forward(id: op0);
6771 emit_op(result_type, result_id, rhs: join(ts: type_to_glsl(type), ts: "(", ts&: op, ts: to_enclosed_unpacked_expression(id: op0), ts: ")"), forwarding: forward);
6772 inherit_expression_dependencies(dst: result_id, source: op0);
6773}
6774
6775void CompilerGLSL::emit_mesh_tasks(SPIRBlock &block)
6776{
6777 statement(ts: "EmitMeshTasksEXT(",
6778 ts: to_unpacked_expression(id: block.mesh.groups[0]), ts: ", ",
6779 ts: to_unpacked_expression(id: block.mesh.groups[1]), ts: ", ",
6780 ts: to_unpacked_expression(id: block.mesh.groups[2]), ts: ");");
6781}
6782
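// NoContraction-decorated FP arithmetic must not be forwarded when the backend has a precise qualifier;
// it is pinned to a temporary instead, e.g. roughly "precise float _N = a + b;" (illustrative),
// so the expression cannot be fused or reassociated at a later use site.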
6783void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
6784{
6785 // Various FP arithmetic opcodes such as add, sub, mul will hit this.
6786 bool force_temporary_precise = backend.support_precise_qualifier &&
6787 has_decoration(id: result_id, decoration: DecorationNoContraction) &&
6788 type_is_floating_point(type: get<SPIRType>(id: result_type));
6789 bool forward = should_forward(id: op0) && should_forward(id: op1) && !force_temporary_precise;
6790
6791 emit_op(result_type, result_id,
6792 rhs: join(ts: to_enclosed_unpacked_expression(id: op0), ts: " ", ts&: op, ts: " ", ts: to_enclosed_unpacked_expression(id: op1)), forwarding: forward);
6793
6794 inherit_expression_dependencies(dst: result_id, source: op0);
6795 inherit_expression_dependencies(dst: result_id, source: op1);
6796}
6797
6798void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
6799{
6800 auto &type = get<SPIRType>(id: result_type);
6801 auto expr = type_to_glsl_constructor(type);
6802 expr += '(';
6803 for (uint32_t i = 0; i < type.vecsize; i++)
6804 {
6805 // Make sure to call to_expression multiple times to ensure
6806 // that these expressions are properly flushed to temporaries if needed.
6807 expr += op;
6808 expr += to_extract_component_expression(id: operand, index: i);
6809
6810 if (i + 1 < type.vecsize)
6811 expr += ", ";
6812 }
6813 expr += ')';
6814 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: operand));
6815
6816 inherit_expression_dependencies(dst: result_id, source: operand);
6817}
6818
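// Builds a component-wise expression for binary ops that must be unrolled on the target,
// optionally bitcasting each component and negating the result.
// Illustrative example for a 2-component comparison: roughly "bvec2(a.x < b.x, a.y < b.y)".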
6819void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6820 const char *op, bool negate, SPIRType::BaseType expected_type)
6821{
6822 auto &type0 = expression_type(id: op0);
6823 auto &type1 = expression_type(id: op1);
6824
6825 SPIRType target_type0 = type0;
6826 SPIRType target_type1 = type1;
6827 target_type0.basetype = expected_type;
6828 target_type1.basetype = expected_type;
6829 target_type0.vecsize = 1;
6830 target_type1.vecsize = 1;
6831
6832 auto &type = get<SPIRType>(id: result_type);
6833 auto expr = type_to_glsl_constructor(type);
6834 expr += '(';
6835 for (uint32_t i = 0; i < type.vecsize; i++)
6836 {
6837 // Make sure to call to_expression multiple times to ensure
6838 // that these expressions are properly flushed to temporaries if needed.
6839 if (negate)
6840 expr += "!(";
6841
6842 if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
6843 expr += bitcast_expression(target_type: target_type0, expr_type: type0.basetype, expr: to_extract_component_expression(id: op0, index: i));
6844 else
6845 expr += to_extract_component_expression(id: op0, index: i);
6846
6847 expr += ' ';
6848 expr += op;
6849 expr += ' ';
6850
6851 if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
6852 expr += bitcast_expression(target_type: target_type1, expr_type: type1.basetype, expr: to_extract_component_expression(id: op1, index: i));
6853 else
6854 expr += to_extract_component_expression(id: op1, index: i);
6855
6856 if (negate)
6857 expr += ")";
6858
6859 if (i + 1 < type.vecsize)
6860 expr += ", ";
6861 }
6862 expr += ')';
6863 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1));
6864
6865 inherit_expression_dependencies(dst: result_id, source: op0);
6866 inherit_expression_dependencies(dst: result_id, source: op1);
6867}
6868
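// Fills cast_op0/cast_op1 with (possibly bitcast) operand expressions and returns the type the
// operation is expected to consume. For example, feeding uint operands to a signed comparison
// typically wraps each operand roughly as "int(a)" (illustrative).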
6869SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
6870 uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
6871{
6872 auto &type0 = expression_type(id: op0);
6873 auto &type1 = expression_type(id: op1);
6874
6875 // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
6876 // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
6877 // since equality test is exactly the same.
6878 bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
6879
6880 // Create a fake type so we can bitcast to it.
6881 // We only deal with regular arithmetic types here like int, uints and so on.
6882 SPIRType expected_type{type0.op};
6883 expected_type.basetype = input_type;
6884 expected_type.vecsize = type0.vecsize;
6885 expected_type.columns = type0.columns;
6886 expected_type.width = type0.width;
6887
6888 if (cast)
6889 {
6890 cast_op0 = bitcast_glsl(result_type: expected_type, arg: op0);
6891 cast_op1 = bitcast_glsl(result_type: expected_type, arg: op1);
6892 }
6893 else
6894 {
6895 // If we don't cast, our actual input type is that of the first (or second) argument.
6896 cast_op0 = to_enclosed_unpacked_expression(id: op0);
6897 cast_op1 = to_enclosed_unpacked_expression(id: op1);
6898 input_type = type0.basetype;
6899 }
6900
6901 return expected_type;
6902}
6903
6904bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
6905{
6906 // Some bitcasts may require complex casting sequences, and are implemented here.
6907	// Otherwise, a simple unary function via bitcast_glsl_op will do.
6908
6909 auto &output_type = get<SPIRType>(id: result_type);
6910 auto &input_type = expression_type(id: op0);
6911 string expr;
6912
6913 if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
6914 expr = join(ts: "unpackFloat2x16(floatBitsToUint(", ts: to_unpacked_expression(id: op0), ts: "))");
6915 else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
6916 input_type.vecsize == 2)
6917 expr = join(ts: "uintBitsToFloat(packFloat2x16(", ts: to_unpacked_expression(id: op0), ts: "))");
6918 else
6919 return false;
6920
6921 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: op0));
6922 return true;
6923}
6924
6925void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6926 const char *op, SPIRType::BaseType input_type,
6927 bool skip_cast_if_equal_type,
6928 bool implicit_integer_promotion)
6929{
6930 string cast_op0, cast_op1;
6931 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
6932 auto &out_type = get<SPIRType>(id: result_type);
6933
6934 // We might have casted away from the result type, so bitcast again.
6935 // For example, arithmetic right shift with uint inputs.
6936 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
6937 auto bitop = join(ts&: cast_op0, ts: " ", ts&: op, ts: " ", ts&: cast_op1);
6938 string expr;
6939
6940 if (implicit_integer_promotion)
6941 {
6942 // Simple value cast.
6943 expr = join(ts: type_to_glsl(type: out_type), ts: '(', ts&: bitop, ts: ')');
6944 }
6945 else if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
6946 {
6947 expected_type.basetype = input_type;
6948 expr = join(ts: bitcast_glsl_op(result_type: out_type, argument_type: expected_type), ts: '(', ts&: bitop, ts: ')');
6949 }
6950 else
6951 {
6952 expr = std::move(bitop);
6953 }
6954
6955 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1));
6956 inherit_expression_dependencies(dst: result_id, source: op0);
6957 inherit_expression_dependencies(dst: result_id, source: op1);
6958}
6959
6960void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
6961{
6962 bool forward = should_forward(id: op0);
6963 emit_op(result_type, result_id, rhs: join(ts&: op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ")"), forwarding: forward);
6964 inherit_expression_dependencies(dst: result_id, source: op0);
6965}
6966
6967void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6968 const char *op)
6969{
6970 // Opaque types (e.g. OpTypeSampledImage) must always be forwarded in GLSL
6971 const auto &type = get_type(id: result_type);
6972 bool must_forward = type_is_opaque_value(type);
6973 bool forward = must_forward || (should_forward(id: op0) && should_forward(id: op1));
6974 emit_op(result_type, result_id, rhs: join(ts&: op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ", ", ts: to_unpacked_expression(id: op1), ts: ")"),
6975 forwarding: forward);
6976 inherit_expression_dependencies(dst: result_id, source: op0);
6977 inherit_expression_dependencies(dst: result_id, source: op1);
6978}
6979
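// Atomics have side effects: the result is always forced into a temporary (never forwarded),
// and all variables an atomic can touch are flushed afterwards so later reads observe the update.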
6980void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6981 const char *op)
6982{
6983 auto &type = get<SPIRType>(id: result_type);
6984 if (type_is_floating_point(type))
6985 {
6986 if (!options.vulkan_semantics)
6987 SPIRV_CROSS_THROW("Floating point atomics requires Vulkan semantics.");
6988 if (options.es)
6989 SPIRV_CROSS_THROW("Floating point atomics requires desktop GLSL.");
6990 require_extension_internal(ext: "GL_EXT_shader_atomic_float");
6991 }
6992
6993 forced_temporaries.insert(x: result_id);
6994 emit_op(result_type, result_id,
6995 rhs: join(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: op0), ts: ", ",
6996 ts: to_unpacked_expression(id: op1), ts: ")"), forwarding: false);
6997 flush_all_atomic_capable_variables();
6998}
6999
7000void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
7001 uint32_t op0, uint32_t op1, uint32_t op2,
7002 const char *op)
7003{
7004 forced_temporaries.insert(x: result_id);
7005 emit_op(result_type, result_id,
7006 rhs: join(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: op0), ts: ", ",
7007 ts: to_unpacked_expression(id: op1), ts: ", ", ts: to_unpacked_expression(id: op2), ts: ")"), forwarding: false);
7008 flush_all_atomic_capable_variables();
7009}
7010
7011void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
7012 SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
7013{
7014 auto &out_type = get<SPIRType>(id: result_type);
7015 auto &expr_type = expression_type(id: op0);
7016 auto expected_type = out_type;
7017
7018 // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
7019 expected_type.basetype = input_type;
7020 expected_type.width = expr_type.width;
7021
7022 string cast_op;
7023 if (expr_type.basetype != input_type)
7024 {
7025 if (expr_type.basetype == SPIRType::Boolean)
7026 cast_op = join(ts: type_to_glsl(type: expected_type), ts: "(", ts: to_unpacked_expression(id: op0), ts: ")");
7027 else
7028 cast_op = bitcast_glsl(result_type: expected_type, arg: op0);
7029 }
7030 else
7031 cast_op = to_unpacked_expression(id: op0);
7032
7033 string expr;
7034 if (out_type.basetype != expected_result_type)
7035 {
7036 expected_type.basetype = expected_result_type;
7037 expected_type.width = out_type.width;
7038 if (out_type.basetype == SPIRType::Boolean)
7039 expr = type_to_glsl(type: out_type);
7040 else
7041 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
7042 expr += '(';
7043 expr += join(ts&: op, ts: "(", ts&: cast_op, ts: ")");
7044 expr += ')';
7045 }
7046 else
7047 {
7048 expr += join(ts&: op, ts: "(", ts&: cast_op, ts: ")");
7049 }
7050
7051 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0));
7052 inherit_expression_dependencies(dst: result_id, source: op0);
7053}
7054
7055// Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
7056// and different vector sizes all at once. Need a special purpose method here.
7057void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
7058 uint32_t op2, const char *op,
7059 SPIRType::BaseType expected_result_type,
7060 SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
7061 SPIRType::BaseType input_type2)
7062{
7063 auto &out_type = get<SPIRType>(id: result_type);
7064 auto expected_type = out_type;
7065 expected_type.basetype = input_type0;
7066
7067 string cast_op0 =
7068 expression_type(id: op0).basetype != input_type0 ? bitcast_glsl(result_type: expected_type, arg: op0) : to_unpacked_expression(id: op0);
7069
7070 auto op1_expr = to_unpacked_expression(id: op1);
7071 auto op2_expr = to_unpacked_expression(id: op2);
7072
7073 // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
7074 expected_type.basetype = input_type1;
7075 expected_type.vecsize = 1;
7076 string cast_op1 = expression_type(id: op1).basetype != input_type1 ?
7077 join(ts: type_to_glsl_constructor(type: expected_type), ts: "(", ts&: op1_expr, ts: ")") :
7078 op1_expr;
7079
7080 expected_type.basetype = input_type2;
7081 expected_type.vecsize = 1;
7082 string cast_op2 = expression_type(id: op2).basetype != input_type2 ?
7083 join(ts: type_to_glsl_constructor(type: expected_type), ts: "(", ts&: op2_expr, ts: ")") :
7084 op2_expr;
7085
7086 string expr;
7087 if (out_type.basetype != expected_result_type)
7088 {
7089 expected_type.vecsize = out_type.vecsize;
7090 expected_type.basetype = expected_result_type;
7091 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
7092 expr += '(';
7093 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ", ", ts&: cast_op2, ts: ")");
7094 expr += ')';
7095 }
7096 else
7097 {
7098 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ", ", ts&: cast_op2, ts: ")");
7099 }
7100
7101 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2));
7102 inherit_expression_dependencies(dst: result_id, source: op0);
7103 inherit_expression_dependencies(dst: result_id, source: op1);
7104 inherit_expression_dependencies(dst: result_id, source: op2);
7105}
7106
7107void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
7108 uint32_t op2, const char *op, SPIRType::BaseType input_type)
7109{
7110 auto &out_type = get<SPIRType>(id: result_type);
7111 auto expected_type = out_type;
7112 expected_type.basetype = input_type;
7113 string cast_op0 =
7114 expression_type(id: op0).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op0) : to_unpacked_expression(id: op0);
7115 string cast_op1 =
7116 expression_type(id: op1).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op1) : to_unpacked_expression(id: op1);
7117 string cast_op2 =
7118 expression_type(id: op2).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op2) : to_unpacked_expression(id: op2);
7119
7120 string expr;
7121 if (out_type.basetype != input_type)
7122 {
7123 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
7124 expr += '(';
7125 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ", ", ts&: cast_op2, ts: ")");
7126 expr += ')';
7127 }
7128 else
7129 {
7130 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ", ", ts&: cast_op2, ts: ")");
7131 }
7132
7133 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2));
7134 inherit_expression_dependencies(dst: result_id, source: op0);
7135 inherit_expression_dependencies(dst: result_id, source: op1);
7136 inherit_expression_dependencies(dst: result_id, source: op2);
7137}
7138
7139void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
7140 uint32_t op1, const char *op, SPIRType::BaseType input_type)
7141{
7142 // Special purpose method for implementing clustered subgroup opcodes.
7143	// The main difference is that op1 does not participate in any casting; it needs to be a literal.
7144 auto &out_type = get<SPIRType>(id: result_type);
7145 auto expected_type = out_type;
7146 expected_type.basetype = input_type;
7147 string cast_op0 =
7148 expression_type(id: op0).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op0) : to_unpacked_expression(id: op0);
7149
7150 string expr;
7151 if (out_type.basetype != input_type)
7152 {
7153 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
7154 expr += '(';
7155 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts: to_expression(id: op1), ts: ")");
7156 expr += ')';
7157 }
7158 else
7159 {
7160 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts: to_expression(id: op1), ts: ")");
7161 }
7162
7163 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0));
7164 inherit_expression_dependencies(dst: result_id, source: op0);
7165}
7166
7167void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
7168 const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
7169{
7170 string cast_op0, cast_op1;
7171 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
7172 auto &out_type = get<SPIRType>(id: result_type);
7173
7174 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
7175 string expr;
7176 if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
7177 {
7178 expected_type.basetype = input_type;
7179 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
7180 expr += '(';
7181 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ")");
7182 expr += ')';
7183 }
7184 else
7185 {
7186 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ")");
7187 }
7188
7189 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1));
7190 inherit_expression_dependencies(dst: result_id, source: op0);
7191 inherit_expression_dependencies(dst: result_id, source: op1);
7192}
7193
7194void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
7195 uint32_t op2, const char *op)
7196{
7197 bool forward = should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2);
7198 emit_op(result_type, result_id,
7199 rhs: join(ts&: op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ", ", ts: to_unpacked_expression(id: op1), ts: ", ",
7200 ts: to_unpacked_expression(id: op2), ts: ")"),
7201 forwarding: forward);
7202
7203 inherit_expression_dependencies(dst: result_id, source: op0);
7204 inherit_expression_dependencies(dst: result_id, source: op1);
7205 inherit_expression_dependencies(dst: result_id, source: op2);
7206}
7207
7208void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
7209 uint32_t op2, uint32_t op3, const char *op)
7210{
7211 bool forward = should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2) && should_forward(id: op3);
7212 emit_op(result_type, result_id,
7213 rhs: join(ts&: op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ", ", ts: to_unpacked_expression(id: op1), ts: ", ",
7214 ts: to_unpacked_expression(id: op2), ts: ", ", ts: to_unpacked_expression(id: op3), ts: ")"),
7215 forwarding: forward);
7216
7217 inherit_expression_dependencies(dst: result_id, source: op0);
7218 inherit_expression_dependencies(dst: result_id, source: op1);
7219 inherit_expression_dependencies(dst: result_id, source: op2);
7220 inherit_expression_dependencies(dst: result_id, source: op3);
7221}
7222
7223void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
7224 uint32_t op2, uint32_t op3, const char *op,
7225 SPIRType::BaseType offset_count_type)
7226{
7227 // Only need to cast offset/count arguments. Types of base/insert must be same as result type,
7228 // and bitfieldInsert is sign invariant.
7229 bool forward = should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2) && should_forward(id: op3);
7230
7231 auto op0_expr = to_unpacked_expression(id: op0);
7232 auto op1_expr = to_unpacked_expression(id: op1);
7233 auto op2_expr = to_unpacked_expression(id: op2);
7234 auto op3_expr = to_unpacked_expression(id: op3);
7235
7236 assert(offset_count_type == SPIRType::UInt || offset_count_type == SPIRType::Int);
7237 SPIRType target_type { OpTypeInt };
7238 target_type.width = 32;
7239 target_type.vecsize = 1;
7240 target_type.basetype = offset_count_type;
7241
7242 if (expression_type(id: op2).basetype != offset_count_type)
7243 {
7244 // Value-cast here. Input might be 16-bit. GLSL requires int.
7245 op2_expr = join(ts: type_to_glsl_constructor(type: target_type), ts: "(", ts&: op2_expr, ts: ")");
7246 }
7247
7248 if (expression_type(id: op3).basetype != offset_count_type)
7249 {
7250 // Value-cast here. Input might be 16-bit. GLSL requires int.
7251 op3_expr = join(ts: type_to_glsl_constructor(type: target_type), ts: "(", ts&: op3_expr, ts: ")");
7252 }
7253
7254 emit_op(result_type, result_id, rhs: join(ts&: op, ts: "(", ts&: op0_expr, ts: ", ", ts&: op1_expr, ts: ", ", ts&: op2_expr, ts: ", ", ts&: op3_expr, ts: ")"),
7255 forwarding: forward);
7256
7257 inherit_expression_dependencies(dst: result_id, source: op0);
7258 inherit_expression_dependencies(dst: result_id, source: op1);
7259 inherit_expression_dependencies(dst: result_id, source: op2);
7260 inherit_expression_dependencies(dst: result_id, source: op3);
7261}
7262
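// Maps a modern texture builtin to its legacy GLSL name based on dimensionality and whether the
// sampler is a depth/shadow sampler, pulling in the required extensions along the way.
// Illustrative examples: "texture" on a sampler2D becomes "texture2D"; on a depth sampler in
// legacy ES it becomes roughly "shadow2DEXT" (via GL_EXT_shadow_samplers).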
7263string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
7264{
7265 const char *type;
7266 switch (imgtype.image.dim)
7267 {
7268 case spv::Dim1D:
7269 // Force 2D path for ES.
7270		if (options.es)
7271			type = "2D";
7272		else
7273			type = imgtype.image.arrayed ? "1DArray" : "1D";
7274 break;
7275 case spv::Dim2D:
7276 type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
7277 break;
7278 case spv::Dim3D:
7279 type = "3D";
7280 break;
7281 case spv::DimCube:
7282 type = "Cube";
7283 break;
7284 case spv::DimRect:
7285 type = "2DRect";
7286 break;
7287 case spv::DimBuffer:
7288 type = "Buffer";
7289 break;
7290 case spv::DimSubpassData:
7291 type = "2D";
7292 break;
7293 default:
7294 type = "";
7295 break;
7296 }
7297
7298 // In legacy GLSL, an extension is required for textureLod in the fragment
7299 // shader or textureGrad anywhere.
7300 bool legacy_lod_ext = false;
7301 auto &execution = get_entry_point();
7302 if (op == "textureGrad" || op == "textureProjGrad" ||
7303 ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
7304 {
7305 if (is_legacy_es())
7306 {
7307 legacy_lod_ext = true;
7308 require_extension_internal(ext: "GL_EXT_shader_texture_lod");
7309 }
7310 else if (is_legacy_desktop())
7311 require_extension_internal(ext: "GL_ARB_shader_texture_lod");
7312 }
7313
7314 if (op == "textureLodOffset" || op == "textureProjLodOffset")
7315 {
7316 if (is_legacy_es())
7317 SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
7318
7319 require_extension_internal(ext: "GL_EXT_gpu_shader4");
7320 }
7321
7322	// Legacy GLES has very limited support for shadow samplers.
7323	// Basically, only shadow2D and shadow2DProj work, through GL_EXT_shadow_samplers;
7324	// everything else just throws.
7325 bool is_comparison = is_depth_image(type: imgtype, id: tex);
7326 if (is_comparison && is_legacy_es())
7327 {
7328 if (op == "texture" || op == "textureProj")
7329 require_extension_internal(ext: "GL_EXT_shadow_samplers");
7330 else
7331 SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
7332
7333 if (imgtype.image.dim == spv::DimCube)
7334 return "shadowCubeNV";
7335 }
7336
7337 if (op == "textureSize")
7338 {
7339 if (is_legacy_es())
7340 SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
7341 if (is_comparison)
7342 SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
7343 require_extension_internal(ext: "GL_EXT_gpu_shader4");
7344 }
7345
7346 if (op == "texelFetch" && is_legacy_es())
7347 SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");
7348
7349 bool is_es_and_depth = is_legacy_es() && is_comparison;
7350 std::string type_prefix = is_comparison ? "shadow" : "texture";
7351
7352 if (op == "texture")
7353 return is_es_and_depth ? join(ts&: type_prefix, ts&: type, ts: "EXT") : join(ts&: type_prefix, ts&: type);
7354 else if (op == "textureLod")
7355 return join(ts&: type_prefix, ts&: type, ts: legacy_lod_ext ? "LodEXT" : "Lod");
7356 else if (op == "textureProj")
7357 return join(ts&: type_prefix, ts&: type, ts: is_es_and_depth ? "ProjEXT" : "Proj");
7358 else if (op == "textureGrad")
7359 return join(ts&: type_prefix, ts&: type, ts: is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
7360 else if (op == "textureProjLod")
7361 return join(ts&: type_prefix, ts&: type, ts: legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
7362 else if (op == "textureLodOffset")
7363 return join(ts&: type_prefix, ts&: type, ts: "LodOffset");
7364 else if (op == "textureProjGrad")
7365 return join(ts&: type_prefix, ts&: type,
7366 ts: is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
7367 else if (op == "textureProjLodOffset")
7368 return join(ts&: type_prefix, ts&: type, ts: "ProjLodOffset");
7369 else if (op == "textureSize")
7370 return join(ts: "textureSize", ts&: type);
7371 else if (op == "texelFetch")
7372 return join(ts: "texelFetch", ts&: type);
7373 else
7374 {
7375 SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
7376 }
7377}
7378
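// Detects mix()/OpSelect patterns where the two cases are the constants 0 and 1, so the whole
// select collapses into a constructor cast of the boolean selector,
// e.g. roughly "mix(0.0, 1.0, b)" -> "float(b)" (illustrative).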
7379bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
7380{
7381 auto *cleft = maybe_get<SPIRConstant>(id: left);
7382 auto *cright = maybe_get<SPIRConstant>(id: right);
7383 auto &lerptype = expression_type(id: lerp);
7384
7385 // If our targets aren't constants, we cannot use construction.
7386 if (!cleft || !cright)
7387 return false;
7388
7389 // If our targets are spec constants, we cannot use construction.
7390 if (cleft->specialization || cright->specialization)
7391 return false;
7392
7393 auto &value_type = get<SPIRType>(id: cleft->constant_type);
7394
7395 if (lerptype.basetype != SPIRType::Boolean)
7396 return false;
7397 if (value_type.basetype == SPIRType::Struct || is_array(type: value_type))
7398 return false;
7399 if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize)
7400 return false;
7401
7402	// The only valid way to use matrices in OpSelect (SPIR-V 1.4) is a scalar select.
7403	// A matrix(scalar) constructor only fills in the diagonal, so this gets messy very quickly.
7404	// Just avoid this case.
7405 if (value_type.columns > 1)
7406 return false;
7407
7408	// If our bool selects between 0 and 1, we can cast from bool instead, turning this into a trivial constructor cast.
7409 bool ret = true;
7410 for (uint32_t row = 0; ret && row < value_type.vecsize; row++)
7411 {
7412 switch (type.basetype)
7413 {
7414 case SPIRType::Short:
7415 case SPIRType::UShort:
7416 ret = cleft->scalar_u16(col: 0, row) == 0 && cright->scalar_u16(col: 0, row) == 1;
7417 break;
7418
7419 case SPIRType::Int:
7420 case SPIRType::UInt:
7421 ret = cleft->scalar(col: 0, row) == 0 && cright->scalar(col: 0, row) == 1;
7422 break;
7423
7424 case SPIRType::Half:
7425 ret = cleft->scalar_f16(col: 0, row) == 0.0f && cright->scalar_f16(col: 0, row) == 1.0f;
7426 break;
7427
7428 case SPIRType::Float:
7429 ret = cleft->scalar_f32(col: 0, row) == 0.0f && cright->scalar_f32(col: 0, row) == 1.0f;
7430 break;
7431
7432 case SPIRType::Double:
7433 ret = cleft->scalar_f64(col: 0, row) == 0.0 && cright->scalar_f64(col: 0, row) == 1.0;
7434 break;
7435
7436 case SPIRType::Int64:
7437 case SPIRType::UInt64:
7438 ret = cleft->scalar_u64(col: 0, row) == 0 && cright->scalar_u64(col: 0, row) == 1;
7439 break;
7440
7441 default:
7442 ret = false;
7443 break;
7444 }
7445 }
7446
7447 if (ret)
7448 op = type_to_glsl_constructor(type);
7449 return ret;
7450}
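// Illustrative example (hypothetical names): an OpSelect whose constant operands are 1 for the
// "true" side and 0 for the "false" side passes the checks above, so instead of mix() or a
// ternary we can emit a plain constructor cast:
//   int casted = int(cond); // rather than cond ? 1 : 0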
7451
7452string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
7453 uint32_t false_value)
7454{
7455 string expr;
7456 auto &lerptype = expression_type(id: select);
7457
7458 if (lerptype.vecsize == 1)
7459 expr = join(ts: to_enclosed_expression(id: select), ts: " ? ", ts: to_enclosed_pointer_expression(id: true_value), ts: " : ",
7460 ts: to_enclosed_pointer_expression(id: false_value));
7461 else
7462 {
7463 auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(id: expression, index: i); };
7464
7465 expr = type_to_glsl_constructor(type: restype);
7466 expr += "(";
7467 for (uint32_t i = 0; i < restype.vecsize; i++)
7468 {
7469 expr += swiz(select, i);
7470 expr += " ? ";
7471 expr += swiz(true_value, i);
7472 expr += " : ";
7473 expr += swiz(false_value, i);
7474 if (i + 1 < restype.vecsize)
7475 expr += ", ";
7476 }
7477 expr += ")";
7478 }
7479
7480 return expr;
7481}
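// For reference, a minimal sketch of the vector path above (hypothetical names): selecting between
// two vec2 values with a bvec2 condition becomes a per-component ternary wrapped in a constructor:
//   vec2(cond.x ? a.x : b.x, cond.y ? a.y : b.y)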
7482
7483void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
7484{
7485 auto &lerptype = expression_type(id: lerp);
7486 auto &restype = get<SPIRType>(id: result_type);
7487
7488 // If this results in a variable pointer, assume it may be written through.
7489 if (restype.pointer)
7490 {
7491 register_write(chain: left);
7492 register_write(chain: right);
7493 }
7494
7495 string mix_op;
7496 bool has_boolean_mix = *backend.boolean_mix_function &&
7497 ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
7498 bool trivial_mix = to_trivial_mix_op(type: restype, op&: mix_op, left, right, lerp);
7499
7500	// Cannot use boolean mix when the lerp argument is just a single boolean,
7501	// so fall back to regular ternary expressions.
7502 if (lerptype.vecsize == 1)
7503 has_boolean_mix = false;
7504
7505	// If we can reduce the mix to a simple cast, do so.
7506	// This helps for cases like int(bool) and uint(bool), which are implemented with
7507	// OpSelect between constants 1 and 0.
7508 if (trivial_mix)
7509 {
7510 emit_unary_func_op(result_type, result_id: id, op0: lerp, op: mix_op.c_str());
7511 }
7512 else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
7513 {
7514 // Boolean mix not supported on desktop without extension.
7515 // Was added in OpenGL 4.5 with ES 3.1 compat.
7516 //
7517 // Could use GL_EXT_shader_integer_mix on desktop at least,
7518 // but Apple doesn't support it. :(
7519 // Just implement it as ternary expressions.
7520 auto expr = to_ternary_expression(restype: get<SPIRType>(id: result_type), select: lerp, true_value: right, false_value: left);
7521 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: left) && should_forward(id: right) && should_forward(id: lerp));
7522 inherit_expression_dependencies(dst: id, source: left);
7523 inherit_expression_dependencies(dst: id, source: right);
7524 inherit_expression_dependencies(dst: id, source: lerp);
7525 }
7526 else if (lerptype.basetype == SPIRType::Boolean)
7527 emit_trinary_func_op(result_type, result_id: id, op0: left, op1: right, op2: lerp, op: backend.boolean_mix_function);
7528 else
7529 emit_trinary_func_op(result_type, result_id: id, op0: left, op1: right, op2: lerp, op: "mix");
7530}
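// Rough summary of the paths above, assuming the backend's boolean mix function is plain mix()
// (illustrative GLSL, hypothetical names):
//   trivial mix:                        float r = float(cond);
//   boolean mix (GL 4.5+ / ESSL 3.1+):  vec3 r = mix(a, b, bcond);
//   boolean lerp without boolean mix:   vec3 r = vec3(bcond.x ? b.x : a.x, ...);
//   non-boolean lerp:                   vec3 r = mix(a, b, t);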
7531
7532string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
7533{
7534 // Keep track of the array indices we have used to load the image.
7535 // We'll need to use the same array index into the combined image sampler array.
7536 auto image_expr = to_non_uniform_aware_expression(id: image_id);
7537 string array_expr;
7538 auto array_index = image_expr.find_first_of(c: '[');
7539 if (array_index != string::npos)
7540 array_expr = image_expr.substr(pos: array_index, n: string::npos);
7541
7542 auto &args = current_function->arguments;
7543
7544	// For GLSL and ESSL targets, we must enumerate all possible combinations of sampler2D(texture2D, sampler)
7545	// and redirect each combination to a new sampler2D uniform.
7546 auto *image = maybe_get_backing_variable(chain: image_id);
7547 auto *samp = maybe_get_backing_variable(chain: samp_id);
7548 if (image)
7549 image_id = image->self;
7550 if (samp)
7551 samp_id = samp->self;
7552
7553 auto image_itr = find_if(first: begin(cont&: args), last: end(cont&: args),
7554 pred: [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
7555
7556 auto sampler_itr = find_if(first: begin(cont&: args), last: end(cont&: args),
7557 pred: [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
7558
7559 if (image_itr != end(cont&: args) || sampler_itr != end(cont&: args))
7560 {
7561		// If either the image or the sampler originates from a function parameter, we will find it in our argument list.
7562 bool global_image = image_itr == end(cont&: args);
7563 bool global_sampler = sampler_itr == end(cont&: args);
7564 VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(cont&: args)));
7565 VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(cont&: args)));
7566
7567 auto &combined = current_function->combined_parameters;
7568 auto itr = find_if(first: begin(cont&: combined), last: end(cont&: combined), pred: [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
7569 return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
7570 p.sampler_id == sid;
7571 });
7572
7573 if (itr != end(cont&: combined))
7574 return to_expression(id: itr->id) + array_expr;
7575 else
7576 {
7577 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
7578 "build_combined_image_samplers() used "
7579 "before compile() was called?");
7580 }
7581 }
7582 else
7583 {
7584 // For global sampler2D, look directly at the global remapping table.
7585 auto &mapping = combined_image_samplers;
7586 auto itr = find_if(first: begin(cont&: mapping), last: end(cont&: mapping), pred: [image_id, samp_id](const CombinedImageSampler &combined) {
7587 return combined.image_id == image_id && combined.sampler_id == samp_id;
7588 });
7589
7590 if (itr != end(cont&: combined_image_samplers))
7591 return to_expression(id: itr->combined_id) + array_expr;
7592 else
7593 {
7594 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
7595 "before compile() was called?");
7596 }
7597 }
7598}
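// Illustrative example (hypothetical names): if the shader samples texture2D uTex through sampler
// uSamp, build_combined_image_samplers() will have registered a dedicated combined sampler2D for
// that pair, and this function returns its expression, re-applying any array index used on the
// image, e.g. "uCombined[2]" when the image expression was "uTex[2]".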
7599
7600bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op, const uint32_t *ops)
7601{
7602 switch (op)
7603 {
7604 case OpGroupNonUniformElect:
7605 case OpGroupNonUniformBallot:
7606 case OpGroupNonUniformBallotFindLSB:
7607 case OpGroupNonUniformBallotFindMSB:
7608 case OpGroupNonUniformBroadcast:
7609 case OpGroupNonUniformBroadcastFirst:
7610 case OpGroupNonUniformAll:
7611 case OpGroupNonUniformAny:
7612 case OpGroupNonUniformAllEqual:
7613 case OpControlBarrier:
7614 case OpMemoryBarrier:
7615 case OpGroupNonUniformBallotBitCount:
7616 case OpGroupNonUniformBallotBitExtract:
7617 case OpGroupNonUniformInverseBallot:
7618 return true;
7619 case OpGroupNonUniformIAdd:
7620 case OpGroupNonUniformFAdd:
7621 case OpGroupNonUniformIMul:
7622 case OpGroupNonUniformFMul:
7623 {
7624 const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
7625 if (operation == GroupOperationReduce || operation == GroupOperationInclusiveScan ||
7626 operation == GroupOperationExclusiveScan)
7627 {
7628 return true;
7629 }
7630 else
7631 {
7632 return false;
7633 }
7634 }
7635 default:
7636 return false;
7637 }
7638}
7639
7640void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
7641{
7642 if (options.vulkan_semantics && combined_image_samplers.empty())
7643 {
7644 emit_binary_func_op(result_type, result_id, op0: image_id, op1: samp_id,
7645 op: type_to_glsl(type: get<SPIRType>(id: result_type), id: result_id).c_str());
7646 }
7647 else
7648 {
7649 // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
7650 emit_op(result_type, result_id, rhs: to_combined_image_sampler(image_id, samp_id), forwarding: true, suppress_usage_tracking: true);
7651 }
7652
7653 // Make sure to suppress usage tracking and any expression invalidation.
7654 // It is illegal to create temporaries of opaque types.
7655 forwarded_temporaries.erase(x: result_id);
7656}
7657
7658static inline bool image_opcode_is_sample_no_dref(Op op)
7659{
7660 switch (op)
7661 {
7662 case OpImageSampleExplicitLod:
7663 case OpImageSampleImplicitLod:
7664 case OpImageSampleProjExplicitLod:
7665 case OpImageSampleProjImplicitLod:
7666 case OpImageFetch:
7667 case OpImageRead:
7668 case OpImageSparseSampleExplicitLod:
7669 case OpImageSparseSampleImplicitLod:
7670 case OpImageSparseSampleProjExplicitLod:
7671 case OpImageSparseSampleProjImplicitLod:
7672 case OpImageSparseFetch:
7673 case OpImageSparseRead:
7674 return true;
7675
7676 default:
7677 return false;
7678 }
7679}
7680
7681void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
7682 uint32_t &texel_id)
7683{
7684 // Need to allocate two temporaries.
7685 if (options.es)
7686 SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
7687 require_extension_internal(ext: "GL_ARB_sparse_texture2");
7688
7689 auto &temps = extra_sub_expressions[id];
7690 if (temps == 0)
7691 temps = ir.increase_bound_by(count: 2);
7692
7693 feedback_id = temps + 0;
7694 texel_id = temps + 1;
7695
7696 auto &return_type = get<SPIRType>(id: result_type_id);
7697 if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
7698 SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
7699 emit_uninitialized_temporary(result_type: return_type.member_types[0], result_id: feedback_id);
7700 emit_uninitialized_temporary(result_type: return_type.member_types[1], result_id: texel_id);
7701}
7702
7703uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
7704{
7705 auto itr = extra_sub_expressions.find(x: id);
7706 if (itr == extra_sub_expressions.end())
7707 return 0;
7708 else
7709 return itr->second + 1;
7710}
7711
7712void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
7713{
7714 auto *ops = stream(instr: i);
7715 auto op = static_cast<Op>(i.op);
7716
7717 SmallVector<uint32_t> inherited_expressions;
7718
7719 uint32_t result_type_id = ops[0];
7720 uint32_t id = ops[1];
7721 auto &return_type = get<SPIRType>(id: result_type_id);
7722
7723 uint32_t sparse_code_id = 0;
7724 uint32_t sparse_texel_id = 0;
7725 if (sparse)
7726 emit_sparse_feedback_temporaries(result_type_id, id, feedback_id&: sparse_code_id, texel_id&: sparse_texel_id);
7727
7728 bool forward = false;
7729 string expr = to_texture_op(i, sparse, forward: &forward, inherited_expressions);
7730
7731 if (sparse)
7732 {
7733 statement(ts: to_expression(id: sparse_code_id), ts: " = ", ts&: expr, ts: ";");
7734 expr = join(ts: type_to_glsl(type: return_type), ts: "(", ts: to_expression(id: sparse_code_id), ts: ", ", ts: to_expression(id: sparse_texel_id),
7735 ts: ")");
7736 forward = true;
7737 inherited_expressions.clear();
7738 }
7739
7740 emit_op(result_type: result_type_id, result_id: id, rhs: expr, forwarding: forward);
7741 for (auto &inherit : inherited_expressions)
7742 inherit_expression_dependencies(dst: id, source: inherit);
7743
7744 // Do not register sparse ops as control dependent as they are always lowered to a temporary.
7745 switch (op)
7746 {
7747 case OpImageSampleDrefImplicitLod:
7748 case OpImageSampleImplicitLod:
7749 case OpImageSampleProjImplicitLod:
7750 case OpImageSampleProjDrefImplicitLod:
7751 register_control_dependent_expression(expr: id);
7752 break;
7753
7754 default:
7755 break;
7756 }
7757}
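// For reference, a sketch of the sparse-feedback lowering above (GL_ARB_sparse_texture2;
// hypothetical names): two extra temporaries are allocated, the residency code is assigned first,
// and the result struct is then reassembled from both:
//   int _code; vec4 _texel;
//   _code = sparseTextureARB(uTex, vUV, _texel);
//   ResType _res = ResType(_code, _texel);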
7758
7759std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
7760 SmallVector<uint32_t> &inherited_expressions)
7761{
7762 auto *ops = stream(instr: i);
7763 auto op = static_cast<Op>(i.op);
7764 uint32_t length = i.length;
7765
7766 uint32_t result_type_id = ops[0];
7767 VariableID img = ops[2];
7768 uint32_t coord = ops[3];
7769 uint32_t dref = 0;
7770 uint32_t comp = 0;
7771 bool gather = false;
7772 bool proj = false;
7773 bool fetch = false;
7774 bool nonuniform_expression = false;
7775 const uint32_t *opt = nullptr;
7776
7777 auto &result_type = get<SPIRType>(id: result_type_id);
7778
7779 inherited_expressions.push_back(t: coord);
7780 if (has_decoration(id: img, decoration: DecorationNonUniform) && !maybe_get_backing_variable(chain: img))
7781 nonuniform_expression = true;
7782
7783 switch (op)
7784 {
7785 case OpImageSampleDrefImplicitLod:
7786 case OpImageSampleDrefExplicitLod:
7787 case OpImageSparseSampleDrefImplicitLod:
7788 case OpImageSparseSampleDrefExplicitLod:
7789 dref = ops[4];
7790 opt = &ops[5];
7791 length -= 5;
7792 break;
7793
7794 case OpImageSampleProjDrefImplicitLod:
7795 case OpImageSampleProjDrefExplicitLod:
7796 case OpImageSparseSampleProjDrefImplicitLod:
7797 case OpImageSparseSampleProjDrefExplicitLod:
7798 dref = ops[4];
7799 opt = &ops[5];
7800 length -= 5;
7801 proj = true;
7802 break;
7803
7804 case OpImageDrefGather:
7805 case OpImageSparseDrefGather:
7806 dref = ops[4];
7807 opt = &ops[5];
7808 length -= 5;
7809 gather = true;
7810 if (options.es && options.version < 310)
7811 SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
7812 else if (!options.es && options.version < 400)
7813 SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
7814 break;
7815
7816 case OpImageGather:
7817 case OpImageSparseGather:
7818 comp = ops[4];
7819 opt = &ops[5];
7820 length -= 5;
7821 gather = true;
7822 if (options.es && options.version < 310)
7823 SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
7824 else if (!options.es && options.version < 400)
7825 {
7826 if (!expression_is_constant_null(id: comp))
7827 SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
7828 require_extension_internal(ext: "GL_ARB_texture_gather");
7829 }
7830 break;
7831
7832 case OpImageFetch:
7833 case OpImageSparseFetch:
7834 case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
7835 opt = &ops[4];
7836 length -= 4;
7837 fetch = true;
7838 break;
7839
7840 case OpImageSampleProjImplicitLod:
7841 case OpImageSampleProjExplicitLod:
7842 case OpImageSparseSampleProjImplicitLod:
7843 case OpImageSparseSampleProjExplicitLod:
7844 opt = &ops[4];
7845 length -= 4;
7846 proj = true;
7847 break;
7848
7849 default:
7850 opt = &ops[4];
7851 length -= 4;
7852 break;
7853 }
7854
7855 // Bypass pointers because we need the real image struct
7856 auto &type = expression_type(id: img);
7857 auto &imgtype = get<SPIRType>(id: type.self);
7858
7859 uint32_t coord_components = 0;
7860 switch (imgtype.image.dim)
7861 {
7862 case spv::Dim1D:
7863 coord_components = 1;
7864 break;
7865 case spv::Dim2D:
7866 coord_components = 2;
7867 break;
7868 case spv::Dim3D:
7869 coord_components = 3;
7870 break;
7871 case spv::DimCube:
7872 coord_components = 3;
7873 break;
7874 case spv::DimBuffer:
7875 coord_components = 1;
7876 break;
7877 default:
7878 coord_components = 2;
7879 break;
7880 }
7881
7882 if (dref)
7883 inherited_expressions.push_back(t: dref);
7884
7885 if (proj)
7886 coord_components++;
7887 if (imgtype.image.arrayed)
7888 coord_components++;
7889
7890 uint32_t bias = 0;
7891 uint32_t lod = 0;
7892 uint32_t grad_x = 0;
7893 uint32_t grad_y = 0;
7894 uint32_t coffset = 0;
7895 uint32_t offset = 0;
7896 uint32_t coffsets = 0;
7897 uint32_t sample = 0;
7898 uint32_t minlod = 0;
7899 uint32_t flags = 0;
7900
7901 if (length)
7902 {
7903 flags = *opt++;
7904 length--;
7905 }
7906
7907 auto test = [&](uint32_t &v, uint32_t flag) {
7908 if (length && (flags & flag))
7909 {
7910 v = *opt++;
7911 inherited_expressions.push_back(t: v);
7912 length--;
7913 }
7914 };
7915
7916 test(bias, ImageOperandsBiasMask);
7917 test(lod, ImageOperandsLodMask);
7918 test(grad_x, ImageOperandsGradMask);
7919 test(grad_y, ImageOperandsGradMask);
7920 test(coffset, ImageOperandsConstOffsetMask);
7921 test(offset, ImageOperandsOffsetMask);
7922 test(coffsets, ImageOperandsConstOffsetsMask);
7923 test(sample, ImageOperandsSampleMask);
7924 test(minlod, ImageOperandsMinLodMask);
7925
7926 TextureFunctionBaseArguments base_args = {};
7927 base_args.img = img;
7928 base_args.imgtype = &imgtype;
7929 base_args.is_fetch = fetch != 0;
7930 base_args.is_gather = gather != 0;
7931 base_args.is_proj = proj != 0;
7932
7933 string expr;
7934 TextureFunctionNameArguments name_args = {};
7935
7936 name_args.base = base_args;
7937 name_args.has_array_offsets = coffsets != 0;
7938 name_args.has_offset = coffset != 0 || offset != 0;
7939 name_args.has_grad = grad_x != 0 || grad_y != 0;
7940 name_args.has_dref = dref != 0;
7941 name_args.is_sparse_feedback = sparse;
7942 name_args.has_min_lod = minlod != 0;
7943 name_args.lod = lod;
7944 expr += to_function_name(args: name_args);
7945 expr += "(";
7946
7947 uint32_t sparse_texel_id = 0;
7948 if (sparse)
7949 sparse_texel_id = get_sparse_feedback_texel_id(id: ops[1]);
7950
7951 TextureFunctionArguments args = {};
7952 args.base = base_args;
7953 args.coord = coord;
7954 args.coord_components = coord_components;
7955 args.dref = dref;
7956 args.grad_x = grad_x;
7957 args.grad_y = grad_y;
7958 args.lod = lod;
7959 args.has_array_offsets = coffsets != 0;
7960
7961 if (coffsets)
7962 args.offset = coffsets;
7963 else if (coffset)
7964 args.offset = coffset;
7965 else
7966 args.offset = offset;
7967
7968 args.bias = bias;
7969 args.component = comp;
7970 args.sample = sample;
7971 args.sparse_texel = sparse_texel_id;
7972 args.min_lod = minlod;
7973 args.nonuniform_expression = nonuniform_expression;
7974 expr += to_function_args(args, p_forward: forward);
7975 expr += ")";
7976
7977 // texture(samplerXShadow) returns float. shadowX() returns vec4, but only in desktop GLSL. Swizzle here.
7978 if (is_legacy() && !options.es && is_depth_image(type: imgtype, id: img))
7979 expr += ".r";
7980
7981	// Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
7982	// Remap back to 4 components, as the sampling opcodes expect.
7983 if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
7984 {
7985 bool image_is_depth = false;
7986 const auto *combined = maybe_get<SPIRCombinedImageSampler>(id: img);
7987 VariableID image_id = combined ? combined->image : img;
7988
7989 if (combined && is_depth_image(type: imgtype, id: combined->image))
7990 image_is_depth = true;
7991 else if (is_depth_image(type: imgtype, id: img))
7992 image_is_depth = true;
7993
7994 // We must also check the backing variable for the image.
7995 // We might have loaded an OpImage, and used that handle for two different purposes.
7996 // Once with comparison, once without.
7997 auto *image_variable = maybe_get_backing_variable(chain: image_id);
7998 if (image_variable && is_depth_image(type: get<SPIRType>(id: image_variable->basetype), id: image_variable->self))
7999 image_is_depth = true;
8000
8001 if (image_is_depth)
8002 expr = remap_swizzle(out_type: result_type, input_components: 1, expr);
8003 }
8004
8005 if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
8006 {
8007 // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
8008 // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision.
8009 expr = join(ts: type_to_glsl_constructor(type: result_type), ts: "(", ts&: expr, ts: ")");
8010 }
8011
8012 // Deals with reads from MSL. We might need to downconvert to fewer components.
8013 if (op == OpImageRead)
8014 expr = remap_swizzle(out_type: result_type, input_components: 4, expr);
8015
8016 return expr;
8017}
8018
8019bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
8020{
8021 auto *c = maybe_get<SPIRConstant>(id);
8022 if (!c)
8023 return false;
8024 return c->constant_is_null();
8025}
8026
8027bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
8028{
8029 auto &type = expression_type(id: ptr);
8030 if (!is_array(type: get_pointee_type(type)))
8031 return false;
8032
8033 if (!backend.array_is_value_type)
8034 return true;
8035
8036 auto *var = maybe_get_backing_variable(chain: ptr);
8037 if (!var)
8038 return false;
8039
8040 auto &backed_type = get<SPIRType>(id: var->basetype);
8041 return !backend.array_is_value_type_in_buffer_blocks && backed_type.basetype == SPIRType::Struct &&
8042 has_member_decoration(id: backed_type.self, index: 0, decoration: DecorationOffset);
8043}
8044
8045// Returns the function name for a texture sampling function for the specified image and sampling characteristics.
8046// For some subclasses, the function is a method on the specified image.
8047string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
8048{
8049 if (args.has_min_lod)
8050 {
8051 if (options.es)
8052 SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
8053 require_extension_internal(ext: "GL_ARB_sparse_texture_clamp");
8054 }
8055
8056 string fname;
8057 auto &imgtype = *args.base.imgtype;
8058 VariableID tex = args.base.img;
8059
8060 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
8061 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
8062 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
8063 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
8064 bool workaround_lod_array_shadow_as_grad = false;
8065 if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
8066 is_depth_image(type: imgtype, id: tex) && args.lod && !args.base.is_fetch)
8067 {
8068 if (!expression_is_constant_null(id: args.lod))
8069 {
8070 SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
8071 "expressed in GLSL.");
8072 }
8073 workaround_lod_array_shadow_as_grad = true;
8074 }
8075
8076 if (args.is_sparse_feedback)
8077 fname += "sparse";
8078
8079 if (args.base.is_fetch)
8080 fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
8081 else
8082 {
8083 fname += args.is_sparse_feedback ? "Texture" : "texture";
8084
8085 if (args.base.is_gather)
8086 fname += "Gather";
8087 if (args.has_array_offsets)
8088 fname += "Offsets";
8089 if (args.base.is_proj)
8090 fname += "Proj";
8091 if (args.has_grad || workaround_lod_array_shadow_as_grad)
8092 fname += "Grad";
8093 if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
8094 fname += "Lod";
8095 }
8096
8097 if (args.has_offset)
8098 fname += "Offset";
8099
8100 if (args.has_min_lod)
8101 fname += "Clamp";
8102
8103 if (args.is_sparse_feedback || args.has_min_lod)
8104 fname += "ARB";
8105
8106 return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(op: fname, imgtype, tex) : fname;
8107}
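// Example of the name composition above (illustrative, not exhaustive): a sparse gather with a
// constant offset becomes "sparseTextureGatherOffsetARB", and a min-LOD clamped sample becomes
// "textureClampARB" (GL_ARB_sparse_texture2 / GL_ARB_sparse_texture_clamp respectively).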
8108
8109std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
8110{
8111 auto *var = maybe_get_backing_variable(chain: id);
8112
8113 // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
8114 // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
8115 if (var)
8116 {
8117 auto &type = get<SPIRType>(id: var->basetype);
8118 if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
8119 {
8120 if (options.vulkan_semantics)
8121 {
8122 if (dummy_sampler_id)
8123 {
8124 // Don't need to consider Shadow state since the dummy sampler is always non-shadow.
8125 auto sampled_type = type;
8126 sampled_type.basetype = SPIRType::SampledImage;
8127 return join(ts: type_to_glsl(type: sampled_type), ts: "(", ts: to_non_uniform_aware_expression(id), ts: ", ",
8128 ts: to_expression(id: dummy_sampler_id), ts: ")");
8129 }
8130 else
8131 {
8132 // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
8133 require_extension_internal(ext: "GL_EXT_samplerless_texture_functions");
8134 }
8135 }
8136 else
8137 {
8138 if (!dummy_sampler_id)
8139 SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
8140 "build_dummy_sampler_for_combined_images() called?");
8141
8142 return to_combined_image_sampler(image_id: id, samp_id: dummy_sampler_id);
8143 }
8144 }
8145 }
8146
8147 return to_non_uniform_aware_expression(id);
8148}
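// Illustrative example (hypothetical names): fetching from a separate texture2D uTex in Vulkan GLSL
// with a dummy sampler available becomes roughly
//   texelFetch(sampler2D(uTex, uDummySampler), coord, 0)
// whereas with GL_EXT_samplerless_texture_functions the texture expression is passed through as-is.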
8149
8150// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
8151string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
8152{
8153 VariableID img = args.base.img;
8154 auto &imgtype = *args.base.imgtype;
8155
8156 string farg_str;
8157 if (args.base.is_fetch)
8158 farg_str = convert_separate_image_to_expression(id: img);
8159 else
8160 farg_str = to_non_uniform_aware_expression(id: img);
8161
8162 if (args.nonuniform_expression && farg_str.find_first_of(c: '[') != string::npos)
8163 {
8164 // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
8165 farg_str = join(ts&: backend.nonuniform_qualifier, ts: "(", ts&: farg_str, ts: ")");
8166 }
8167
8168 bool swizz_func = backend.swizzle_is_function;
8169 auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
8170 if (comps == in_comps)
8171 return "";
8172
8173 switch (comps)
8174 {
8175 case 1:
8176 return ".x";
8177 case 2:
8178 return swizz_func ? ".xy()" : ".xy";
8179 case 3:
8180 return swizz_func ? ".xyz()" : ".xyz";
8181 default:
8182 return "";
8183 }
8184 };
8185
8186 bool forward = should_forward(id: args.coord);
8187
8188 // The IR can give us more components than we need, so chop them off as needed.
8189 auto swizzle_expr = swizzle(args.coord_components, expression_type(id: args.coord).vecsize);
8190 // Only enclose the UV expression if needed.
8191 auto coord_expr =
8192 (*swizzle_expr == '\0') ? to_expression(id: args.coord) : (to_enclosed_expression(id: args.coord) + swizzle_expr);
8193
8194 // texelFetch only takes int, not uint.
8195 auto &coord_type = expression_type(id: args.coord);
8196 if (coord_type.basetype == SPIRType::UInt)
8197 {
8198 auto expected_type = coord_type;
8199 expected_type.vecsize = args.coord_components;
8200 expected_type.basetype = SPIRType::Int;
8201 coord_expr = bitcast_expression(target_type: expected_type, expr_type: coord_type.basetype, expr: coord_expr);
8202 }
8203
8204 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
8205 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
8206 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
8207 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
8208 bool workaround_lod_array_shadow_as_grad =
8209 ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
8210 is_depth_image(type: imgtype, id: img) && args.lod != 0 && !args.base.is_fetch;
8211
8212 if (args.dref)
8213 {
8214 forward = forward && should_forward(id: args.dref);
8215
8216 // SPIR-V splits dref and coordinate.
8217 if (args.base.is_gather ||
8218 args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
8219 {
8220 farg_str += ", ";
8221 farg_str += to_expression(id: args.coord);
8222 farg_str += ", ";
8223 farg_str += to_expression(id: args.dref);
8224 }
8225 else if (args.base.is_proj)
8226 {
8227 // Have to reshuffle so we get vec4(coord, dref, proj), special case.
8228			// Other shading languages split up the coordinate and compare value arguments like SPIR-V does.
8229 // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
8230 farg_str += ", vec4(";
8231
8232 if (imgtype.image.dim == Dim1D)
8233 {
8234 // Could reuse coord_expr, but we will mess up the temporary usage checking.
8235 farg_str += to_enclosed_expression(id: args.coord) + ".x";
8236 farg_str += ", ";
8237 farg_str += "0.0, ";
8238 farg_str += to_expression(id: args.dref);
8239 farg_str += ", ";
8240 farg_str += to_enclosed_expression(id: args.coord) + ".y)";
8241 }
8242 else if (imgtype.image.dim == Dim2D)
8243 {
8244 // Could reuse coord_expr, but we will mess up the temporary usage checking.
8245 farg_str += to_enclosed_expression(id: args.coord) + (swizz_func ? ".xy()" : ".xy");
8246 farg_str += ", ";
8247 farg_str += to_expression(id: args.dref);
8248 farg_str += ", ";
8249 farg_str += to_enclosed_expression(id: args.coord) + ".z)";
8250 }
8251 else
8252 SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
8253 }
8254 else
8255 {
8256 // Create a composite which merges coord/dref into a single vector.
8257 auto type = expression_type(id: args.coord);
8258 type.vecsize = args.coord_components + 1;
8259 if (imgtype.image.dim == Dim1D && options.es)
8260 type.vecsize++;
8261 farg_str += ", ";
8262 farg_str += type_to_glsl_constructor(type);
8263 farg_str += "(";
8264
8265 if (imgtype.image.dim == Dim1D && options.es)
8266 {
8267 if (imgtype.image.arrayed)
8268 {
8269 farg_str += enclose_expression(expr: coord_expr) + ".x";
8270 farg_str += ", 0.0, ";
8271 farg_str += enclose_expression(expr: coord_expr) + ".y";
8272 }
8273 else
8274 {
8275 farg_str += coord_expr;
8276 farg_str += ", 0.0";
8277 }
8278 }
8279 else
8280 farg_str += coord_expr;
8281
8282 farg_str += ", ";
8283 farg_str += to_expression(id: args.dref);
8284 farg_str += ")";
8285 }
8286 }
8287 else
8288 {
8289 if (imgtype.image.dim == Dim1D && options.es)
8290 {
8291 // Have to fake a second coordinate.
8292 if (type_is_floating_point(type: coord_type))
8293 {
8294 // Cannot mix proj and array.
8295 if (imgtype.image.arrayed || args.base.is_proj)
8296 {
8297 coord_expr = join(ts: "vec3(", ts: enclose_expression(expr: coord_expr), ts: ".x, 0.0, ",
8298 ts: enclose_expression(expr: coord_expr), ts: ".y)");
8299 }
8300 else
8301 coord_expr = join(ts: "vec2(", ts&: coord_expr, ts: ", 0.0)");
8302 }
8303 else
8304 {
8305 if (imgtype.image.arrayed)
8306 {
8307 coord_expr = join(ts: "ivec3(", ts: enclose_expression(expr: coord_expr),
8308 ts: ".x, 0, ",
8309 ts: enclose_expression(expr: coord_expr), ts: ".y)");
8310 }
8311 else
8312 coord_expr = join(ts: "ivec2(", ts&: coord_expr, ts: ", 0)");
8313 }
8314 }
8315
8316 farg_str += ", ";
8317 farg_str += coord_expr;
8318 }
8319
8320 if (args.grad_x || args.grad_y)
8321 {
8322 forward = forward && should_forward(id: args.grad_x);
8323 forward = forward && should_forward(id: args.grad_y);
8324 farg_str += ", ";
8325 farg_str += to_expression(id: args.grad_x);
8326 farg_str += ", ";
8327 farg_str += to_expression(id: args.grad_y);
8328 }
8329
8330 if (args.lod)
8331 {
8332 if (workaround_lod_array_shadow_as_grad)
8333 {
8334 // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
8335 // Implementing this as plain texture() is not safe on some implementations.
8336 if (imgtype.image.dim == Dim2D)
8337 farg_str += ", vec2(0.0), vec2(0.0)";
8338 else if (imgtype.image.dim == DimCube)
8339 farg_str += ", vec3(0.0), vec3(0.0)";
8340 }
8341 else
8342 {
8343 forward = forward && should_forward(id: args.lod);
8344 farg_str += ", ";
8345
8346 // Lod expression for TexelFetch in GLSL must be int, and only int.
8347 if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
8348 farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.lod);
8349 else
8350 farg_str += to_expression(id: args.lod);
8351 }
8352 }
8353 else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
8354 {
8355		// The Lod argument is optional in OpImageFetch, but GLSL requires a LOD value, so pick 0 as the default.
8356 farg_str += ", 0";
8357 }
8358
8359 if (args.offset)
8360 {
8361 forward = forward && should_forward(id: args.offset);
8362 farg_str += ", ";
8363 farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.offset);
8364 }
8365
8366 if (args.sample)
8367 {
8368 farg_str += ", ";
8369 farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.sample);
8370 }
8371
8372 if (args.min_lod)
8373 {
8374 farg_str += ", ";
8375 farg_str += to_expression(id: args.min_lod);
8376 }
8377
8378 if (args.sparse_texel)
8379 {
8380		// The sparse texel output parameter comes after everything else, except that it goes before the optional component/bias arguments.
8381 farg_str += ", ";
8382 farg_str += to_expression(id: args.sparse_texel);
8383 }
8384
8385 if (args.bias)
8386 {
8387 forward = forward && should_forward(id: args.bias);
8388 farg_str += ", ";
8389 farg_str += to_expression(id: args.bias);
8390 }
8391
8392 if (args.component && !expression_is_constant_null(id: args.component))
8393 {
8394 forward = forward && should_forward(id: args.component);
8395 farg_str += ", ";
8396 farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.component);
8397 }
8398
8399 *p_forward = forward;
8400
8401 return farg_str;
8402}
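// Two illustrative argument layouts produced above (hypothetical names):
//   depth compare, non-proj, non-gather: coord and dref are merged into one vector, e.g.
//     texture(uShadow, vec3(vUV, refZ))
//   textureLod on sampler2DArrayShadow: rewritten as a zero-gradient textureGrad, e.g.
//     textureGrad(uShadowArray, vec4(vUV, layer, refZ), vec2(0.0), vec2(0.0))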
8403
8404Op CompilerGLSL::get_remapped_spirv_op(Op op) const
8405{
8406 if (options.relax_nan_checks)
8407 {
8408 switch (op)
8409 {
8410 case OpFUnordLessThan:
8411 op = OpFOrdLessThan;
8412 break;
8413 case OpFUnordLessThanEqual:
8414 op = OpFOrdLessThanEqual;
8415 break;
8416 case OpFUnordGreaterThan:
8417 op = OpFOrdGreaterThan;
8418 break;
8419 case OpFUnordGreaterThanEqual:
8420 op = OpFOrdGreaterThanEqual;
8421 break;
8422 case OpFUnordEqual:
8423 op = OpFOrdEqual;
8424 break;
8425 case OpFOrdNotEqual:
8426 op = OpFUnordNotEqual;
8427 break;
8428
8429 default:
8430 break;
8431 }
8432 }
8433
8434 return op;
8435}
8436
8437GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const
8438{
8439 // Relax to non-NaN aware opcodes.
8440 if (options.relax_nan_checks)
8441 {
8442 switch (std450_op)
8443 {
8444 case GLSLstd450NClamp:
8445 std450_op = GLSLstd450FClamp;
8446 break;
8447 case GLSLstd450NMin:
8448 std450_op = GLSLstd450FMin;
8449 break;
8450 case GLSLstd450NMax:
8451 std450_op = GLSLstd450FMax;
8452 break;
8453 default:
8454 break;
8455 }
8456 }
8457
8458 return std450_op;
8459}
8460
8461void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
8462{
8463 auto op = static_cast<GLSLstd450>(eop);
8464
8465 if (is_legacy() && is_unsigned_glsl_opcode(op))
8466 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
8467
8468 // If we need to do implicit bitcasts, make sure we do it with the correct type.
8469 uint32_t integer_width = get_integer_width_for_glsl_instruction(op, arguments: args, length);
8470 auto int_type = to_signed_basetype(width: integer_width);
8471 auto uint_type = to_unsigned_basetype(width: integer_width);
8472
8473 op = get_remapped_glsl_op(std450_op: op);
8474
8475 switch (op)
8476 {
8477 // FP fiddling
8478 case GLSLstd450Round:
8479 if (!is_legacy())
8480 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "round");
8481 else
8482 {
8483 auto op0 = to_enclosed_expression(id: args[0]);
8484 auto &op0_type = expression_type(id: args[0]);
8485 auto expr = join(ts: "floor(", ts&: op0, ts: " + ", ts: type_to_glsl_constructor(type: op0_type), ts: "(0.5))");
8486 bool forward = should_forward(id: args[0]);
8487 emit_op(result_type, result_id: id, rhs: expr, forwarding: forward);
8488 inherit_expression_dependencies(dst: id, source: args[0]);
8489 }
8490 break;
8491
8492 case GLSLstd450RoundEven:
8493 if (!is_legacy())
8494 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "roundEven");
8495 else if (!options.es)
8496 {
8497 // This extension provides round() with round-to-even semantics.
8498 require_extension_internal(ext: "GL_EXT_gpu_shader4");
8499 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "round");
8500 }
8501 else
8502 SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
8503 break;
8504
8505 case GLSLstd450Trunc:
8506 if (!is_legacy())
8507 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "trunc");
8508 else
8509 {
8510 // Implement by value-casting to int and back.
8511 bool forward = should_forward(id: args[0]);
8512 auto op0 = to_unpacked_expression(id: args[0]);
8513 auto &op0_type = expression_type(id: args[0]);
8514 auto via_type = op0_type;
8515 via_type.basetype = SPIRType::Int;
8516 auto expr = join(ts: type_to_glsl(type: op0_type), ts: "(", ts: type_to_glsl(type: via_type), ts: "(", ts&: op0, ts: "))");
8517 emit_op(result_type, result_id: id, rhs: expr, forwarding: forward);
8518 inherit_expression_dependencies(dst: id, source: args[0]);
8519 }
8520 break;
8521
8522 case GLSLstd450SAbs:
8523 emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "abs", input_type: int_type, expected_result_type: int_type);
8524 break;
8525 case GLSLstd450FAbs:
8526 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "abs");
8527 break;
8528 case GLSLstd450SSign:
8529 emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "sign", input_type: int_type, expected_result_type: int_type);
8530 break;
8531 case GLSLstd450FSign:
8532 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "sign");
8533 break;
8534 case GLSLstd450Floor:
8535 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "floor");
8536 break;
8537 case GLSLstd450Ceil:
8538 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "ceil");
8539 break;
8540 case GLSLstd450Fract:
8541 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "fract");
8542 break;
8543 case GLSLstd450Radians:
8544 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "radians");
8545 break;
8546 case GLSLstd450Degrees:
8547 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "degrees");
8548 break;
8549 case GLSLstd450Fma:
8550 if ((!options.es && options.version < 400) || (options.es && options.version < 320))
8551 {
8552 auto expr = join(ts: to_enclosed_expression(id: args[0]), ts: " * ", ts: to_enclosed_expression(id: args[1]), ts: " + ",
8553 ts: to_enclosed_expression(id: args[2]));
8554
8555 emit_op(result_type, result_id: id, rhs: expr,
8556 forwarding: should_forward(id: args[0]) && should_forward(id: args[1]) && should_forward(id: args[2]));
8557 for (uint32_t i = 0; i < 3; i++)
8558 inherit_expression_dependencies(dst: id, source: args[i]);
8559 }
8560 else
8561 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "fma");
8562 break;
8563
8564 case GLSLstd450Modf:
8565 register_call_out_argument(id: args[1]);
8566 if (!is_legacy())
8567 {
8568 forced_temporaries.insert(x: id);
8569 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "modf");
8570 }
8571 else
8572 {
8573 //NB. legacy GLSL doesn't have trunc() either, so we do a value cast
8574 auto &op1_type = expression_type(id: args[1]);
8575 auto via_type = op1_type;
8576 via_type.basetype = SPIRType::Int;
8577 statement(ts: to_expression(id: args[1]), ts: " = ",
8578 ts: type_to_glsl(type: op1_type), ts: "(", ts: type_to_glsl(type: via_type),
8579 ts: "(", ts: to_expression(id: args[0]), ts: "));");
8580 emit_binary_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "-");
8581 }
8582 break;
8583
8584 case GLSLstd450ModfStruct:
8585 {
8586 auto &type = get<SPIRType>(id: result_type);
8587 emit_uninitialized_temporary_expression(type: result_type, id);
8588 if (!is_legacy())
8589 {
8590 statement(ts: to_expression(id), ts: ".", ts: to_member_name(type, index: 0), ts: " = ", ts: "modf(", ts: to_expression(id: args[0]), ts: ", ",
8591 ts: to_expression(id), ts: ".", ts: to_member_name(type, index: 1), ts: ");");
8592 }
8593 else
8594 {
8595 //NB. legacy GLSL doesn't have trunc() either, so we do a value cast
8596 auto &op0_type = expression_type(id: args[0]);
8597 auto via_type = op0_type;
8598 via_type.basetype = SPIRType::Int;
8599 statement(ts: to_expression(id), ts: ".", ts: to_member_name(type, index: 1), ts: " = ", ts: type_to_glsl(type: op0_type),
8600 ts: "(", ts: type_to_glsl(type: via_type), ts: "(", ts: to_expression(id: args[0]), ts: "));");
8601 statement(ts: to_expression(id), ts: ".", ts: to_member_name(type, index: 0), ts: " = ", ts: to_enclosed_expression(id: args[0]), ts: " - ",
8602 ts: to_expression(id), ts: ".", ts: to_member_name(type, index: 1), ts: ";");
8603 }
8604 break;
8605 }
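	// For reference, the legacy Modf/ModfStruct emulation above expands to roughly (hypothetical names):
	//   whole = float(int(x)); // trunc() is unavailable, so value-cast through int
	//   frac = x - whole;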
8606
8607 // Minmax
8608 case GLSLstd450UMin:
8609 emit_binary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op: "min", input_type: uint_type, skip_cast_if_equal_type: false);
8610 break;
8611
8612 case GLSLstd450SMin:
8613 emit_binary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op: "min", input_type: int_type, skip_cast_if_equal_type: false);
8614 break;
8615
8616 case GLSLstd450FMin:
8617 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "min");
8618 break;
8619
8620 case GLSLstd450FMax:
8621 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "max");
8622 break;
8623
8624 case GLSLstd450UMax:
8625 emit_binary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op: "max", input_type: uint_type, skip_cast_if_equal_type: false);
8626 break;
8627
8628 case GLSLstd450SMax:
8629 emit_binary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op: "max", input_type: int_type, skip_cast_if_equal_type: false);
8630 break;
8631
8632 case GLSLstd450FClamp:
8633 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "clamp");
8634 break;
8635
8636 case GLSLstd450UClamp:
8637 emit_trinary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "clamp", input_type: uint_type);
8638 break;
8639
8640 case GLSLstd450SClamp:
8641 emit_trinary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "clamp", input_type: int_type);
8642 break;
8643
8644 // Trig
8645 case GLSLstd450Sin:
8646 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "sin");
8647 break;
8648 case GLSLstd450Cos:
8649 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "cos");
8650 break;
8651 case GLSLstd450Tan:
8652 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "tan");
8653 break;
8654 case GLSLstd450Asin:
8655 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "asin");
8656 break;
8657 case GLSLstd450Acos:
8658 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "acos");
8659 break;
8660 case GLSLstd450Atan:
8661 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "atan");
8662 break;
8663 case GLSLstd450Sinh:
8664 if (!is_legacy())
8665 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "sinh");
8666 else
8667 {
8668 bool forward = should_forward(id: args[0]);
8669 auto expr = join(ts: "(exp(", ts: to_expression(id: args[0]), ts: ") - exp(-", ts: to_enclosed_expression(id: args[0]), ts: ")) * 0.5");
8670 emit_op(result_type, result_id: id, rhs: expr, forwarding: forward);
8671 inherit_expression_dependencies(dst: id, source: args[0]);
8672 }
8673 break;
8674 case GLSLstd450Cosh:
8675 if (!is_legacy())
8676 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "cosh");
8677 else
8678 {
8679 bool forward = should_forward(id: args[0]);
8680 auto expr = join(ts: "(exp(", ts: to_expression(id: args[0]), ts: ") + exp(-", ts: to_enclosed_expression(id: args[0]), ts: ")) * 0.5");
8681 emit_op(result_type, result_id: id, rhs: expr, forwarding: forward);
8682 inherit_expression_dependencies(dst: id, source: args[0]);
8683 }
8684 break;
8685 case GLSLstd450Tanh:
8686 if (!is_legacy())
8687 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "tanh");
8688 else
8689 {
8690 // Create temporaries to store the result of exp(arg) and exp(-arg).
8691 uint32_t &ids = extra_sub_expressions[id];
8692 if (!ids)
8693 {
8694 ids = ir.increase_bound_by(count: 2);
8695
8696 // Inherit precision qualifier (legacy has no NoContraction).
8697 if (has_decoration(id, decoration: DecorationRelaxedPrecision))
8698 {
8699 set_decoration(id: ids, decoration: DecorationRelaxedPrecision);
8700 set_decoration(id: ids + 1, decoration: DecorationRelaxedPrecision);
8701 }
8702 }
8703 uint32_t epos_id = ids;
8704 uint32_t eneg_id = ids + 1;
8705
8706 emit_op(result_type, result_id: epos_id, rhs: join(ts: "exp(", ts: to_expression(id: args[0]), ts: ")"), forwarding: false);
8707 emit_op(result_type, result_id: eneg_id, rhs: join(ts: "exp(-", ts: to_enclosed_expression(id: args[0]), ts: ")"), forwarding: false);
8708 inherit_expression_dependencies(dst: epos_id, source: args[0]);
8709 inherit_expression_dependencies(dst: eneg_id, source: args[0]);
8710
8711 auto expr = join(ts: "(", ts: to_enclosed_expression(id: epos_id), ts: " - ", ts: to_enclosed_expression(id: eneg_id), ts: ") / "
8712 "(", ts: to_enclosed_expression(id: epos_id), ts: " + ", ts: to_enclosed_expression(id: eneg_id), ts: ")");
8713 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
8714 inherit_expression_dependencies(dst: id, source: epos_id);
8715 inherit_expression_dependencies(dst: id, source: eneg_id);
8716 }
8717 break;
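	// For reference, the legacy tanh emulation above emits two forced temporaries and combines them,
	// roughly (hypothetical names):
	//   float _e0 = exp(x);
	//   float _e1 = exp(-x);
	//   float r = (_e0 - _e1) / (_e0 + _e1);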
8718 case GLSLstd450Asinh:
8719 if (!is_legacy())
8720 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "asinh");
8721 else
8722 emit_emulated_ahyper_op(result_type, result_id: id, op0: args[0], op: GLSLstd450Asinh);
8723 break;
8724 case GLSLstd450Acosh:
8725 if (!is_legacy())
8726 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "acosh");
8727 else
8728 emit_emulated_ahyper_op(result_type, result_id: id, op0: args[0], op: GLSLstd450Acosh);
8729 break;
8730 case GLSLstd450Atanh:
8731 if (!is_legacy())
8732 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "atanh");
8733 else
8734 emit_emulated_ahyper_op(result_type, result_id: id, op0: args[0], op: GLSLstd450Atanh);
8735 break;
8736 case GLSLstd450Atan2:
8737 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "atan");
8738 break;
8739
8740 // Exponentials
8741 case GLSLstd450Pow:
8742 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "pow");
8743 break;
8744 case GLSLstd450Exp:
8745 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "exp");
8746 break;
8747 case GLSLstd450Log:
8748 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "log");
8749 break;
8750 case GLSLstd450Exp2:
8751 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "exp2");
8752 break;
8753 case GLSLstd450Log2:
8754 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "log2");
8755 break;
8756 case GLSLstd450Sqrt:
8757 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "sqrt");
8758 break;
8759 case GLSLstd450InverseSqrt:
8760 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "inversesqrt");
8761 break;
8762
8763 // Matrix math
8764 case GLSLstd450Determinant:
8765 {
8766 // No need to transpose - it doesn't affect the determinant
8767 auto *e = maybe_get<SPIRExpression>(id: args[0]);
8768 bool old_transpose = e && e->need_transpose;
8769 if (old_transpose)
8770 e->need_transpose = false;
8771
8772 if (options.version < 150) // also matches ES 100
8773 {
8774 auto &type = expression_type(id: args[0]);
8775 assert(type.vecsize >= 2 && type.vecsize <= 4);
8776 assert(type.vecsize == type.columns);
8777
8778 // ARB_gpu_shader_fp64 needs GLSL 150, other types are not valid
8779 if (type.basetype != SPIRType::Float)
8780 SPIRV_CROSS_THROW("Unsupported type for matrix determinant");
8781
8782 bool relaxed = has_decoration(id, decoration: DecorationRelaxedPrecision);
8783 require_polyfill(polyfill: static_cast<Polyfill>(PolyfillDeterminant2x2 << (type.vecsize - 2)),
8784 relaxed);
8785 emit_unary_func_op(result_type, result_id: id, op0: args[0],
8786 op: (options.es && relaxed) ? "spvDeterminantMP" : "spvDeterminant");
8787 }
8788 else
8789 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "determinant");
8790
8791 if (old_transpose)
8792 e->need_transpose = true;
8793 break;
8794 }
8795
8796 case GLSLstd450MatrixInverse:
8797 {
8798 // The inverse of the transpose is the same as the transpose of
8799 // the inverse, so we can just flip need_transpose of the result.
8800 auto *a = maybe_get<SPIRExpression>(id: args[0]);
8801 bool old_transpose = a && a->need_transpose;
8802 if (old_transpose)
8803 a->need_transpose = false;
8804
8805 const char *func = "inverse";
8806 if (options.version < 140) // also matches ES 100
8807 {
8808 auto &type = get<SPIRType>(id: result_type);
8809 assert(type.vecsize >= 2 && type.vecsize <= 4);
8810 assert(type.vecsize == type.columns);
8811
8812 // ARB_gpu_shader_fp64 needs GLSL 150, other types are invalid
8813 if (type.basetype != SPIRType::Float)
8814 SPIRV_CROSS_THROW("Unsupported type for matrix inverse");
8815
8816 bool relaxed = has_decoration(id, decoration: DecorationRelaxedPrecision);
8817 require_polyfill(polyfill: static_cast<Polyfill>(PolyfillMatrixInverse2x2 << (type.vecsize - 2)),
8818 relaxed);
8819 func = (options.es && relaxed) ? "spvInverseMP" : "spvInverse";
8820 }
8821
8822 bool forward = should_forward(id: args[0]);
8823 auto &e = emit_op(result_type, result_id: id, rhs: join(ts&: func, ts: "(", ts: to_unpacked_expression(id: args[0]), ts: ")"), forwarding: forward);
8824 inherit_expression_dependencies(dst: id, source: args[0]);
8825
8826 if (old_transpose)
8827 {
8828 e.need_transpose = true;
8829 a->need_transpose = true;
8830 }
8831 break;
8832 }
8833
8834 // Lerping
8835 case GLSLstd450FMix:
8836 case GLSLstd450IMix:
8837 {
8838 emit_mix_op(result_type, id, left: args[0], right: args[1], lerp: args[2]);
8839 break;
8840 }
8841 case GLSLstd450Step:
8842 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "step");
8843 break;
8844 case GLSLstd450SmoothStep:
8845 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "smoothstep");
8846 break;
8847
8848 // Packing
8849 case GLSLstd450Frexp:
8850 register_call_out_argument(id: args[1]);
8851 forced_temporaries.insert(x: id);
8852 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "frexp");
8853 break;
8854
8855 case GLSLstd450FrexpStruct:
8856 {
8857 auto &type = get<SPIRType>(id: result_type);
8858 emit_uninitialized_temporary_expression(type: result_type, id);
8859 statement(ts: to_expression(id), ts: ".", ts: to_member_name(type, index: 0), ts: " = ", ts: "frexp(", ts: to_expression(id: args[0]), ts: ", ",
8860 ts: to_expression(id), ts: ".", ts: to_member_name(type, index: 1), ts: ");");
8861 break;
8862 }
8863
8864 case GLSLstd450Ldexp:
8865 {
8866 bool forward = should_forward(id: args[0]) && should_forward(id: args[1]);
8867
8868 auto op0 = to_unpacked_expression(id: args[0]);
8869 auto op1 = to_unpacked_expression(id: args[1]);
8870 auto &op1_type = expression_type(id: args[1]);
8871 if (op1_type.basetype != SPIRType::Int)
8872 {
8873 // Need a value cast here.
8874 auto target_type = op1_type;
8875 target_type.basetype = SPIRType::Int;
8876 op1 = join(ts: type_to_glsl_constructor(type: target_type), ts: "(", ts&: op1, ts: ")");
8877 }
8878
8879 auto expr = join(ts: "ldexp(", ts&: op0, ts: ", ", ts&: op1, ts: ")");
8880
8881 emit_op(result_type, result_id: id, rhs: expr, forwarding: forward);
8882 inherit_expression_dependencies(dst: id, source: args[0]);
8883 inherit_expression_dependencies(dst: id, source: args[1]);
8884 break;
8885 }
8886
8887 case GLSLstd450PackSnorm4x8:
8888 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packSnorm4x8");
8889 break;
8890 case GLSLstd450PackUnorm4x8:
8891 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packUnorm4x8");
8892 break;
8893 case GLSLstd450PackSnorm2x16:
8894 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packSnorm2x16");
8895 break;
8896 case GLSLstd450PackUnorm2x16:
8897 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packUnorm2x16");
8898 break;
8899 case GLSLstd450PackHalf2x16:
8900 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packHalf2x16");
8901 break;
8902 case GLSLstd450UnpackSnorm4x8:
8903 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackSnorm4x8");
8904 break;
8905 case GLSLstd450UnpackUnorm4x8:
8906 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackUnorm4x8");
8907 break;
8908 case GLSLstd450UnpackSnorm2x16:
8909 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackSnorm2x16");
8910 break;
8911 case GLSLstd450UnpackUnorm2x16:
8912 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackUnorm2x16");
8913 break;
8914 case GLSLstd450UnpackHalf2x16:
8915 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackHalf2x16");
8916 break;
8917
8918 case GLSLstd450PackDouble2x32:
8919 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packDouble2x32");
8920 break;
8921 case GLSLstd450UnpackDouble2x32:
8922 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackDouble2x32");
8923 break;
8924
8925 // Vector math
8926 case GLSLstd450Length:
8927 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "length");
8928 break;
8929 case GLSLstd450Distance:
8930 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "distance");
8931 break;
8932 case GLSLstd450Cross:
8933 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "cross");
8934 break;
8935 case GLSLstd450Normalize:
8936 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "normalize");
8937 break;
8938 case GLSLstd450FaceForward:
8939 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "faceforward");
8940 break;
8941 case GLSLstd450Reflect:
8942 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "reflect");
8943 break;
8944 case GLSLstd450Refract:
8945 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "refract");
8946 break;
8947
8948 // Bit-fiddling
8949 case GLSLstd450FindILsb:
8950 // findLSB always returns int.
8951 emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "findLSB", input_type: expression_type(id: args[0]).basetype, expected_result_type: int_type);
8952 break;
8953
8954 case GLSLstd450FindSMsb:
8955 emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "findMSB", input_type: int_type, expected_result_type: int_type);
8956 break;
8957
8958 case GLSLstd450FindUMsb:
8959 emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "findMSB", input_type: uint_type,
8960 expected_result_type: int_type); // findMSB always returns int.
8961 break;
8962
8963 // Multisampled varying
8964 case GLSLstd450InterpolateAtCentroid:
8965 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "interpolateAtCentroid");
8966 break;
8967 case GLSLstd450InterpolateAtSample:
8968 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "interpolateAtSample");
8969 break;
8970 case GLSLstd450InterpolateAtOffset:
8971 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "interpolateAtOffset");
8972 break;
8973
8974 case GLSLstd450NMin:
8975 case GLSLstd450NMax:
8976 {
8977 if (options.vulkan_semantics)
8978 {
8979 require_extension_internal(ext: "GL_EXT_spirv_intrinsics");
8980 bool relaxed = has_decoration(id, decoration: DecorationRelaxedPrecision);
8981 Polyfill poly = {};
8982 switch (get<SPIRType>(id: result_type).width)
8983 {
8984 case 16:
8985 poly = op == GLSLstd450NMin ? PolyfillNMin16 : PolyfillNMax16;
8986 break;
8987
8988 case 32:
8989 poly = op == GLSLstd450NMin ? PolyfillNMin32 : PolyfillNMax32;
8990 break;
8991
8992 case 64:
8993 poly = op == GLSLstd450NMin ? PolyfillNMin64 : PolyfillNMax64;
8994 break;
8995
8996 default:
8997 SPIRV_CROSS_THROW("Invalid bit width for NMin/NMax.");
8998 }
8999
9000 require_polyfill(polyfill: poly, relaxed);
9001
9002 // Function return decorations are broken, so need to do double polyfill.
9003 if (relaxed)
9004 require_polyfill(polyfill: poly, relaxed: false);
9005
9006 const char *op_str;
9007 if (relaxed)
9008 op_str = op == GLSLstd450NMin ? "spvNMinRelaxed" : "spvNMaxRelaxed";
9009 else
9010 op_str = op == GLSLstd450NMin ? "spvNMin" : "spvNMax";
9011
9012 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: op_str);
9013 }
9014 else
9015 {
9016 emit_nminmax_op(result_type, id, op0: args[0], op1: args[1], op);
9017 }
9018 break;
9019 }
9020
9021 case GLSLstd450NClamp:
9022 {
9023 if (options.vulkan_semantics)
9024 {
9025 require_extension_internal(ext: "GL_EXT_spirv_intrinsics");
9026 bool relaxed = has_decoration(id, decoration: DecorationRelaxedPrecision);
9027 Polyfill poly = {};
9028 switch (get<SPIRType>(id: result_type).width)
9029 {
9030 case 16:
9031 poly = PolyfillNClamp16;
9032 break;
9033
9034 case 32:
9035 poly = PolyfillNClamp32;
9036 break;
9037
9038 case 64:
9039 poly = PolyfillNClamp64;
9040 break;
9041
9042 default:
9043				SPIRV_CROSS_THROW("Invalid bit width for NClamp.");
9044 }
9045
9046 require_polyfill(polyfill: poly, relaxed);
9047
9048 // Function return decorations are broken, so need to do double polyfill.
9049 if (relaxed)
9050 require_polyfill(polyfill: poly, relaxed: false);
9051
9052 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: relaxed ? "spvNClampRelaxed" : "spvNClamp");
9053 }
9054 else
9055 {
9056 // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
9057 // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
9058 uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX];
9059 if (!max_id)
9060 max_id = ir.increase_bound_by(count: 1);
9061
9062 // Inherit precision qualifiers.
9063 ir.meta[max_id] = ir.meta[id];
9064
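			// Lower NClamp(x, minVal, maxVal) as NMin(NMax(x, minVal), maxVal) using the NaN-aware emit_nminmax_op helper.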
9065 emit_nminmax_op(result_type, id: max_id, op0: args[0], op1: args[1], op: GLSLstd450NMax);
9066 emit_nminmax_op(result_type, id, op0: max_id, op1: args[2], op: GLSLstd450NMin);
9067 }
9068 break;
9069 }
9070
9071 default:
9072 statement(ts: "// unimplemented GLSL op ", ts&: eop);
9073 break;
9074 }
9075}
9076
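// Emulates NaN-aware min/max (GLSLstd450 NMin/NMax) where no native support is available.
// For the non-legacy vector path, the emitted GLSL is roughly of this shape
// (identifiers are illustrative, not the temporaries SPIRV-Cross actually generates):
//
//   bvec2 a_nan = isnan(a);
//   bvec2 b_nan = isnan(b);
//   vec2 tmp = min(a, b);              // max(a, b) for NMax
//   vec2 pick = mix(tmp, b, a_nan);    // if a is NaN, take b
//   vec2 result = mix(pick, a, b_nan); // if b is NaN, take a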
9077void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
9078{
9079 // Need to emulate this call.
9080 uint32_t &ids = extra_sub_expressions[id];
9081 if (!ids)
9082 {
9083 ids = ir.increase_bound_by(count: 5);
9084 auto btype = get<SPIRType>(id: result_type);
9085 btype.basetype = SPIRType::Boolean;
9086 set<SPIRType>(id: ids, args&: btype);
9087 }
9088
9089 uint32_t btype_id = ids + 0;
9090 uint32_t left_nan_id = ids + 1;
9091 uint32_t right_nan_id = ids + 2;
9092 uint32_t tmp_id = ids + 3;
9093 uint32_t mixed_first_id = ids + 4;
9094
9095 // Inherit precision qualifiers.
9096 ir.meta[tmp_id] = ir.meta[id];
9097 ir.meta[mixed_first_id] = ir.meta[id];
9098
9099 if (!is_legacy())
9100 {
9101 emit_unary_func_op(result_type: btype_id, result_id: left_nan_id, op0, op: "isnan");
9102 emit_unary_func_op(result_type: btype_id, result_id: right_nan_id, op0: op1, op: "isnan");
9103 }
9104 else if (expression_type(id: op0).vecsize > 1)
9105 {
9106 // If the number doesn't equal itself, it must be NaN
9107 emit_binary_func_op(result_type: btype_id, result_id: left_nan_id, op0, op1: op0, op: "notEqual");
9108 emit_binary_func_op(result_type: btype_id, result_id: right_nan_id, op0: op1, op1, op: "notEqual");
9109 }
9110 else
9111 {
9112 emit_binary_op(result_type: btype_id, result_id: left_nan_id, op0, op1: op0, op: "!=");
9113 emit_binary_op(result_type: btype_id, result_id: right_nan_id, op0: op1, op1, op: "!=");
9114 }
9115 emit_binary_func_op(result_type, result_id: tmp_id, op0, op1, op: op == GLSLstd450NMin ? "min" : "max");
9116 emit_mix_op(result_type, id: mixed_first_id, left: tmp_id, right: op1, lerp: left_nan_id);
9117 emit_mix_op(result_type, id, left: mixed_first_id, right: op0, lerp: right_nan_id);
9118}
9119
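// Emulates the inverse hyperbolic functions on targets that lack them, using the standard identities:
//   asinh(x) = log(x + sqrt(x*x + 1))
//   acosh(x) = log(x + sqrt(x*x - 1))
//   atanh(x) = 0.5 * log((1 + x) / (1 - x))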
9120void CompilerGLSL::emit_emulated_ahyper_op(uint32_t result_type, uint32_t id, uint32_t op0, GLSLstd450 op)
9121{
9122 const char *one = backend.float_literal_suffix ? "1.0f" : "1.0";
9123 std::string expr;
9124 bool forward = should_forward(id: op0);
9125
9126 switch (op)
9127 {
9128 case GLSLstd450Asinh:
9129 expr = join(ts: "log(", ts: to_enclosed_expression(id: op0), ts: " + sqrt(",
9130 ts: to_enclosed_expression(id: op0), ts: " * ", ts: to_enclosed_expression(id: op0), ts: " + ", ts&: one, ts: "))");
9132		break;
9133
9134 case GLSLstd450Acosh:
9135 expr = join(ts: "log(", ts: to_enclosed_expression(id: op0), ts: " + sqrt(",
9136 ts: to_enclosed_expression(id: op0), ts: " * ", ts: to_enclosed_expression(id: op0), ts: " - ", ts&: one, ts: "))");
9137 break;
9138
9139 case GLSLstd450Atanh:
9140 expr = join(ts: "log((", ts&: one, ts: " + ", ts: to_enclosed_expression(id: op0), ts: ") / "
9141 "(", ts&: one, ts: " - ", ts: to_enclosed_expression(id: op0), ts: ")) * 0.5",
9142 ts: backend.float_literal_suffix ? "f" : "");
9143 break;
9144
9145 default:
9146 SPIRV_CROSS_THROW("Invalid op.");
9147 }
9148
9149 emit_op(result_type, result_id: id, rhs: expr, forwarding: forward);
9150 inherit_expression_dependencies(dst: id, source: op0);
9151}
9152
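// The SPV_AMD_* extended instruction sets below map essentially 1:1 onto the
// corresponding GL_AMD_* built-in functions, so these handlers only translate names.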
9153void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
9154 uint32_t)
9155{
9156 require_extension_internal(ext: "GL_AMD_shader_ballot");
9157
9158 enum AMDShaderBallot
9159 {
9160 SwizzleInvocationsAMD = 1,
9161 SwizzleInvocationsMaskedAMD = 2,
9162 WriteInvocationAMD = 3,
9163 MbcntAMD = 4
9164 };
9165
9166 auto op = static_cast<AMDShaderBallot>(eop);
9167
9168 switch (op)
9169 {
9170 case SwizzleInvocationsAMD:
9171 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "swizzleInvocationsAMD");
9172 register_control_dependent_expression(expr: id);
9173 break;
9174
9175 case SwizzleInvocationsMaskedAMD:
9176 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "swizzleInvocationsMaskedAMD");
9177 register_control_dependent_expression(expr: id);
9178 break;
9179
9180 case WriteInvocationAMD:
9181 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "writeInvocationAMD");
9182 register_control_dependent_expression(expr: id);
9183 break;
9184
9185 case MbcntAMD:
9186 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "mbcntAMD");
9187 register_control_dependent_expression(expr: id);
9188 break;
9189
9190 default:
9191 statement(ts: "// unimplemented SPV AMD shader ballot op ", ts&: eop);
9192 break;
9193 }
9194}
9195
9196void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
9197 const uint32_t *args, uint32_t)
9198{
9199 require_extension_internal(ext: "GL_AMD_shader_explicit_vertex_parameter");
9200
9201 enum AMDShaderExplicitVertexParameter
9202 {
9203 InterpolateAtVertexAMD = 1
9204 };
9205
9206 auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
9207
9208 switch (op)
9209 {
9210 case InterpolateAtVertexAMD:
9211 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "interpolateAtVertexAMD");
9212 break;
9213
9214 default:
9215 statement(ts: "// unimplemented SPV AMD shader explicit vertex parameter op ", ts&: eop);
9216 break;
9217 }
9218}
9219
9220void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
9221 const uint32_t *args, uint32_t)
9222{
9223 require_extension_internal(ext: "GL_AMD_shader_trinary_minmax");
9224
9225 enum AMDShaderTrinaryMinMax
9226 {
9227 FMin3AMD = 1,
9228 UMin3AMD = 2,
9229 SMin3AMD = 3,
9230 FMax3AMD = 4,
9231 UMax3AMD = 5,
9232 SMax3AMD = 6,
9233 FMid3AMD = 7,
9234 UMid3AMD = 8,
9235 SMid3AMD = 9
9236 };
9237
9238 auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
9239
9240 switch (op)
9241 {
9242 case FMin3AMD:
9243 case UMin3AMD:
9244 case SMin3AMD:
9245 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "min3");
9246 break;
9247
9248 case FMax3AMD:
9249 case UMax3AMD:
9250 case SMax3AMD:
9251 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "max3");
9252 break;
9253
9254 case FMid3AMD:
9255 case UMid3AMD:
9256 case SMid3AMD:
9257 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "mid3");
9258 break;
9259
9260 default:
9261 statement(ts: "// unimplemented SPV AMD shader trinary minmax op ", ts&: eop);
9262 break;
9263 }
9264}
9265
9266void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
9267 uint32_t)
9268{
9269 require_extension_internal(ext: "GL_AMD_gcn_shader");
9270
9271 enum AMDGCNShader
9272 {
9273 CubeFaceIndexAMD = 1,
9274 CubeFaceCoordAMD = 2,
9275 TimeAMD = 3
9276 };
9277
9278 auto op = static_cast<AMDGCNShader>(eop);
9279
9280 switch (op)
9281 {
9282 case CubeFaceIndexAMD:
9283 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "cubeFaceIndexAMD");
9284 break;
9285 case CubeFaceCoordAMD:
9286 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "cubeFaceCoordAMD");
9287 break;
9288 case TimeAMD:
9289 {
9290 string expr = "timeAMD()";
9291 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
9292 register_control_dependent_expression(expr: id);
9293 break;
9294 }
9295
9296 default:
9297 statement(ts: "// unimplemented SPV AMD gcn shader op ", ts&: eop);
9298 break;
9299 }
9300}
9301
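// Lowers SPIR-V OpGroupNonUniform* instructions to GLSL subgroup built-ins, e.g.
// OpGroupNonUniformBallot -> subgroupBallot(value) and a clustered FAdd reduction ->
// subgroupClusteredAdd(value, cluster_size). The first switch below requests the
// extensions/features each op needs; the second switch emits the actual call.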
9302void CompilerGLSL::emit_subgroup_op(const Instruction &i)
9303{
9304 const uint32_t *ops = stream(instr: i);
9305 auto op = static_cast<Op>(i.op);
9306
9307 if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op, ops))
9308 SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
9309
9310 // If we need to do implicit bitcasts, make sure we do it with the correct type.
9311 uint32_t integer_width = get_integer_width_for_instruction(instr: i);
9312 auto int_type = to_signed_basetype(width: integer_width);
9313 auto uint_type = to_unsigned_basetype(width: integer_width);
9314
9315 switch (op)
9316 {
9317 case OpGroupNonUniformElect:
9318 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupElect);
9319 break;
9320
9321 case OpGroupNonUniformBallotBitCount:
9322 {
9323 const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
9324 if (operation == GroupOperationReduce)
9325 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
9326 else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
9327 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
9328 }
9329 break;
9330
9331 case OpGroupNonUniformBallotBitExtract:
9332 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
9333 break;
9334
9335 case OpGroupNonUniformInverseBallot:
9336 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
9337 break;
9338
9339 case OpGroupNonUniformBallot:
9340 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBallot);
9341 break;
9342
9343 case OpGroupNonUniformBallotFindLSB:
9344 case OpGroupNonUniformBallotFindMSB:
9345 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
9346 break;
9347
9348 case OpGroupNonUniformBroadcast:
9349 case OpGroupNonUniformBroadcastFirst:
9350 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
9351 break;
9352
9353 case OpGroupNonUniformShuffle:
9354 case OpGroupNonUniformShuffleXor:
9355 require_extension_internal(ext: "GL_KHR_shader_subgroup_shuffle");
9356 break;
9357
9358 case OpGroupNonUniformShuffleUp:
9359 case OpGroupNonUniformShuffleDown:
9360 require_extension_internal(ext: "GL_KHR_shader_subgroup_shuffle_relative");
9361 break;
9362
9363 case OpGroupNonUniformAll:
9364 case OpGroupNonUniformAny:
9365 case OpGroupNonUniformAllEqual:
9366 {
9367 const SPIRType &type = expression_type(id: ops[3]);
9368 if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
9369 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
9370 else
9371 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupAllEqualT);
9372 }
9373 break;
9374
9375 // clang-format off
9376#define GLSL_GROUP_OP(OP)\
9377 case OpGroupNonUniform##OP:\
9378 {\
9379 auto operation = static_cast<GroupOperation>(ops[3]);\
9380 if (operation == GroupOperationClusteredReduce)\
9381 require_extension_internal("GL_KHR_shader_subgroup_clustered");\
9382 else if (operation == GroupOperationReduce)\
9383 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##Reduce);\
9384 else if (operation == GroupOperationExclusiveScan)\
9385 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##ExclusiveScan);\
9386 else if (operation == GroupOperationInclusiveScan)\
9387 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##InclusiveScan);\
9388 else\
9389 SPIRV_CROSS_THROW("Invalid group operation.");\
9390 break;\
9391 }
9392
9393 GLSL_GROUP_OP(IAdd)
9394 GLSL_GROUP_OP(FAdd)
9395 GLSL_GROUP_OP(IMul)
9396 GLSL_GROUP_OP(FMul)
9397
9398#undef GLSL_GROUP_OP
9399 // clang-format on
9400
9401 case OpGroupNonUniformFMin:
9402 case OpGroupNonUniformFMax:
9403 case OpGroupNonUniformSMin:
9404 case OpGroupNonUniformSMax:
9405 case OpGroupNonUniformUMin:
9406 case OpGroupNonUniformUMax:
9407 case OpGroupNonUniformBitwiseAnd:
9408 case OpGroupNonUniformBitwiseOr:
9409 case OpGroupNonUniformBitwiseXor:
9410 case OpGroupNonUniformLogicalAnd:
9411 case OpGroupNonUniformLogicalOr:
9412 case OpGroupNonUniformLogicalXor:
9413 {
9414 auto operation = static_cast<GroupOperation>(ops[3]);
9415 if (operation == GroupOperationClusteredReduce)
9416 {
9417 require_extension_internal(ext: "GL_KHR_shader_subgroup_clustered");
9418 }
9419 else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
9420 operation == GroupOperationReduce)
9421 {
9422 require_extension_internal(ext: "GL_KHR_shader_subgroup_arithmetic");
9423 }
9424 else
9425 SPIRV_CROSS_THROW("Invalid group operation.");
9426 break;
9427 }
9428
9429 case OpGroupNonUniformQuadSwap:
9430 case OpGroupNonUniformQuadBroadcast:
9431 require_extension_internal(ext: "GL_KHR_shader_subgroup_quad");
9432 break;
9433
9434 default:
9435 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
9436 }
9437
9438 uint32_t result_type = ops[0];
9439 uint32_t id = ops[1];
9440
9441 auto scope = static_cast<Scope>(evaluate_constant_u32(id: ops[2]));
9442 if (scope != ScopeSubgroup)
9443 SPIRV_CROSS_THROW("Only subgroup scope is supported.");
9444
9445 switch (op)
9446 {
9447 case OpGroupNonUniformElect:
9448 emit_op(result_type, result_id: id, rhs: "subgroupElect()", forwarding: true);
9449 break;
9450
9451 case OpGroupNonUniformBroadcast:
9452 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupBroadcast");
9453 break;
9454
9455 case OpGroupNonUniformBroadcastFirst:
9456 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupBroadcastFirst");
9457 break;
9458
9459 case OpGroupNonUniformBallot:
9460 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupBallot");
9461 break;
9462
9463 case OpGroupNonUniformInverseBallot:
9464 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupInverseBallot");
9465 break;
9466
9467 case OpGroupNonUniformBallotBitExtract:
9468 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupBallotBitExtract");
9469 break;
9470
9471 case OpGroupNonUniformBallotFindLSB:
9472 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupBallotFindLSB");
9473 break;
9474
9475 case OpGroupNonUniformBallotFindMSB:
9476 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupBallotFindMSB");
9477 break;
9478
9479 case OpGroupNonUniformBallotBitCount:
9480 {
9481 auto operation = static_cast<GroupOperation>(ops[3]);
9482 if (operation == GroupOperationReduce)
9483 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "subgroupBallotBitCount");
9484 else if (operation == GroupOperationInclusiveScan)
9485 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "subgroupBallotInclusiveBitCount");
9486 else if (operation == GroupOperationExclusiveScan)
9487 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "subgroupBallotExclusiveBitCount");
9488 else
9489 SPIRV_CROSS_THROW("Invalid BitCount operation.");
9490 break;
9491 }
9492
9493 case OpGroupNonUniformShuffle:
9494 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupShuffle");
9495 break;
9496
9497 case OpGroupNonUniformShuffleXor:
9498 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupShuffleXor");
9499 break;
9500
9501 case OpGroupNonUniformShuffleUp:
9502 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupShuffleUp");
9503 break;
9504
9505 case OpGroupNonUniformShuffleDown:
9506 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupShuffleDown");
9507 break;
9508
9509 case OpGroupNonUniformAll:
9510 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupAll");
9511 break;
9512
9513 case OpGroupNonUniformAny:
9514 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupAny");
9515 break;
9516
9517 case OpGroupNonUniformAllEqual:
9518 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupAllEqual");
9519 break;
9520
9521 // clang-format off
9522#define GLSL_GROUP_OP(op, glsl_op) \
9523case OpGroupNonUniform##op: \
9524 { \
9525 auto operation = static_cast<GroupOperation>(ops[3]); \
9526 if (operation == GroupOperationReduce) \
9527 emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
9528 else if (operation == GroupOperationInclusiveScan) \
9529 emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
9530 else if (operation == GroupOperationExclusiveScan) \
9531 emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
9532 else if (operation == GroupOperationClusteredReduce) \
9533 emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
9534 else \
9535 SPIRV_CROSS_THROW("Invalid group operation."); \
9536 break; \
9537 }
9538
9539#define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
9540case OpGroupNonUniform##op: \
9541 { \
9542 auto operation = static_cast<GroupOperation>(ops[3]); \
9543 if (operation == GroupOperationReduce) \
9544 emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
9545 else if (operation == GroupOperationInclusiveScan) \
9546 emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
9547 else if (operation == GroupOperationExclusiveScan) \
9548 emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
9549 else if (operation == GroupOperationClusteredReduce) \
9550 emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
9551 else \
9552 SPIRV_CROSS_THROW("Invalid group operation."); \
9553 break; \
9554 }
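	// For illustration: GLSL_GROUP_OP(FAdd, Add) expands to a case emitting
	// subgroupAdd / subgroupInclusiveAdd / subgroupExclusiveAdd / subgroupClusteredAdd
	// depending on the group operation, while the _CAST variant additionally casts the
	// operand so SMin/SMax/UMin/UMax are emitted with a consistently signed/unsigned argument.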
9555
9556 GLSL_GROUP_OP(FAdd, Add)
9557 GLSL_GROUP_OP(FMul, Mul)
9558 GLSL_GROUP_OP(FMin, Min)
9559 GLSL_GROUP_OP(FMax, Max)
9560 GLSL_GROUP_OP(IAdd, Add)
9561 GLSL_GROUP_OP(IMul, Mul)
9562 GLSL_GROUP_OP_CAST(SMin, Min, int_type)
9563 GLSL_GROUP_OP_CAST(SMax, Max, int_type)
9564 GLSL_GROUP_OP_CAST(UMin, Min, uint_type)
9565 GLSL_GROUP_OP_CAST(UMax, Max, uint_type)
9566 GLSL_GROUP_OP(BitwiseAnd, And)
9567 GLSL_GROUP_OP(BitwiseOr, Or)
9568 GLSL_GROUP_OP(BitwiseXor, Xor)
9569 GLSL_GROUP_OP(LogicalAnd, And)
9570 GLSL_GROUP_OP(LogicalOr, Or)
9571 GLSL_GROUP_OP(LogicalXor, Xor)
9572#undef GLSL_GROUP_OP
9573#undef GLSL_GROUP_OP_CAST
9574 // clang-format on
9575
9576 case OpGroupNonUniformQuadSwap:
9577 {
9578 uint32_t direction = evaluate_constant_u32(id: ops[4]);
9579 if (direction == 0)
9580 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupQuadSwapHorizontal");
9581 else if (direction == 1)
9582 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupQuadSwapVertical");
9583 else if (direction == 2)
9584 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupQuadSwapDiagonal");
9585 else
9586 SPIRV_CROSS_THROW("Invalid quad swap direction.");
9587 break;
9588 }
9589
9590 case OpGroupNonUniformQuadBroadcast:
9591 {
9592 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupQuadBroadcast");
9593 break;
9594 }
9595
9596 default:
9597 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
9598 }
9599
9600 register_control_dependent_expression(expr: id);
9601}
9602
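// Returns the GLSL cast used to reinterpret in_type as out_type: a bit-cast builtin such as
// floatBitsToUint (float -> uint) or packUint2x32 (uvec2 -> uint64_t), or just the target type
// name when a constructor-style cast suffices. An empty string means the expression is used as-is.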
9603string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
9604{
9605 // OpBitcast can deal with pointers.
9606 if (out_type.pointer || in_type.pointer)
9607 {
9608 if (out_type.vecsize == 2 || in_type.vecsize == 2)
9609 require_extension_internal(ext: "GL_EXT_buffer_reference_uvec2");
9610 return type_to_glsl(type: out_type);
9611 }
9612
9613 if (out_type.basetype == in_type.basetype)
9614 return "";
9615
9616 assert(out_type.basetype != SPIRType::Boolean);
9617 assert(in_type.basetype != SPIRType::Boolean);
9618
9619 bool integral_cast = type_is_integral(type: out_type) && type_is_integral(type: in_type);
9620 bool same_size_cast = out_type.width == in_type.width;
9621
9622 // Trivial bitcast case, casts between integers.
9623 if (integral_cast && same_size_cast)
9624 return type_to_glsl(type: out_type);
9625
9626 // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
9627 if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
9628 return "unpack8";
9629 else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
9630 return "pack16";
9631 else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
9632 return "pack32";
9633
9634 // Floating <-> Integer special casts. Just have to enumerate all cases. :(
9635 // 16-bit, 32-bit and 64-bit floats.
9636 if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
9637 {
9638 if (is_legacy_es())
9639 SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
9640 else if (!options.es && options.version < 330)
9641 require_extension_internal(ext: "GL_ARB_shader_bit_encoding");
9642 return "floatBitsToUint";
9643 }
9644 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
9645 {
9646 if (is_legacy_es())
9647 SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
9648 else if (!options.es && options.version < 330)
9649 require_extension_internal(ext: "GL_ARB_shader_bit_encoding");
9650 return "floatBitsToInt";
9651 }
9652 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
9653 {
9654 if (is_legacy_es())
9655 SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
9656 else if (!options.es && options.version < 330)
9657 require_extension_internal(ext: "GL_ARB_shader_bit_encoding");
9658 return "uintBitsToFloat";
9659 }
9660 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
9661 {
9662 if (is_legacy_es())
9663 SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
9664 else if (!options.es && options.version < 330)
9665 require_extension_internal(ext: "GL_ARB_shader_bit_encoding");
9666 return "intBitsToFloat";
9667 }
9668
9669 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
9670 return "doubleBitsToInt64";
9671 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
9672 return "doubleBitsToUint64";
9673 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
9674 return "int64BitsToDouble";
9675 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
9676 return "uint64BitsToDouble";
9677 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
9678 return "float16BitsToInt16";
9679 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
9680 return "float16BitsToUint16";
9681 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
9682 return "int16BitsToFloat16";
9683 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
9684 return "uint16BitsToFloat16";
9685
9686 // And finally, some even more special purpose casts.
9687 if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
9688 return "packUint2x32";
9689 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
9690 return "unpackUint2x32";
9691 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
9692 return "unpackFloat2x16";
9693 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
9694 return "packFloat2x16";
9695 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
9696 return "packInt2x16";
9697 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
9698 return "unpackInt2x16";
9699 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
9700 return "packUint2x16";
9701 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
9702 return "unpackUint2x16";
9703 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
9704 return "packInt4x16";
9705 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
9706 return "unpackInt4x16";
9707 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
9708 return "packUint4x16";
9709 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
9710 return "unpackUint4x16";
9711
9712 return "";
9713}
9714
9715string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
9716{
9717 auto op = bitcast_glsl_op(out_type: result_type, in_type: expression_type(id: argument));
9718 if (op.empty())
9719 return to_enclosed_unpacked_expression(id: argument);
9720 else
9721 return join(ts&: op, ts: "(", ts: to_unpacked_expression(id: argument), ts: ")");
9722}
9723
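// Convenience wrappers around bitcast_glsl_op: e.g. bitcasting a float-typed argument to
// SPIRType::UInt yields "floatBitsToUint(<expr>)", while matching types pass the expression
// through unchanged.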
9724std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
9725{
9726 auto expr = to_expression(id: arg);
9727 auto &src_type = expression_type(id: arg);
9728 if (src_type.basetype != target_type)
9729 {
9730 auto target = src_type;
9731 target.basetype = target_type;
9732 expr = join(ts: bitcast_glsl_op(out_type: target, in_type: src_type), ts: "(", ts&: expr, ts: ")");
9733 }
9734
9735 return expr;
9736}
9737
9738std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
9739 const std::string &expr)
9740{
9741 if (target_type.basetype == expr_type)
9742 return expr;
9743
9744 auto src_type = target_type;
9745 src_type.basetype = expr_type;
9746 return join(ts: bitcast_glsl_op(out_type: target_type, in_type: src_type), ts: "(", ts: expr, ts: ")");
9747}
9748
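// Maps a SPIR-V BuiltIn (plus storage class, which disambiguates e.g. gl_SampleMask vs.
// gl_SampleMaskIn) to its GLSL name, requesting any extensions the target version needs.
// Unrecognized builtins fall back to a synthetic "gl_BuiltIn_<value>" name.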
9749string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
9750{
9751 switch (builtin)
9752 {
9753 case BuiltInPosition:
9754 return "gl_Position";
9755 case BuiltInPointSize:
9756 return "gl_PointSize";
9757 case BuiltInClipDistance:
9758 {
9759 if (options.es)
9760 require_extension_internal(ext: "GL_EXT_clip_cull_distance");
9761 return "gl_ClipDistance";
9762 }
9763 case BuiltInCullDistance:
9764 {
9765 if (options.es)
9766 require_extension_internal(ext: "GL_EXT_clip_cull_distance");
9767 return "gl_CullDistance";
9768 }
9769 case BuiltInVertexId:
9770 if (options.vulkan_semantics)
9771 SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
9772 "with GL semantics.");
9773 return "gl_VertexID";
9774 case BuiltInInstanceId:
9775 if (options.vulkan_semantics)
9776 {
9777 auto model = get_entry_point().model;
9778 switch (model)
9779 {
9780 case spv::ExecutionModelIntersectionKHR:
9781 case spv::ExecutionModelAnyHitKHR:
9782 case spv::ExecutionModelClosestHitKHR:
9783 // gl_InstanceID is allowed in these shaders.
9784 break;
9785
9786 default:
9787 SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
9788 "created with GL semantics.");
9789 }
9790 }
9791 if (!options.es && options.version < 140)
9792 {
9793 require_extension_internal(ext: "GL_ARB_draw_instanced");
9794 }
9795 return "gl_InstanceID";
9796 case BuiltInVertexIndex:
9797 if (options.vulkan_semantics)
9798 return "gl_VertexIndex";
9799 else
9800 return "gl_VertexID"; // gl_VertexID already has the base offset applied.
9801 case BuiltInInstanceIndex:
9802 if (options.vulkan_semantics)
9803 return "gl_InstanceIndex";
9804
9805 if (!options.es && options.version < 140)
9806 {
9807 require_extension_internal(ext: "GL_ARB_draw_instanced");
9808 }
9809
9810 if (options.vertex.support_nonzero_base_instance)
9811 {
9812 if (!options.vulkan_semantics)
9813 {
9814 // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
9815 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
9816 }
9817 return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
9818 }
9819 else
9820 return "gl_InstanceID";
9821 case BuiltInPrimitiveId:
9822 if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
9823 return "gl_PrimitiveIDIn";
9824 else
9825 return "gl_PrimitiveID";
9826 case BuiltInInvocationId:
9827 return "gl_InvocationID";
9828 case BuiltInLayer:
9829 return "gl_Layer";
9830 case BuiltInViewportIndex:
9831 return "gl_ViewportIndex";
9832 case BuiltInTessLevelOuter:
9833 return "gl_TessLevelOuter";
9834 case BuiltInTessLevelInner:
9835 return "gl_TessLevelInner";
9836 case BuiltInTessCoord:
9837 return "gl_TessCoord";
9838 case BuiltInPatchVertices:
9839 return "gl_PatchVerticesIn";
9840 case BuiltInFragCoord:
9841 return "gl_FragCoord";
9842 case BuiltInPointCoord:
9843 return "gl_PointCoord";
9844 case BuiltInFrontFacing:
9845 return "gl_FrontFacing";
9846 case BuiltInFragDepth:
9847 return "gl_FragDepth";
9848 case BuiltInNumWorkgroups:
9849 return "gl_NumWorkGroups";
9850 case BuiltInWorkgroupSize:
9851 return "gl_WorkGroupSize";
9852 case BuiltInWorkgroupId:
9853 return "gl_WorkGroupID";
9854 case BuiltInLocalInvocationId:
9855 return "gl_LocalInvocationID";
9856 case BuiltInGlobalInvocationId:
9857 return "gl_GlobalInvocationID";
9858 case BuiltInLocalInvocationIndex:
9859 return "gl_LocalInvocationIndex";
9860 case BuiltInHelperInvocation:
9861 return "gl_HelperInvocation";
9862
9863 case BuiltInBaseVertex:
9864 if (options.es)
9865 SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
9866
9867 if (options.vulkan_semantics)
9868 {
9869 if (options.version < 460)
9870 {
9871 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
9872 return "gl_BaseVertexARB";
9873 }
9874 return "gl_BaseVertex";
9875 }
9876 // On regular GL, this is soft-enabled and we emit ifdefs in code.
9877 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
9878 return "SPIRV_Cross_BaseVertex";
9879
9880 case BuiltInBaseInstance:
9881 if (options.es)
9882 SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
9883
9884 if (options.vulkan_semantics)
9885 {
9886 if (options.version < 460)
9887 {
9888 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
9889 return "gl_BaseInstanceARB";
9890 }
9891 return "gl_BaseInstance";
9892 }
9893 // On regular GL, this is soft-enabled and we emit ifdefs in code.
9894 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
9895 return "SPIRV_Cross_BaseInstance";
9896
9897 case BuiltInDrawIndex:
9898 if (options.es)
9899 SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
9900
9901 if (options.vulkan_semantics)
9902 {
9903 if (options.version < 460)
9904 {
9905 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
9906 return "gl_DrawIDARB";
9907 }
9908 return "gl_DrawID";
9909 }
9910 // On regular GL, this is soft-enabled and we emit ifdefs in code.
9911 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
9912 return "gl_DrawIDARB";
9913
9914 case BuiltInSampleId:
9915 if (is_legacy())
9916 SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
9917 else if (options.es && options.version < 320)
9918 require_extension_internal(ext: "GL_OES_sample_variables");
9919 else if (!options.es && options.version < 400)
9920 require_extension_internal(ext: "GL_ARB_sample_shading");
9921 return "gl_SampleID";
9922
9923 case BuiltInSampleMask:
9924 if (is_legacy())
9925 SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
9926 else if (options.es && options.version < 320)
9927 require_extension_internal(ext: "GL_OES_sample_variables");
9928 else if (!options.es && options.version < 400)
9929 require_extension_internal(ext: "GL_ARB_sample_shading");
9930
9931 if (storage == StorageClassInput)
9932 return "gl_SampleMaskIn";
9933 else
9934 return "gl_SampleMask";
9935
9936 case BuiltInSamplePosition:
9937 if (is_legacy())
9938 SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
9939 else if (options.es && options.version < 320)
9940 require_extension_internal(ext: "GL_OES_sample_variables");
9941 else if (!options.es && options.version < 400)
9942 require_extension_internal(ext: "GL_ARB_sample_shading");
9943 return "gl_SamplePosition";
9944
9945 case BuiltInViewIndex:
9946 if (options.vulkan_semantics)
9947 return "gl_ViewIndex";
9948 else
9949 return "gl_ViewID_OVR";
9950
9951 case BuiltInNumSubgroups:
9952 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::NumSubgroups);
9953 return "gl_NumSubgroups";
9954
9955 case BuiltInSubgroupId:
9956 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupID);
9957 return "gl_SubgroupID";
9958
9959 case BuiltInSubgroupSize:
9960 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupSize);
9961 return "gl_SubgroupSize";
9962
9963 case BuiltInSubgroupLocalInvocationId:
9964 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupInvocationID);
9965 return "gl_SubgroupInvocationID";
9966
9967 case BuiltInSubgroupEqMask:
9968 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
9969 return "gl_SubgroupEqMask";
9970
9971 case BuiltInSubgroupGeMask:
9972 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
9973 return "gl_SubgroupGeMask";
9974
9975 case BuiltInSubgroupGtMask:
9976 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
9977 return "gl_SubgroupGtMask";
9978
9979 case BuiltInSubgroupLeMask:
9980 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
9981 return "gl_SubgroupLeMask";
9982
9983 case BuiltInSubgroupLtMask:
9984 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
9985 return "gl_SubgroupLtMask";
9986
9987 case BuiltInLaunchIdKHR:
9988 return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
9989 case BuiltInLaunchSizeKHR:
9990 return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
9991 case BuiltInWorldRayOriginKHR:
9992 return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
9993 case BuiltInWorldRayDirectionKHR:
9994 return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
9995 case BuiltInObjectRayOriginKHR:
9996 return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
9997 case BuiltInObjectRayDirectionKHR:
9998 return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
9999 case BuiltInRayTminKHR:
10000 return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
10001 case BuiltInRayTmaxKHR:
10002 return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
10003 case BuiltInInstanceCustomIndexKHR:
10004 return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
10005 case BuiltInObjectToWorldKHR:
10006 return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
10007 case BuiltInWorldToObjectKHR:
10008 return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
10009 case BuiltInHitTNV:
10010 // gl_HitTEXT is an alias of RayTMax in KHR.
10011 return "gl_HitTNV";
10012 case BuiltInHitKindKHR:
10013 return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
10014 case BuiltInIncomingRayFlagsKHR:
10015 return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";
10016
10017 case BuiltInBaryCoordKHR:
10018 {
10019 if (options.es && options.version < 320)
10020 SPIRV_CROSS_THROW("gl_BaryCoordEXT requires ESSL 320.");
10021 else if (!options.es && options.version < 450)
10022 SPIRV_CROSS_THROW("gl_BaryCoordEXT requires GLSL 450.");
10023
10024 if (barycentric_is_nv)
10025 {
10026 require_extension_internal(ext: "GL_NV_fragment_shader_barycentric");
10027 return "gl_BaryCoordNV";
10028 }
10029 else
10030 {
10031 require_extension_internal(ext: "GL_EXT_fragment_shader_barycentric");
10032 return "gl_BaryCoordEXT";
10033 }
10034 }
10035
10036 case BuiltInBaryCoordNoPerspNV:
10037 {
10038 if (options.es && options.version < 320)
10039 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires ESSL 320.");
10040 else if (!options.es && options.version < 450)
10041 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires GLSL 450.");
10042
10043 if (barycentric_is_nv)
10044 {
10045 require_extension_internal(ext: "GL_NV_fragment_shader_barycentric");
10046 return "gl_BaryCoordNoPerspNV";
10047 }
10048 else
10049 {
10050 require_extension_internal(ext: "GL_EXT_fragment_shader_barycentric");
10051 return "gl_BaryCoordNoPerspEXT";
10052 }
10053 }
10054
10055 case BuiltInFragStencilRefEXT:
10056 {
10057 if (!options.es)
10058 {
10059 require_extension_internal(ext: "GL_ARB_shader_stencil_export");
10060 return "gl_FragStencilRefARB";
10061 }
10062 else
10063 SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
10064 }
10065
10066 case BuiltInPrimitiveShadingRateKHR:
10067 {
10068 if (!options.vulkan_semantics)
10069 SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL.");
10070 require_extension_internal(ext: "GL_EXT_fragment_shading_rate");
10071 return "gl_PrimitiveShadingRateEXT";
10072 }
10073
10074 case BuiltInShadingRateKHR:
10075 {
10076 if (!options.vulkan_semantics)
10077 SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL.");
10078 require_extension_internal(ext: "GL_EXT_fragment_shading_rate");
10079 return "gl_ShadingRateEXT";
10080 }
10081
10082 case BuiltInDeviceIndex:
10083 if (!options.vulkan_semantics)
10084 SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
10085 require_extension_internal(ext: "GL_EXT_device_group");
10086 return "gl_DeviceIndex";
10087
10088 case BuiltInFullyCoveredEXT:
10089 if (!options.es)
10090 require_extension_internal(ext: "GL_NV_conservative_raster_underestimation");
10091 else
10092 SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation.");
10093 return "gl_FragFullyCoveredNV";
10094
10095 case BuiltInPrimitiveTriangleIndicesEXT:
10096 return "gl_PrimitiveTriangleIndicesEXT";
10097 case BuiltInPrimitiveLineIndicesEXT:
10098 return "gl_PrimitiveLineIndicesEXT";
10099 case BuiltInPrimitivePointIndicesEXT:
10100 return "gl_PrimitivePointIndicesEXT";
10101 case BuiltInCullPrimitiveEXT:
10102 return "gl_CullPrimitiveEXT";
10103
10104 default:
10105 return join(ts: "gl_BuiltIn_", ts: convert_to_string(t: builtin));
10106 }
10107}
10108
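// e.g. index_to_swizzle(2) returns "z"; out-of-range indices fall back to "x" instead of asserting.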
10109const char *CompilerGLSL::index_to_swizzle(uint32_t index)
10110{
10111 switch (index)
10112 {
10113 case 0:
10114 return "x";
10115 case 1:
10116 return "y";
10117 case 2:
10118 return "z";
10119 case 3:
10120 return "w";
10121 default:
10122 return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in spec.
10123 }
10124}
10125
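// Appends a single subscript to an access-chain expression. For an OpPtrAccessChain whose base
// is already arrayed, the element index is folded into the last subscript instead, so
// (illustratively) "buf[3]" indexed by "i" becomes "buf[3 + i]" rather than "buf[3][i]".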
10126void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
10127 AccessChainFlags flags, bool &access_chain_is_arrayed,
10128 uint32_t index)
10129{
10130 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
10131 bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
10132 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
10133
10134 string idx_expr = index_is_literal ? convert_to_string(t: index) : to_unpacked_expression(id: index, register_expression_read);
10135
10136 // For the case where the base of an OpPtrAccessChain already ends in [n],
10137 // we need to use the index as an offset to the existing index, otherwise,
10138 // we can just use the index directly.
10139 if (ptr_chain && access_chain_is_arrayed)
10140 {
10141 size_t split_pos = expr.find_last_of(c: ']');
10142 size_t enclose_split = expr.find_last_of(c: ')');
10143
10144 // If we have already enclosed the expression, don't try to be clever, it will break.
10145 if (split_pos > enclose_split || enclose_split == string::npos)
10146 {
10147 string expr_front = expr.substr(pos: 0, n: split_pos);
10148 string expr_back = expr.substr(pos: split_pos);
10149 expr = expr_front + " + " + enclose_expression(expr: idx_expr) + expr_back;
10150 return;
10151 }
10152 }
10153
10154 expr += "[";
10155 expr += idx_expr;
10156 expr += "]";
10157}
10158
10159bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
10160{
10161 return true;
10162}
10163
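// Builds the textual expression for an (Op)AccessChain: starting from the base expression it
// appends array subscripts, struct member accesses and component swizzles while tracking
// packing, row-major conversion and builtin redirection (e.g. gl_in[i].gl_Position) on the way.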
10164string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
10165 AccessChainFlags flags, AccessChainMeta *meta)
10166{
10167 string expr;
10168
10169 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
10170 bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
10171 bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
10172 bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
10173 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
10174 bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;
10175
10176 if (!chain_only)
10177 {
10178 // We handle transpose explicitly, so don't resolve that here.
10179 auto *e = maybe_get<SPIRExpression>(id: base);
10180 bool old_transpose = e && e->need_transpose;
10181 if (e)
10182 e->need_transpose = false;
10183 expr = to_enclosed_expression(id: base, register_expression_read);
10184 if (e)
10185 e->need_transpose = old_transpose;
10186 }
10187
10188 // Start traversing type hierarchy at the proper non-pointer types,
10189 // but keep type_id referencing the original pointer for use below.
10190 uint32_t type_id = expression_type_id(id: base);
10191 const auto *type = &get_pointee_type(type_id);
10192
10193 if (!backend.native_pointers)
10194 {
10195 if (ptr_chain)
10196 SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
10197
10198 // Wrapped buffer reference pointer types will need to poke into the internal "value" member before
10199 // continuing the access chain.
10200 if (should_dereference(id: base))
10201 expr = dereference_expression(expr_type: get<SPIRType>(id: type_id), expr);
10202 }
10203 else if (should_dereference(id: base) && type->basetype != SPIRType::Struct && !ptr_chain)
10204 expr = join(ts: "(", ts: dereference_expression(expr_type: *type, expr), ts: ")");
10205
10206 bool access_chain_is_arrayed = expr.find_first_of(c: '[') != string::npos;
10207 bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(id: base);
10208 bool is_packed = has_extended_decoration(id: base, decoration: SPIRVCrossDecorationPhysicalTypePacked);
10209 uint32_t physical_type = get_extended_decoration(id: base, decoration: SPIRVCrossDecorationPhysicalTypeID);
10210 bool is_invariant = has_decoration(id: base, decoration: DecorationInvariant);
10211 bool relaxed_precision = has_decoration(id: base, decoration: DecorationRelaxedPrecision);
10212 bool pending_array_enclose = false;
10213 bool dimension_flatten = false;
10214 bool access_meshlet_position_y = false;
10215 bool chain_is_builtin = false;
10216 spv::BuiltIn chained_builtin = {};
10217
10218 if (auto *base_expr = maybe_get<SPIRExpression>(id: base))
10219 {
10220 access_meshlet_position_y = base_expr->access_meshlet_position_y;
10221 }
10222
10223 // If we are translating access to a structured buffer, the first subscript '._m0' must be hidden
10224 bool hide_first_subscript = count > 1 && is_user_type_structured(id: base);
10225
10226 const auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) {
10227 AccessChainFlags mod_flags = flags;
10228 if (!is_literal)
10229 mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
10230 if (!is_ptr_chain)
10231 mod_flags &= ~ACCESS_CHAIN_PTR_CHAIN_BIT;
10232 access_chain_internal_append_index(expr, base, type, flags: mod_flags, access_chain_is_arrayed, index);
10233 check_physical_type_cast(expr, type, physical_type);
10234 };
10235
10236 for (uint32_t i = 0; i < count; i++)
10237 {
10238 uint32_t index = indices[i];
10239
10240 bool is_literal = index_is_literal;
10241 if (is_literal && msb_is_id && (index >> 31u) != 0u)
10242 {
10243 is_literal = false;
10244 index &= 0x7fffffffu;
10245 }
10246
10247 bool ptr_chain_array_entry = ptr_chain && i == 0 && is_array(type: *type);
10248
10249 if (ptr_chain_array_entry)
10250 {
10251 // This is highly unusual code, since normally we'd use plain AccessChain, but it's still allowed.
10252 // We are considered to have a pointer to array and one element shifts by one array at a time.
10253 // If we use normal array indexing, we'll first decay to pointer, and lose the array-ness,
10254 // so we have to take pointer to array explicitly.
10255 if (!should_dereference(id: base))
10256 expr = enclose_expression(expr: address_of_expression(expr));
10257 }
10258
10259 if (ptr_chain && i == 0)
10260 {
10261 // Pointer chains
10262 // If we are flattening multidimensional arrays, only create opening bracket on first
10263 // array index.
10264 if (options.flatten_multidimensional_arrays)
10265 {
10266 dimension_flatten = type->array.size() >= 1;
10267 pending_array_enclose = dimension_flatten;
10268 if (pending_array_enclose)
10269 expr += "[";
10270 }
10271
10272 if (options.flatten_multidimensional_arrays && dimension_flatten)
10273 {
10274 // If we are flattening multidimensional arrays, do manual stride computation.
10275 if (is_literal)
10276 expr += convert_to_string(t: index);
10277 else
10278 expr += to_enclosed_expression(id: index, register_expression_read);
10279
10280 for (auto j = uint32_t(type->array.size()); j; j--)
10281 {
10282 expr += " * ";
10283 expr += enclose_expression(expr: to_array_size(type: *type, index: j - 1));
10284 }
10285
10286 if (type->array.empty())
10287 pending_array_enclose = false;
10288 else
10289 expr += " + ";
10290
10291 if (!pending_array_enclose)
10292 expr += "]";
10293 }
10294 else
10295 {
10296 if (flags & ACCESS_CHAIN_PTR_CHAIN_POINTER_ARITH_BIT)
10297 {
10298 SPIRType tmp_type(OpTypeInt);
10299 tmp_type.basetype = SPIRType::UInt64;
10300 tmp_type.width = 64;
10301 tmp_type.vecsize = 1;
10302 tmp_type.columns = 1;
10303
10304 TypeID ptr_type_id = expression_type_id(id: base);
10305 const SPIRType &ptr_type = get<SPIRType>(id: ptr_type_id);
10306 const SPIRType &pointee_type = get_pointee_type(type: ptr_type);
10307
10308 // This only runs in native pointer backends.
10309 // Can replace reinterpret_cast with a backend string if ever needed.
10310 // We expect this to count as a de-reference.
10311 // This leaks some MSL details, but feels slightly overkill to
10312 // add yet another virtual interface just for this.
10313 auto intptr_expr = join(ts: "reinterpret_cast<", ts: type_to_glsl(type: tmp_type), ts: ">(", ts&: expr, ts: ")");
10314 intptr_expr += join(ts: " + ", ts: to_enclosed_unpacked_expression(id: index), ts: " * ",
10315 ts: get_decoration(id: ptr_type_id, decoration: DecorationArrayStride));
10316
10317 if (flags & ACCESS_CHAIN_PTR_CHAIN_CAST_TO_SCALAR_BIT)
10318 {
10319 is_packed = true;
10320 expr = join(ts: "*reinterpret_cast<device packed_", ts: type_to_glsl(type: pointee_type),
10321 ts: " *>(", ts&: intptr_expr, ts: ")");
10322 }
10323 else
10324 {
10325 expr = join(ts: "*reinterpret_cast<", ts: type_to_glsl(type: ptr_type), ts: ">(", ts&: intptr_expr, ts: ")");
10326 }
10327 }
10328 else
10329 append_index(index, is_literal, true);
10330 }
10331
10332 if (type->basetype == SPIRType::ControlPointArray)
10333 {
10334 type_id = type->parent_type;
10335 type = &get<SPIRType>(id: type_id);
10336 }
10337
10338 access_chain_is_arrayed = true;
10339
10340 // Explicitly enclose the expression if this is one of the weird pointer-to-array cases.
10341 // We don't want any future indexing to add to this array dereference.
10342 // Enclosing the expression blocks that and avoids any shenanigans with operand priority.
10343 if (ptr_chain_array_entry)
10344 expr = join(ts: "(", ts&: expr, ts: ")");
10345 }
10346 // Arrays
10347 else if (!type->array.empty())
10348 {
10349 // If we are flattening multidimensional arrays, only create opening bracket on first
10350 // array index.
10351 if (options.flatten_multidimensional_arrays && !pending_array_enclose)
10352 {
10353 dimension_flatten = type->array.size() > 1;
10354 pending_array_enclose = dimension_flatten;
10355 if (pending_array_enclose)
10356 expr += "[";
10357 }
10358
10359 assert(type->parent_type);
10360
10361 auto *var = maybe_get<SPIRVariable>(id: base);
10362 if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(var: *var) &&
10363 !has_decoration(id: type->self, decoration: DecorationBlock))
10364 {
10365 // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
10366 // Normally, these variables live in blocks when compiled from GLSL,
10367 // but HLSL seems to just emit straight arrays here.
10368 // We must pretend this access goes through gl_in/gl_out arrays
10369 // to be able to access certain builtins as arrays.
10370 // Similar concerns apply for mesh shaders where we have to redirect to gl_MeshVerticesEXT or MeshPrimitivesEXT.
10371 auto builtin = ir.meta[base].decoration.builtin_type;
10372 bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT;
10373
10374 chain_is_builtin = true;
10375 chained_builtin = builtin;
10376
10377 switch (builtin)
10378 {
10379 case BuiltInCullDistance:
10380 case BuiltInClipDistance:
10381 if (type->array.size() == 1) // Red herring. Only consider block IO for two-dimensional arrays here.
10382 {
10383 append_index(index, is_literal);
10384 break;
10385 }
10386 // fallthrough
10387 case BuiltInPosition:
10388 case BuiltInPointSize:
10389 if (mesh_shader)
10390 expr = join(ts: "gl_MeshVerticesEXT[", ts: to_expression(id: index, register_expression_read), ts: "].", ts&: expr);
10391 else if (var->storage == StorageClassInput)
10392 expr = join(ts: "gl_in[", ts: to_expression(id: index, register_expression_read), ts: "].", ts&: expr);
10393 else if (var->storage == StorageClassOutput)
10394 expr = join(ts: "gl_out[", ts: to_expression(id: index, register_expression_read), ts: "].", ts&: expr);
10395 else
10396 append_index(index, is_literal);
10397 break;
10398
10399 case BuiltInPrimitiveId:
10400 case BuiltInLayer:
10401 case BuiltInViewportIndex:
10402 case BuiltInCullPrimitiveEXT:
10403 case BuiltInPrimitiveShadingRateKHR:
10404 if (mesh_shader)
10405 expr = join(ts: "gl_MeshPrimitivesEXT[", ts: to_expression(id: index, register_expression_read), ts: "].", ts&: expr);
10406 else
10407 append_index(index, is_literal);
10408 break;
10409
10410 default:
10411 append_index(index, is_literal);
10412 break;
10413 }
10414 }
10415 else if (backend.force_merged_mesh_block && i == 0 && var &&
10416 !is_builtin_variable(var: *var) && var->storage == StorageClassOutput)
10417 {
10418 if (is_per_primitive_variable(var: *var))
10419 expr = join(ts: "gl_MeshPrimitivesEXT[", ts: to_expression(id: index, register_expression_read), ts: "].", ts&: expr);
10420 else
10421 expr = join(ts: "gl_MeshVerticesEXT[", ts: to_expression(id: index, register_expression_read), ts: "].", ts&: expr);
10422 }
10423 else if (options.flatten_multidimensional_arrays && dimension_flatten)
10424 {
10425 // If we are flattening multidimensional arrays, do manual stride computation.
10426 auto &parent_type = get<SPIRType>(id: type->parent_type);
10427
10428 if (is_literal)
10429 expr += convert_to_string(t: index);
10430 else
10431 expr += to_enclosed_expression(id: index, register_expression_read);
10432
10433 for (auto j = uint32_t(parent_type.array.size()); j; j--)
10434 {
10435 expr += " * ";
10436 expr += enclose_expression(expr: to_array_size(type: parent_type, index: j - 1));
10437 }
10438
10439 if (parent_type.array.empty())
10440 pending_array_enclose = false;
10441 else
10442 expr += " + ";
10443
10444 if (!pending_array_enclose)
10445 expr += "]";
10446 }
10447 else if (index_is_literal || !builtin_translates_to_nonarray(builtin: BuiltIn(get_decoration(id: base, decoration: DecorationBuiltIn))))
10448 {
10449 // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
10450 // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
10451 // For literal indices we are working on composites, so we ignore this since we have already converted to proper array.
10452 append_index(index, is_literal);
10453 }
10454
10455 if (var && has_decoration(id: var->self, decoration: DecorationBuiltIn) &&
10456 get_decoration(id: var->self, decoration: DecorationBuiltIn) == BuiltInPosition &&
10457 get_execution_model() == ExecutionModelMeshEXT)
10458 {
10459 access_meshlet_position_y = true;
10460 }
10461
10462 type_id = type->parent_type;
10463 type = &get<SPIRType>(id: type_id);
10464
10465 // If the physical type has an unnatural vecsize,
10466 // we must assume it's a faked struct where the .data member
10467 // is used for the real payload.
10468 if (physical_type && (is_vector(type: *type) || is_scalar(type: *type)))
10469 {
10470 auto &phys = get<SPIRType>(id: physical_type);
10471 if (phys.vecsize > 4)
10472 expr += ".data";
10473 }
10474
10475 access_chain_is_arrayed = true;
10476 }
10477 // For structs, the index refers to a constant, which indexes into the members, possibly through a redirection mapping.
10478 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
10479 else if (type->basetype == SPIRType::Struct)
10480 {
10481 if (!is_literal)
10482 index = evaluate_constant_u32(id: index);
10483
10484 if (index < uint32_t(type->member_type_index_redirection.size()))
10485 index = type->member_type_index_redirection[index];
10486
10487 if (index >= type->member_types.size())
10488 SPIRV_CROSS_THROW("Member index is out of bounds!");
10489
10490 if (hide_first_subscript)
10491 {
				// The first "._m0" subscript has been hidden; subsequent fields must be emitted even for structured buffers.
10493 hide_first_subscript = false;
10494 }
10495 else
10496 {
10497 BuiltIn builtin = BuiltInMax;
10498 if (is_member_builtin(type: *type, index, builtin: &builtin) && access_chain_needs_stage_io_builtin_translation(base))
10499 {
10500 if (access_chain_is_arrayed)
10501 {
10502 expr += ".";
10503 expr += builtin_to_glsl(builtin, storage: type->storage);
10504 }
10505 else
10506 expr = builtin_to_glsl(builtin, storage: type->storage);
10507
10508 if (builtin == BuiltInPosition && get_execution_model() == ExecutionModelMeshEXT)
10509 {
10510 access_meshlet_position_y = true;
10511 }
10512
10513 chain_is_builtin = true;
10514 chained_builtin = builtin;
10515 }
10516 else
10517 {
10518 // If the member has a qualified name, use it as the entire chain
10519 string qual_mbr_name = get_member_qualified_name(type_id, index);
10520 if (!qual_mbr_name.empty())
10521 expr = qual_mbr_name;
10522 else if (flatten_member_reference)
10523 expr += join(ts: "_", ts: to_member_name(type: *type, index));
10524 else
10525 {
					// Any pointer dereferences for values are handled in the first access chain.
10527 // For pointer chains, the pointer-ness is resolved through an array access.
10528 // The only time this is not true is when accessing array of SSBO/UBO.
10529 // This case is explicitly handled.
10530 expr += to_member_reference(base, type: *type, index, ptr_chain_is_resolved: ptr_chain || i != 0);
10531 }
10532 }
10533 }
10534
10535 if (has_member_decoration(id: type->self, index, decoration: DecorationInvariant))
10536 is_invariant = true;
10537 if (has_member_decoration(id: type->self, index, decoration: DecorationRelaxedPrecision))
10538 relaxed_precision = true;
10539
10540 is_packed = member_is_packed_physical_type(type: *type, index);
10541 if (member_is_remapped_physical_type(type: *type, index))
10542 physical_type = get_extended_member_decoration(type: type->self, index, decoration: SPIRVCrossDecorationPhysicalTypeID);
10543 else
10544 physical_type = 0;
10545
10546 row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(type: *type, index);
10547 type = &get<SPIRType>(id: type->member_types[index]);
10548 }
10549 // Matrix -> Vector
10550 else if (type->columns > 1)
10551 {
10552 // If we have a row-major matrix here, we need to defer any transpose in case this access chain
10553 // is used to store a column. We can resolve it right here and now if we access a scalar directly,
10554 // by flipping indexing order of the matrix.
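		// Sketch of the deferred flow with a hypothetical row-major matrix M addressed as M[1][2]:
		// this step only appends "[1]"; the vector step below then stashes "[1]" as a deferred
		// index, emits "[2]" first and re-appends the deferred part, so the final expression
		// reads roughly "M[2][1]" with the indexing order flipped.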
10555
10556 expr += "[";
10557 if (is_literal)
10558 expr += convert_to_string(t: index);
10559 else
10560 expr += to_unpacked_expression(id: index, register_expression_read);
10561 expr += "]";
10562
10563 // If the physical type has an unnatural vecsize,
10564 // we must assume it's a faked struct where the .data member
10565 // is used for the real payload.
10566 if (physical_type)
10567 {
10568 auto &phys = get<SPIRType>(id: physical_type);
10569 if (phys.vecsize > 4 || phys.columns > 4)
10570 expr += ".data";
10571 }
10572
10573 type_id = type->parent_type;
10574 type = &get<SPIRType>(id: type_id);
10575 }
10576 // Vector -> Scalar
10577 else if (type->vecsize > 1)
10578 {
10579 string deferred_index;
10580 if (row_major_matrix_needs_conversion)
10581 {
10582 // Flip indexing order.
10583 auto column_index = expr.find_last_of(c: '[');
10584 if (column_index != string::npos)
10585 {
10586 deferred_index = expr.substr(pos: column_index);
10587
10588 auto end_deferred_index = deferred_index.find_last_of(c: ']');
10589 if (end_deferred_index != string::npos && end_deferred_index + 1 != deferred_index.size())
10590 {
10591 // If we have any data member fixups, it must be transposed so that it refers to this index.
10592 // E.g. [0].data followed by [1] would be shuffled to [1][0].data which is wrong,
10593 // and needs to be [1].data[0] instead.
10594 end_deferred_index++;
10595 deferred_index = deferred_index.substr(pos: end_deferred_index) +
10596 deferred_index.substr(pos: 0, n: end_deferred_index);
10597 }
10598
10599 expr.resize(n: column_index);
10600 }
10601 }
10602
			// Internally, the access chain implementation can also be used on composites,
			// so ignore scalar access workarounds in that case.
10605 StorageClass effective_storage = StorageClassGeneric;
10606 bool ignore_potential_sliced_writes = false;
10607 if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
10608 {
10609 if (expression_type(id: base).pointer)
10610 effective_storage = get_expression_effective_storage_class(ptr: base);
10611
10612 // Special consideration for control points.
10613 // Control points can only be written by InvocationID, so there is no need
10614 // to consider scalar access chains here.
10615 // Cleans up some cases where it's very painful to determine the accurate storage class
10616 // since blocks can be partially masked ...
10617 auto *var = maybe_get_backing_variable(chain: base);
10618 if (var && var->storage == StorageClassOutput &&
10619 get_execution_model() == ExecutionModelTessellationControl &&
10620 !has_decoration(id: var->self, decoration: DecorationPatch))
10621 {
10622 ignore_potential_sliced_writes = true;
10623 }
10624 }
10625 else
10626 ignore_potential_sliced_writes = true;
10627
10628 if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
10629 {
10630 // On some backends, we might not be able to safely access individual scalars in a vector.
10631 // To work around this, we might have to cast the access chain reference to something which can,
10632 // like a pointer to scalar, which we can then index into.
10633 prepare_access_chain_for_scalar_access(expr, type: get<SPIRType>(id: type->parent_type), storage: effective_storage,
10634 is_packed);
10635 }
10636
10637 if (is_literal)
10638 {
10639 bool out_of_bounds = (index >= type->vecsize);
10640
10641 if (!is_packed && !row_major_matrix_needs_conversion)
10642 {
10643 expr += ".";
10644 expr += index_to_swizzle(index: out_of_bounds ? 0 : index);
10645 }
10646 else
10647 {
10648 // For packed vectors, we can only access them as an array, not by swizzle.
10649 expr += join(ts: "[", ts: out_of_bounds ? 0 : index, ts: "]");
10650 }
10651 }
10652 else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
10653 {
10654 auto &c = get<SPIRConstant>(id: index);
10655 bool out_of_bounds = (c.scalar() >= type->vecsize);
10656
10657 if (c.specialization)
10658 {
10659 // If the index is a spec constant, we cannot turn extract into a swizzle.
10660 expr += join(ts: "[", ts: out_of_bounds ? "0" : to_expression(id: index), ts: "]");
10661 }
10662 else
10663 {
10664 expr += ".";
10665 expr += index_to_swizzle(index: out_of_bounds ? 0 : c.scalar());
10666 }
10667 }
10668 else
10669 {
10670 expr += "[";
10671 expr += to_unpacked_expression(id: index, register_expression_read);
10672 expr += "]";
10673 }
10674
10675 if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
10676 {
10677 if (prepare_access_chain_for_scalar_access(expr, type: get<SPIRType>(id: type->parent_type), storage: effective_storage,
10678 is_packed))
10679 {
10680 // We're in a pointer context now, so just remove any member dereference.
10681 auto first_index = deferred_index.find_first_of(c: '[');
10682 if (first_index != string::npos && first_index != 0)
10683 deferred_index = deferred_index.substr(pos: first_index);
10684 }
10685 }
10686
10687 if (access_meshlet_position_y)
10688 {
10689 if (is_literal)
10690 {
10691 access_meshlet_position_y = index == 1;
10692 }
10693 else
10694 {
10695 const auto *c = maybe_get<SPIRConstant>(id: index);
10696 if (c)
10697 access_meshlet_position_y = c->scalar() == 1;
10698 else
10699 {
					// We don't know, so we have to assume it is not .y.
					// Flipping Y in mesh shaders is an opt-in (and horrible) hack, so we'll have to assume shaders try to behave.
10702 access_meshlet_position_y = false;
10703 }
10704 }
10705 }
10706
10707 expr += deferred_index;
10708 row_major_matrix_needs_conversion = false;
10709
10710 is_packed = false;
10711 physical_type = 0;
10712 type_id = type->parent_type;
10713 type = &get<SPIRType>(id: type_id);
10714 }
10715 else if (!backend.allow_truncated_access_chain)
10716 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
10717 }
10718
10719 if (pending_array_enclose)
10720 {
10721 SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, "
10722 "but the access chain was terminated in the middle of a multidimensional array. "
10723 "This is not supported.");
10724 }
10725
10726 if (meta)
10727 {
10728 meta->need_transpose = row_major_matrix_needs_conversion;
10729 meta->storage_is_packed = is_packed;
10730 meta->storage_is_invariant = is_invariant;
10731 meta->storage_physical_type = physical_type;
10732 meta->relaxed_precision = relaxed_precision;
10733 meta->access_meshlet_position_y = access_meshlet_position_y;
10734 meta->chain_is_builtin = chain_is_builtin;
10735 meta->builtin = chained_builtin;
10736 }
10737
10738 return expr;
10739}
10740
10741void CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t)
10742{
10743}
10744
10745bool CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
10746{
10747 return false;
10748}
10749
10750string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
10751{
10752 auto ret = join(ts: basename, ts: "_", ts: to_member_name(type, index));
10753 ParsedIR::sanitize_underscores(str&: ret);
10754 return ret;
10755}
10756
10757uint32_t CompilerGLSL::get_physical_type_stride(const SPIRType &) const
10758{
10759 SPIRV_CROSS_THROW("Invalid to call get_physical_type_stride on a backend without native pointer support.");
10760}
10761
10762string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
10763 AccessChainMeta *meta, bool ptr_chain)
10764{
10765 if (flattened_buffer_blocks.count(x: base))
10766 {
10767 uint32_t matrix_stride = 0;
10768 uint32_t array_stride = 0;
10769 bool need_transpose = false;
10770 flattened_access_chain_offset(basetype: expression_type(id: base), indices, count, offset: 0, word_stride: 16, need_transpose: &need_transpose, matrix_stride: &matrix_stride,
10771 array_stride: &array_stride, ptr_chain);
10772
10773 if (meta)
10774 {
10775 meta->need_transpose = target_type.columns > 1 && need_transpose;
10776 meta->storage_is_packed = false;
10777 }
10778
10779 return flattened_access_chain(base, indices, count, target_type, offset: 0, matrix_stride, array_stride,
10780 need_transpose);
10781 }
10782 else if (flattened_structs.count(x: base) && count > 0)
10783 {
10784 AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
10785 if (ptr_chain)
10786 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
10787
10788 if (flattened_structs[base])
10789 {
10790 flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
10791 if (meta)
10792 meta->flattened_struct = target_type.basetype == SPIRType::Struct;
10793 }
10794
10795 auto chain = access_chain_internal(base, indices, count, flags, meta: nullptr).substr(pos: 1);
10796 if (meta)
10797 {
10798 meta->need_transpose = false;
10799 meta->storage_is_packed = false;
10800 }
10801
10802 auto basename = to_flattened_access_chain_expression(id: base);
10803 auto ret = join(ts&: basename, ts: "_", ts&: chain);
10804 ParsedIR::sanitize_underscores(str&: ret);
10805 return ret;
10806 }
10807 else
10808 {
10809 AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
10810 if (ptr_chain)
10811 {
10812 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
10813 // PtrAccessChain could get complicated.
10814 TypeID type_id = expression_type_id(id: base);
10815 if (backend.native_pointers && has_decoration(id: type_id, decoration: DecorationArrayStride))
10816 {
10817 // If there is a mismatch we have to go via 64-bit pointer arithmetic :'(
10818 // Using packed hacks only gets us so far, and is not designed to deal with pointer to
10819 // random values. It works for structs though.
10820 auto &pointee_type = get_pointee_type(type: get<SPIRType>(id: type_id));
10821 uint32_t physical_stride = get_physical_type_stride(pointee_type);
10822 uint32_t requested_stride = get_decoration(id: type_id, decoration: DecorationArrayStride);
10823 if (physical_stride != requested_stride)
10824 {
10825 flags |= ACCESS_CHAIN_PTR_CHAIN_POINTER_ARITH_BIT;
10826 if (is_vector(type: pointee_type))
10827 flags |= ACCESS_CHAIN_PTR_CHAIN_CAST_TO_SCALAR_BIT;
10828 }
10829 }
10830 }
10831
10832 return access_chain_internal(base, indices, count, flags, meta);
10833 }
10834}
10835
10836string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
10837{
10838 auto expr = type_to_glsl_constructor(type);
10839 expr += '(';
10840
10841 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
10842 {
10843 if (i)
10844 expr += ", ";
10845
10846 auto &member_type = get<SPIRType>(id: type.member_types[i]);
10847 if (member_type.basetype == SPIRType::Struct)
10848 expr += load_flattened_struct(basename: to_flattened_struct_member(basename, type, index: i), type: member_type);
10849 else
10850 expr += to_flattened_struct_member(basename, type, index: i);
10851 }
10852 expr += ')';
10853 return expr;
10854}
10855
10856std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
10857{
10858 // Do not use to_expression as that will unflatten access chains.
10859 string basename;
10860 if (const auto *var = maybe_get<SPIRVariable>(id))
10861 basename = to_name(id: var->self);
10862 else if (const auto *expr = maybe_get<SPIRExpression>(id))
10863 basename = expr->expression;
10864 else
10865 basename = to_expression(id);
10866
10867 return basename;
10868}
10869
10870void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
10871 const SmallVector<uint32_t> &indices)
10872{
10873 SmallVector<uint32_t> sub_indices = indices;
10874 sub_indices.push_back(t: 0);
10875
10876 auto *member_type = &type;
10877 for (auto &index : indices)
10878 member_type = &get<SPIRType>(id: member_type->member_types[index]);
10879
10880 for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
10881 {
10882 sub_indices.back() = i;
10883 auto lhs = join(ts: basename, ts: "_", ts: to_member_name(type: *member_type, index: i));
10884 ParsedIR::sanitize_underscores(str&: lhs);
10885
10886 if (get<SPIRType>(id: member_type->member_types[i]).basetype == SPIRType::Struct)
10887 {
10888 store_flattened_struct(basename: lhs, rhs_id, type, indices: sub_indices);
10889 }
10890 else
10891 {
10892 auto rhs = to_expression(id: rhs_id) + to_multi_member_reference(type, indices: sub_indices);
10893 statement(ts&: lhs, ts: " = ", ts&: rhs, ts: ";");
10894 }
10895 }
10896}
10897
10898void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
10899{
10900 auto &type = expression_type(id: lhs_id);
10901 auto basename = to_flattened_access_chain_expression(id: lhs_id);
10902 store_flattened_struct(basename, rhs_id: value, type, indices: {});
10903}
10904
10905std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
10906 const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
10907 uint32_t /* array_stride */, bool need_transpose)
10908{
10909 if (!target_type.array.empty())
10910 SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
10911 else if (target_type.basetype == SPIRType::Struct)
10912 return flattened_access_chain_struct(base, indices, count, target_type, offset);
10913 else if (target_type.columns > 1)
10914 return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
10915 else
10916 return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
10917}
10918
10919std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
10920 const SPIRType &target_type, uint32_t offset)
10921{
10922 std::string expr;
10923
10924 if (backend.can_declare_struct_inline)
10925 {
10926 expr += type_to_glsl_constructor(type: target_type);
10927 expr += "(";
10928 }
10929 else
10930 expr += "{";
10931
10932 for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
10933 {
10934 if (i != 0)
10935 expr += ", ";
10936
10937 const SPIRType &member_type = get<SPIRType>(id: target_type.member_types[i]);
10938 uint32_t member_offset = type_struct_member_offset(type: target_type, index: i);
10939
10940 // The access chain terminates at the struct, so we need to find matrix strides and row-major information
10941 // ahead of time.
10942 bool need_transpose = false;
10943 bool relaxed = false;
10944 uint32_t matrix_stride = 0;
10945 if (member_type.columns > 1)
10946 {
10947 auto decorations = combined_decoration_for_member(type: target_type, index: i);
10948 need_transpose = decorations.get(bit: DecorationRowMajor);
10949 relaxed = decorations.get(bit: DecorationRelaxedPrecision);
10950 matrix_stride = type_struct_member_matrix_stride(type: target_type, index: i);
10951 }
10952
10953 auto tmp = flattened_access_chain(base, indices, count, target_type: member_type, offset: offset + member_offset, matrix_stride,
10954 0 /* array_stride */, need_transpose);
10955
10956 // Cannot forward transpositions, so resolve them here.
10957 if (need_transpose)
10958 expr += convert_row_major_matrix(exp_str: tmp, exp_type: member_type, physical_type_id: 0, is_packed: false, relaxed);
10959 else
10960 expr += tmp;
10961 }
10962
10963 expr += backend.can_declare_struct_inline ? ")" : "}";
10964
10965 return expr;
10966}
10967
10968std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
10969 const SPIRType &target_type, uint32_t offset,
10970 uint32_t matrix_stride, bool need_transpose)
10971{
10972 assert(matrix_stride);
10973 SPIRType tmp_type = target_type;
10974 if (need_transpose)
10975 swap(a&: tmp_type.vecsize, b&: tmp_type.columns);
10976
10977 std::string expr;
10978
10979 expr += type_to_glsl_constructor(type: tmp_type);
10980 expr += "(";
10981
10982 for (uint32_t i = 0; i < tmp_type.columns; i++)
10983 {
10984 if (i != 0)
10985 expr += ", ";
10986
10987 expr += flattened_access_chain_vector(base, indices, count, target_type: tmp_type, offset: offset + i * matrix_stride, matrix_stride,
10988 /* need_transpose= */ false);
10989 }
10990
10991 expr += ")";
10992
10993 return expr;
10994}
10995
10996std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
10997 const SPIRType &target_type, uint32_t offset,
10998 uint32_t matrix_stride, bool need_transpose)
10999{
11000 auto result = flattened_access_chain_offset(basetype: expression_type(id: base), indices, count, offset, word_stride: 16);
11001
11002 auto buffer_name = to_name(id: expression_type(id: base).self);
11003
11004 if (need_transpose)
11005 {
11006 std::string expr;
11007
11008 if (target_type.vecsize > 1)
11009 {
11010 expr += type_to_glsl_constructor(type: target_type);
11011 expr += "(";
11012 }
11013
11014 for (uint32_t i = 0; i < target_type.vecsize; ++i)
11015 {
11016 if (i != 0)
11017 expr += ", ";
11018
11019 uint32_t component_offset = result.second + i * matrix_stride;
11020
11021 assert(component_offset % (target_type.width / 8) == 0);
11022 uint32_t index = component_offset / (target_type.width / 8);
11023
11024 expr += buffer_name;
11025 expr += "[";
11026 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
11027 expr += convert_to_string(t: index / 4);
11028 expr += "]";
11029
11030 expr += vector_swizzle(vecsize: 1, index: index % 4);
11031 }
11032
11033 if (target_type.vecsize > 1)
11034 {
11035 expr += ")";
11036 }
11037
11038 return expr;
11039 }
11040 else
11041 {
11042 assert(result.second % (target_type.width / 8) == 0);
11043 uint32_t index = result.second / (target_type.width / 8);
11044
11045 std::string expr;
11046
11047 expr += buffer_name;
11048 expr += "[";
11049 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
11050 expr += convert_to_string(t: index / 4);
11051 expr += "]";
11052
11053 expr += vector_swizzle(vecsize: target_type.vecsize, index: index % 4);
11054
11055 return expr;
11056 }
11057}
11058
11059std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
11060 const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
11061 bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
11062{
11063 // Start traversing type hierarchy at the proper non-pointer types.
11064 const auto *type = &get_pointee_type(type: basetype);
11065
11066 std::string expr;
11067
11068 // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
11069 bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
11070 uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
11071 uint32_t array_stride = out_array_stride ? *out_array_stride : 0;
11072
11073 for (uint32_t i = 0; i < count; i++)
11074 {
11075 uint32_t index = indices[i];
11076
11077 // Pointers
11078 if (ptr_chain && i == 0)
11079 {
11080 // Here, the pointer type will be decorated with an array stride.
11081 array_stride = get_decoration(id: basetype.self, decoration: DecorationArrayStride);
11082 if (!array_stride)
11083 SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
11084
11085 auto *constant = maybe_get<SPIRConstant>(id: index);
11086 if (constant)
11087 {
11088 // Constant array access.
11089 offset += constant->scalar() * array_stride;
11090 }
11091 else
11092 {
11093 // Dynamic array access.
11094 if (array_stride % word_stride)
11095 {
11096 SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
11097 "of a 4-component vector. "
11098 "Likely culprit here is a float or vec2 array inside a push "
11099 "constant block which is std430. "
11100 "This cannot be flattened. Try using std140 layout instead.");
11101 }
11102
11103 expr += to_enclosed_expression(id: index);
11104 expr += " * ";
11105 expr += convert_to_string(t: array_stride / word_stride);
11106 expr += " + ";
11107 }
11108 }
11109 // Arrays
11110 else if (!type->array.empty())
11111 {
11112 auto *constant = maybe_get<SPIRConstant>(id: index);
11113 if (constant)
11114 {
11115 // Constant array access.
11116 offset += constant->scalar() * array_stride;
11117 }
11118 else
11119 {
11120 // Dynamic array access.
11121 if (array_stride % word_stride)
11122 {
11123 SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
11124 "of a 4-component vector. "
11125 "Likely culprit here is a float or vec2 array inside a push "
11126 "constant block which is std430. "
11127 "This cannot be flattened. Try using std140 layout instead.");
11128 }
11129
11130 expr += to_enclosed_expression(id: index, register_expression_read: false);
11131 expr += " * ";
11132 expr += convert_to_string(t: array_stride / word_stride);
11133 expr += " + ";
11134 }
11135
11136 uint32_t parent_type = type->parent_type;
11137 type = &get<SPIRType>(id: parent_type);
11138
11139 if (!type->array.empty())
11140 array_stride = get_decoration(id: parent_type, decoration: DecorationArrayStride);
11141 }
11142 // For structs, the index refers to a constant, which indexes into the members.
11143 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
11144 else if (type->basetype == SPIRType::Struct)
11145 {
11146 index = evaluate_constant_u32(id: index);
11147
11148 if (index >= type->member_types.size())
11149 SPIRV_CROSS_THROW("Member index is out of bounds!");
11150
11151 offset += type_struct_member_offset(type: *type, index);
11152
11153 auto &struct_type = *type;
11154 type = &get<SPIRType>(id: type->member_types[index]);
11155
11156 if (type->columns > 1)
11157 {
11158 matrix_stride = type_struct_member_matrix_stride(type: struct_type, index);
11159 row_major_matrix_needs_conversion =
11160 combined_decoration_for_member(type: struct_type, index).get(bit: DecorationRowMajor);
11161 }
11162 else
11163 row_major_matrix_needs_conversion = false;
11164
11165 if (!type->array.empty())
11166 array_stride = type_struct_member_array_stride(type: struct_type, index);
11167 }
11168 // Matrix -> Vector
11169 else if (type->columns > 1)
11170 {
11171 auto *constant = maybe_get<SPIRConstant>(id: index);
11172 if (constant)
11173 {
11174 index = evaluate_constant_u32(id: index);
11175 offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
11176 }
11177 else
11178 {
11179 uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
11180 // Dynamic array access.
11181 if (indexing_stride % word_stride)
11182 {
11183 SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
11184 "4-component vector. "
11185 "Likely culprit here is a row-major matrix being accessed dynamically. "
11186 "This cannot be flattened. Try using std140 layout instead.");
11187 }
11188
11189 expr += to_enclosed_expression(id: index, register_expression_read: false);
11190 expr += " * ";
11191 expr += convert_to_string(t: indexing_stride / word_stride);
11192 expr += " + ";
11193 }
11194
11195 type = &get<SPIRType>(id: type->parent_type);
11196 }
11197 // Vector -> Scalar
11198 else if (type->vecsize > 1)
11199 {
11200 auto *constant = maybe_get<SPIRConstant>(id: index);
11201 if (constant)
11202 {
11203 index = evaluate_constant_u32(id: index);
11204 offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
11205 }
11206 else
11207 {
11208 uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
11209
11210 // Dynamic array access.
11211 if (indexing_stride % word_stride)
11212 {
11213 SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
11214 "size of a 4-component vector. "
11215 "This cannot be flattened in legacy targets.");
11216 }
11217
11218 expr += to_enclosed_expression(id: index, register_expression_read: false);
11219 expr += " * ";
11220 expr += convert_to_string(t: indexing_stride / word_stride);
11221 expr += " + ";
11222 }
11223
11224 type = &get<SPIRType>(id: type->parent_type);
11225 }
11226 else
11227 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
11228 }
11229
11230 if (need_transpose)
11231 *need_transpose = row_major_matrix_needs_conversion;
11232 if (out_matrix_stride)
11233 *out_matrix_stride = matrix_stride;
11234 if (out_array_stride)
11235 *out_array_stride = array_stride;
11236
11237 return std::make_pair(x&: expr, y&: offset);
11238}
11239
11240bool CompilerGLSL::should_dereference(uint32_t id)
11241{
11242 const auto &type = expression_type(id);
11243 // Non-pointer expressions don't need to be dereferenced.
11244 if (!type.pointer)
11245 return false;
11246
11247 // Handles shouldn't be dereferenced either.
11248 if (!expression_is_lvalue(id))
11249 return false;
11250
11251 // If id is a variable but not a phi variable, we should not dereference it.
11252 if (auto *var = maybe_get<SPIRVariable>(id))
11253 return var->phi_variable;
11254
11255 if (auto *expr = maybe_get<SPIRExpression>(id))
11256 {
11257 // If id is an access chain, we should not dereference it.
11258 if (expr->access_chain)
11259 return false;
11260
11261 // If id is a forwarded copy of a variable pointer, we should not dereference it.
11262 SPIRVariable *var = nullptr;
11263 while (expr->loaded_from && expression_is_forwarded(id: expr->self))
11264 {
11265 auto &src_type = expression_type(id: expr->loaded_from);
11266 // To be a copy, the pointer and its source expression must be the
11267 // same type. Can't check type.self, because for some reason that's
11268 // usually the base type with pointers stripped off. This check is
11269 // complex enough that I've hoisted it out of the while condition.
11270 if (src_type.pointer != type.pointer || src_type.pointer_depth != type.pointer_depth ||
11271 src_type.parent_type != type.parent_type)
11272 break;
11273 if ((var = maybe_get<SPIRVariable>(id: expr->loaded_from)))
11274 break;
11275 if (!(expr = maybe_get<SPIRExpression>(id: expr->loaded_from)))
11276 break;
11277 }
11278
11279 return !var || var->phi_variable;
11280 }
11281
11282 // Otherwise, we should dereference this pointer expression.
11283 return true;
11284}
11285
11286bool CompilerGLSL::should_forward(uint32_t id) const
11287{
	// If id is a variable we will try to forward it regardless of the force_temporary check below.
	// This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL.
11290
11291 auto *var = maybe_get<SPIRVariable>(id);
11292 if (var)
11293 {
11294 // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation.
11295 return !(has_decoration(id, decoration: DecorationBuiltIn) && has_decoration(id, decoration: DecorationVolatile));
11296 }
11297
11298 // For debugging emit temporary variables for all expressions
11299 if (options.force_temporary)
11300 return false;
11301
	// If an expression carries enough dependencies we need to stop forwarding at some point,
	// or we risk blowing up downstream compilers. There are usually limits to how deeply we can nest expressions.
11304 auto *expr = maybe_get<SPIRExpression>(id);
11305 const uint32_t max_expression_dependencies = 64;
11306 if (expr && expr->expression_dependencies.size() >= max_expression_dependencies)
11307 return false;
11308
11309 if (expr && expr->loaded_from
11310 && has_decoration(id: expr->loaded_from, decoration: DecorationBuiltIn)
11311 && has_decoration(id: expr->loaded_from, decoration: DecorationVolatile))
11312 {
11313 // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation.
11314 return false;
11315 }
11316
11317 // Immutable expression can always be forwarded.
11318 if (is_immutable(id))
11319 return true;
11320
11321 return false;
11322}
11323
11324bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
11325{
11326 // Used only by opcodes which don't do any real "work", they just swizzle data in some fashion.
11327 return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
11328}
11329
11330void CompilerGLSL::track_expression_read(uint32_t id)
11331{
11332 switch (ir.ids[id].get_type())
11333 {
11334 case TypeExpression:
11335 {
11336 auto &e = get<SPIRExpression>(id);
11337 for (auto implied_read : e.implied_read_expressions)
11338 track_expression_read(id: implied_read);
11339 break;
11340 }
11341
11342 case TypeAccessChain:
11343 {
11344 auto &e = get<SPIRAccessChain>(id);
11345 for (auto implied_read : e.implied_read_expressions)
11346 track_expression_read(id: implied_read);
11347 break;
11348 }
11349
11350 default:
11351 break;
11352 }
11353
11354 // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
11355 // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
11356 if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
11357 {
11358 auto &v = expression_usage_counts[id];
11359 v++;
11360
11361 // If we create an expression outside a loop,
11362 // but access it inside a loop, we're implicitly reading it multiple times.
11363 // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
11364 // working inside the backend compiler.
11365 if (expression_read_implies_multiple_reads(id))
11366 v++;
11367
11368 if (v >= 2)
11369 {
11370 //if (v == 2)
11371 // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
11372
11373 // Force a recompile after this pass to avoid forwarding this variable.
11374 force_temporary_and_recompile(id);
11375 }
11376 }
11377}
11378
11379bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
11380{
11381 if (forced_temporaries.find(x: id) != end(cont&: forced_temporaries))
11382 return false;
11383
11384 for (uint32_t i = 0; i < num_args; i++)
11385 if (!should_forward(id: args[i]))
11386 return false;
11387
11388 // We need to forward globals as well.
11389 if (!pure)
11390 {
11391 for (auto global : global_variables)
11392 if (!should_forward(id: global))
11393 return false;
11394 for (auto aliased : aliased_variables)
11395 if (!should_forward(id: aliased))
11396 return false;
11397 }
11398
11399 return true;
11400}
11401
11402void CompilerGLSL::register_impure_function_call()
11403{
11404 // Impure functions can modify globals and aliased variables, so invalidate them as well.
11405 for (auto global : global_variables)
11406 flush_dependees(var&: get<SPIRVariable>(id: global));
11407 for (auto aliased : aliased_variables)
11408 flush_dependees(var&: get<SPIRVariable>(id: aliased));
11409}
11410
11411void CompilerGLSL::register_call_out_argument(uint32_t id)
11412{
11413 register_write(chain: id);
11414
11415 auto *var = maybe_get<SPIRVariable>(id);
11416 if (var)
11417 flush_variable_declaration(id: var->self);
11418}
11419
11420string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
11421{
11422 // These variables are always function local,
11423 // so make sure we emit the variable without storage qualifiers.
11424 // Some backends will inject custom variables locally in a function
11425 // with a storage qualifier which is not function-local.
11426 auto old_storage = var.storage;
11427 var.storage = StorageClassFunction;
11428 auto expr = variable_decl(variable: var);
11429 var.storage = old_storage;
11430 return expr;
11431}
11432
11433void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
11434{
11435 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
11436 if (var.allocate_temporary_copy && !flushed_phi_variables.count(x: var.self))
11437 {
11438 auto &type = get<SPIRType>(id: var.basetype);
11439 auto &flags = get_decoration_bitset(id: var.self);
11440 statement(ts: flags_to_qualifiers_glsl(type, flags), ts: variable_decl(type, name: join(ts: "_", ts: var.self, ts: "_copy")), ts: ";");
11441 flushed_phi_variables.insert(x: var.self);
11442 }
11443}
11444
11445void CompilerGLSL::flush_variable_declaration(uint32_t id)
11446{
11447 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
11448 auto *var = maybe_get<SPIRVariable>(id);
11449 if (var && var->deferred_declaration)
11450 {
11451 string initializer;
11452 if (options.force_zero_initialized_variables &&
11453 (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
11454 var->storage == StorageClassPrivate) &&
11455 !var->initializer && type_can_zero_initialize(type: get_variable_data_type(var: *var)))
11456 {
11457 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var: *var)));
11458 }
11459
11460 statement(ts: variable_decl_function_local(var&: *var), ts&: initializer, ts: ";");
11461 var->deferred_declaration = false;
11462 }
11463 if (var)
11464 {
11465 emit_variable_temporary_copies(var: *var);
11466 }
11467}
11468
11469bool CompilerGLSL::remove_duplicate_swizzle(string &op)
11470{
11471 auto pos = op.find_last_of(c: '.');
11472 if (pos == string::npos || pos == 0)
11473 return false;
11474
11475 string final_swiz = op.substr(pos: pos + 1, n: string::npos);
11476
11477 if (backend.swizzle_is_function)
11478 {
11479 if (final_swiz.size() < 2)
11480 return false;
11481
11482 if (final_swiz.substr(pos: final_swiz.size() - 2, n: string::npos) == "()")
11483 final_swiz.erase(pos: final_swiz.size() - 2, n: string::npos);
11484 else
11485 return false;
11486 }
11487
11488 // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
11489 // If so, and previous swizzle is of same length,
11490 // we can drop the final swizzle altogether.
11491 for (uint32_t i = 0; i < final_swiz.size(); i++)
11492 {
11493 static const char expected[] = { 'x', 'y', 'z', 'w' };
11494 if (i >= 4 || final_swiz[i] != expected[i])
11495 return false;
11496 }
11497
11498 auto prevpos = op.find_last_of(c: '.', pos: pos - 1);
11499 if (prevpos == string::npos)
11500 return false;
11501
11502 prevpos++;
11503
11504 // Make sure there are only swizzles here ...
11505 for (auto i = prevpos; i < pos; i++)
11506 {
11507 if (op[i] < 'w' || op[i] > 'z')
11508 {
11509 // If swizzles are foo.xyz() like in C++ backend for example, check for that.
11510 if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
11511 break;
11512 return false;
11513 }
11514 }
11515
11516 // If original swizzle is large enough, just carve out the components we need.
11517 // E.g. foobar.wyx.xy will turn into foobar.wy.
11518 if (pos - prevpos >= final_swiz.size())
11519 {
11520 op.erase(pos: prevpos + final_swiz.size(), n: string::npos);
11521
11522 // Add back the function call ...
11523 if (backend.swizzle_is_function)
11524 op += "()";
11525 }
11526 return true;
11527}
11528
11529// Optimizes away vector swizzles where we have something like
11530// vec3 foo;
11531// foo.xyz <-- swizzle expression does nothing.
// This is a very common pattern after OpCompositeConstruct.
11533bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
11534{
11535 auto pos = op.find_last_of(c: '.');
11536 if (pos == string::npos || pos == 0)
11537 return false;
11538
11539 string final_swiz = op.substr(pos: pos + 1, n: string::npos);
11540
11541 if (backend.swizzle_is_function)
11542 {
11543 if (final_swiz.size() < 2)
11544 return false;
11545
11546 if (final_swiz.substr(pos: final_swiz.size() - 2, n: string::npos) == "()")
11547 final_swiz.erase(pos: final_swiz.size() - 2, n: string::npos);
11548 else
11549 return false;
11550 }
11551
11552 // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
11553 // If so, and previous swizzle is of same length,
11554 // we can drop the final swizzle altogether.
11555 for (uint32_t i = 0; i < final_swiz.size(); i++)
11556 {
11557 static const char expected[] = { 'x', 'y', 'z', 'w' };
11558 if (i >= 4 || final_swiz[i] != expected[i])
11559 return false;
11560 }
11561
11562 auto &type = expression_type(id: base);
11563
11564 // Sanity checking ...
11565 assert(type.columns == 1 && type.array.empty());
11566
11567 if (type.vecsize == final_swiz.size())
11568 op.erase(pos: pos, n: string::npos);
11569 return true;
11570}
11571
11572string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
11573{
11574 ID base = 0;
11575 string op;
11576 string subop;
11577
11578 // Can only merge swizzles for vectors.
11579 auto &type = get<SPIRType>(id: return_type);
11580 bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
11581 bool swizzle_optimization = false;
11582
11583 for (uint32_t i = 0; i < length; i++)
11584 {
11585 auto *e = maybe_get<SPIRExpression>(id: elems[i]);
11586
11587 // If we're merging another scalar which belongs to the same base
11588 // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible!
11589 if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
11590 {
11591 // Only supposed to be used for vector swizzle -> scalar.
11592 assert(!e->expression.empty() && e->expression.front() == '.');
11593 subop += e->expression.substr(pos: 1, n: string::npos);
11594 swizzle_optimization = true;
11595 }
11596 else
11597 {
11598 // We'll likely end up with duplicated swizzles, e.g.
11599 // foobar.xyz.xyz from patterns like
11600 // OpVectorShuffle
11601 // OpCompositeExtract x 3
11602 // OpCompositeConstruct 3x + other scalar.
11603 // Just modify op in-place.
11604 if (swizzle_optimization)
11605 {
11606 if (backend.swizzle_is_function)
11607 subop += "()";
11608
11609 // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
11610 // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
11611 // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
11612 // Essentially, we can only remove one set of swizzles, since that's what we have control over ...
11613 // Case 1:
11614 // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
11615 // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
11616 // Case 2:
11617 // foo.xyz: Duplicate swizzle won't kick in.
11618 // If foo is vec3, we can remove xyz, giving just foo.
11619 if (!remove_duplicate_swizzle(op&: subop))
11620 remove_unity_swizzle(base, op&: subop);
11621
11622 // Strips away redundant parens if we created them during component extraction.
11623 strip_enclosed_expression(expr&: subop);
11624 swizzle_optimization = false;
11625 op += subop;
11626 }
11627 else
11628 op += subop;
11629
11630 if (i)
11631 op += ", ";
11632
11633 bool uses_buffer_offset =
11634 type.basetype == SPIRType::Struct && has_member_decoration(id: type.self, index: i, decoration: DecorationOffset);
11635 subop = to_composite_constructor_expression(parent_type: type, id: elems[i], block_like_type: uses_buffer_offset);
11636 }
11637
11638 base = e ? e->base_expression : ID(0);
11639 }
11640
11641 if (swizzle_optimization)
11642 {
11643 if (backend.swizzle_is_function)
11644 subop += "()";
11645
11646 if (!remove_duplicate_swizzle(op&: subop))
11647 remove_unity_swizzle(base, op&: subop);
11648 // Strips away redundant parens if we created them during component extraction.
11649 strip_enclosed_expression(expr&: subop);
11650 }
11651
11652 op += subop;
11653 return op;
11654}
11655
11656bool CompilerGLSL::skip_argument(uint32_t id) const
11657{
11658 if (!combined_image_samplers.empty() || !options.vulkan_semantics)
11659 {
11660 auto &type = expression_type(id);
11661 if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
11662 return true;
11663 }
11664 return false;
11665}
11666
11667bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
11668{
11669 // Do this with strings because we have a very clear pattern we can check for and it avoids
11670 // adding lots of special cases to the code emission.
11671 if (rhs.size() < lhs.size() + 3)
11672 return false;
11673
11674 // Do not optimize matrices. They are a bit awkward to reason about in general
	// (in which order does the operation happen?), and it does not work on MSL anyway.
11676 if (type.vecsize > 1 && type.columns > 1)
11677 return false;
11678
11679 auto index = rhs.find(str: lhs);
11680 if (index != 0)
11681 return false;
11682
11683 // TODO: Shift operators, but it's not important for now.
11684 auto op = rhs.find_first_of(s: "+-/*%|&^", pos: lhs.size() + 1);
11685 if (op != lhs.size() + 1)
11686 return false;
11687
11688 // Check that the op is followed by space. This excludes && and ||.
11689 if (rhs[op + 1] != ' ')
11690 return false;
11691
11692 char bop = rhs[op];
11693 auto expr = rhs.substr(pos: lhs.size() + 3);
11694
11695 // Avoids false positives where we get a = a * b + c.
11696 // Normally, these expressions are always enclosed, but unexpected code paths may end up hitting this.
11697 if (needs_enclose_expression(expr))
11698 return false;
11699
	// Try to find increments and decrements; emitting ++ / -- reads more neatly, since += 1 / -= 1 is fairly rare to see in real code.
11701 // Find some common patterns which are equivalent.
11702 if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
11703 statement(ts: lhs, ts&: bop, ts&: bop, ts: ";");
11704 else
11705 statement(ts: lhs, ts: " ", ts&: bop, ts: "= ", ts&: expr, ts: ";");
11706 return true;
11707}
11708
11709void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
11710{
11711 if (forwarded_temporaries.find(x: expr) == end(cont&: forwarded_temporaries))
11712 return;
11713
11714 assert(current_emitting_block);
11715 current_emitting_block->invalidate_expressions.push_back(t: expr);
11716}
11717
11718void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
11719{
11720 current_emitting_block = &block;
11721
11722 if (backend.requires_relaxed_precision_analysis)
11723 {
11724 // If PHI variables are consumed in unexpected precision contexts, copy them here.
11725 for (size_t i = 0, n = block.phi_variables.size(); i < n; i++)
11726 {
11727 auto &phi = block.phi_variables[i];
11728
			// Ensure we only copy once. We know a priori that this array lays out
			// entries for the same function variable next to each other.
11731 if (i && block.phi_variables[i - 1].function_variable == phi.function_variable)
11732 continue;
11733
11734 auto itr = temporary_to_mirror_precision_alias.find(x: phi.function_variable);
11735 if (itr != temporary_to_mirror_precision_alias.end())
11736 {
11737 // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
11738 // so it helps to have handle_instruction_precision() on the outside of emit_instruction().
11739 EmbeddedInstruction inst;
11740 inst.op = OpCopyObject;
11741 inst.length = 3;
11742 inst.ops.push_back(t: expression_type_id(id: itr->first));
11743 inst.ops.push_back(t: itr->second);
11744 inst.ops.push_back(t: itr->first);
11745 emit_instruction(instr: inst);
11746 }
11747 }
11748 }
11749
11750 for (auto &op : block.ops)
11751 {
11752 auto temporary_copy = handle_instruction_precision(instr: op);
11753 emit_instruction(instr: op);
11754 if (temporary_copy.dst_id)
11755 {
11756 // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
11757 // so it helps to have handle_instruction_precision() on the outside of emit_instruction().
11758 EmbeddedInstruction inst;
11759 inst.op = OpCopyObject;
11760 inst.length = 3;
11761 inst.ops.push_back(t: expression_type_id(id: temporary_copy.src_id));
11762 inst.ops.push_back(t: temporary_copy.dst_id);
11763 inst.ops.push_back(t: temporary_copy.src_id);
11764
11765 // Never attempt to hoist mirrored temporaries.
11766 // They are hoisted in lock-step with their parents.
11767 block_temporary_hoisting = true;
11768 emit_instruction(instr: inst);
11769 block_temporary_hoisting = false;
11770 }
11771 }
11772
11773 current_emitting_block = nullptr;
11774}
11775
11776void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
11777{
11778 // Allow trivially forwarded expressions like OpLoad or trivial shuffles,
11779 // these will be marked as having suppressed usage tracking.
11780 // Our only concern is to make sure arithmetic operations are done in similar ways.
11781 if (forced_invariant_temporaries.count(x: expr.self) == 0)
11782 {
11783 if (!expression_suppresses_usage_tracking(id: expr.self))
11784 force_temporary_and_recompile(id: expr.self);
11785 forced_invariant_temporaries.insert(x: expr.self);
11786
11787 for (auto &dependent : expr.invariance_dependencies)
11788 disallow_forwarding_in_expression_chain(expr: get<SPIRExpression>(id: dependent));
11789 }
11790}
11791
11792void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
11793{
11794 // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
11795 // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
11796 // in one translation unit, but not another, e.g. due to multiple use of an expression.
11797 // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
11798 // expressions to be temporaries.
11799 // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
11800 // for all reasonable uses of invariant.
11801 if (!has_decoration(id: store_id, decoration: DecorationInvariant))
11802 return;
11803
11804 auto *expr = maybe_get<SPIRExpression>(id: value_id);
11805 if (!expr)
11806 return;
11807
11808 disallow_forwarding_in_expression_chain(expr: *expr);
11809}
11810
11811void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
11812{
11813 auto rhs = to_pointer_expression(id: rhs_expression);
11814
11815 // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
11816 if (!rhs.empty())
11817 {
11818 handle_store_to_invariant_variable(store_id: lhs_expression, value_id: rhs_expression);
11819
11820 if (!unroll_array_to_complex_store(target_id: lhs_expression, source_id: rhs_expression))
11821 {
11822 auto lhs = to_dereferenced_expression(id: lhs_expression);
11823 if (has_decoration(id: lhs_expression, decoration: DecorationNonUniform))
11824 convert_non_uniform_expression(expr&: lhs, ptr_id: lhs_expression);
11825
11826 // We might need to cast in order to store to a builtin.
11827 cast_to_variable_store(target_id: lhs_expression, expr&: rhs, expr_type: expression_type(id: rhs_expression));
11828
11829 // Tries to optimize assignments like "<lhs> = <lhs> op expr".
11830 // While this is purely cosmetic, this is important for legacy ESSL where loop
11831 // variable increments must be in either i++ or i += const-expr.
11832 // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
11833 if (!optimize_read_modify_write(type: expression_type(id: rhs_expression), lhs, rhs))
11834 statement(ts&: lhs, ts: " = ", ts&: rhs, ts: ";");
11835 }
11836 register_write(chain: lhs_expression);
11837 }
11838}
11839
11840uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
11841{
11842 if (instr.length < 3)
11843 return 32;
11844
11845 auto *ops = stream(instr);
11846
11847 switch (instr.op)
11848 {
11849 case OpSConvert:
11850 case OpConvertSToF:
11851 case OpUConvert:
11852 case OpConvertUToF:
11853 case OpIEqual:
11854 case OpINotEqual:
11855 case OpSLessThan:
11856 case OpSLessThanEqual:
11857 case OpSGreaterThan:
11858 case OpSGreaterThanEqual:
11859 case OpULessThan:
11860 case OpULessThanEqual:
11861 case OpUGreaterThan:
11862 case OpUGreaterThanEqual:
11863 return expression_type(id: ops[2]).width;
11864
11865 case OpSMulExtended:
11866 case OpUMulExtended:
11867 return get<SPIRType>(id: get<SPIRType>(id: ops[0]).member_types[0]).width;
11868
11869 default:
11870 {
11871 // We can look at result type which is more robust.
11872 auto *type = maybe_get<SPIRType>(id: ops[0]);
11873 if (type && type_is_integral(type: *type))
11874 return type->width;
11875 else
11876 return 32;
11877 }
11878 }
11879}
11880
11881uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
11882{
11883 if (length < 1)
11884 return 32;
11885
11886 switch (op)
11887 {
11888 case GLSLstd450SAbs:
11889 case GLSLstd450SSign:
11890 case GLSLstd450UMin:
11891 case GLSLstd450SMin:
11892 case GLSLstd450UMax:
11893 case GLSLstd450SMax:
11894 case GLSLstd450UClamp:
11895 case GLSLstd450SClamp:
11896 case GLSLstd450FindSMsb:
11897 case GLSLstd450FindUMsb:
11898 return expression_type(id: ops[0]).width;
11899
11900 default:
11901 {
11902 // We don't need to care about other opcodes, just return 32.
11903 return 32;
11904 }
11905 }
11906}
11907
11908void CompilerGLSL::forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length)
11909{
11910 // Only GLSL supports RelaxedPrecision directly.
11911 // We cannot implement this in HLSL or MSL because it is tied to the type system.
11912 // In SPIR-V, everything must masquerade as 32-bit.
11913 if (!backend.requires_relaxed_precision_analysis)
11914 return;
11915
11916 auto input_precision = analyze_expression_precision(args, length);
11917
11918 // For expressions which are loaded or directly forwarded, we inherit mediump implicitly.
11919 // For dst_id to be analyzed properly, it must inherit any relaxed precision decoration from src_id.
11920 if (input_precision == Options::Mediump)
11921 set_decoration(id: dst_id, decoration: DecorationRelaxedPrecision);
11922}
11923
11924CompilerGLSL::Options::Precision CompilerGLSL::analyze_expression_precision(const uint32_t *args, uint32_t length) const
11925{
11926 // Now, analyze the precision at which the arguments would run.
11927 // GLSL rules are such that the precision used to evaluate an expression is equal to the highest precision
11928 // for the inputs. Constants do not have inherent precision and do not contribute to this decision.
11929 // If all inputs are constants, they inherit precision from outer expressions, including an l-value.
11930 // In this case, we'll have to force a temporary for dst_id so that we can bind the constant expression with
11931 // correct precision.
11932 bool expression_has_highp = false;
11933 bool expression_has_mediump = false;
11934
11935 for (uint32_t i = 0; i < length; i++)
11936 {
11937 uint32_t arg = args[i];
11938
11939 auto handle_type = ir.ids[arg].get_type();
11940 if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef)
11941 continue;
11942
11943 if (has_decoration(id: arg, decoration: DecorationRelaxedPrecision))
11944 expression_has_mediump = true;
11945 else
11946 expression_has_highp = true;
11947 }
11948
11949 if (expression_has_highp)
11950 return Options::Highp;
11951 else if (expression_has_mediump)
11952 return Options::Mediump;
11953 else
11954 return Options::DontCare;
11955}
11956
11957void CompilerGLSL::analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length)
11958{
11959 if (!backend.requires_relaxed_precision_analysis)
11960 return;
11961
11962 auto &type = get<SPIRType>(id: type_id);
11963
11964 // RelaxedPrecision only applies to 32-bit values.
11965 if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt)
11966 return;
11967
11968 bool operation_is_highp = !has_decoration(id: dst_id, decoration: DecorationRelaxedPrecision);
11969
11970 auto input_precision = analyze_expression_precision(args, length);
11971 if (input_precision == Options::DontCare)
11972 {
11973 consume_temporary_in_precision_context(type_id, id: dst_id, precision: input_precision);
11974 return;
11975 }
11976
11977 // In SPIR-V and GLSL, the semantics are flipped for how relaxed precision is determined.
11978 // In SPIR-V, the operation itself marks RelaxedPrecision, meaning that inputs can be truncated to 16-bit.
11979 // However, if the expression is not, inputs must be expanded to 32-bit first,
11980 // since the operation must run at high precision.
11981 // This is the awkward part, because if we have mediump inputs, or expressions which derived from mediump,
11982 // we might have to forcefully bind the source IDs to highp temporaries. This is done by clearing decorations
11983 // and forcing temporaries. Similarly for mediump operations. We bind highp expressions to mediump variables.
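 // For example (sketch): if a highp OpFAdd consumes a RelaxedPrecision value x, we emit something like
 //   highp float _x_copy = x;
 // and rewrite the add to consume _x_copy instead, so the addition really runs at full precision.
 // The actual temporary is created by consume_temporary_in_precision_context below.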
11984 if ((operation_is_highp && input_precision == Options::Mediump) ||
11985 (!operation_is_highp && input_precision == Options::Highp))
11986 {
11987 auto precision = operation_is_highp ? Options::Highp : Options::Mediump;
11988 for (uint32_t i = 0; i < length; i++)
11989 {
11990 // Rewrites the opcode so that we consume an ID in correct precision context.
11991   // This is pretty hacky, but it's the most straightforward way of implementing this without adding
11992 // lots of extra passes to rewrite all code blocks.
11993 args[i] = consume_temporary_in_precision_context(type_id: expression_type_id(id: args[i]), id: args[i], precision);
11994 }
11995 }
11996}
11997
11998// This is probably not exhaustive ...
11999static bool opcode_is_precision_sensitive_operation(Op op)
12000{
12001 switch (op)
12002 {
12003 case OpFAdd:
12004 case OpFSub:
12005 case OpFMul:
12006 case OpFNegate:
12007 case OpIAdd:
12008 case OpISub:
12009 case OpIMul:
12010 case OpSNegate:
12011 case OpFMod:
12012 case OpFDiv:
12013 case OpFRem:
12014 case OpSMod:
12015 case OpSDiv:
12016 case OpSRem:
12017 case OpUMod:
12018 case OpUDiv:
12019 case OpVectorTimesMatrix:
12020 case OpMatrixTimesVector:
12021 case OpMatrixTimesMatrix:
12022 case OpDPdx:
12023 case OpDPdy:
12024 case OpDPdxCoarse:
12025 case OpDPdyCoarse:
12026 case OpDPdxFine:
12027 case OpDPdyFine:
12028 case OpFwidth:
12029 case OpFwidthCoarse:
12030 case OpFwidthFine:
12031 case OpVectorTimesScalar:
12032 case OpMatrixTimesScalar:
12033 case OpOuterProduct:
12034 case OpFConvert:
12035 case OpSConvert:
12036 case OpUConvert:
12037 case OpConvertSToF:
12038 case OpConvertUToF:
12039 case OpConvertFToU:
12040 case OpConvertFToS:
12041 return true;
12042
12043 default:
12044 return false;
12045 }
12046}
12047
12048// Instructions which just load data but don't do any arithmetic operation should just inherit the decoration.
12049// SPIR-V doesn't require this, but it's somewhat implied it has to work this way; relaxed precision is only
12050// relevant when operating on the IDs, not when shuffling things around.
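// E.g. an OpLoad or OpCompositeExtract whose source is RelaxedPrecision simply yields a RelaxedPrecision result.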
12051static bool opcode_is_precision_forwarding_instruction(Op op, uint32_t &arg_count)
12052{
12053 switch (op)
12054 {
12055 case OpLoad:
12056 case OpAccessChain:
12057 case OpInBoundsAccessChain:
12058 case OpCompositeExtract:
12059 case OpVectorExtractDynamic:
12060 case OpSampledImage:
12061 case OpImage:
12062 case OpCopyObject:
12063
12064 case OpImageRead:
12065 case OpImageFetch:
12066 case OpImageSampleImplicitLod:
12067 case OpImageSampleProjImplicitLod:
12068 case OpImageSampleDrefImplicitLod:
12069 case OpImageSampleProjDrefImplicitLod:
12070 case OpImageSampleExplicitLod:
12071 case OpImageSampleProjExplicitLod:
12072 case OpImageSampleDrefExplicitLod:
12073 case OpImageSampleProjDrefExplicitLod:
12074 case OpImageGather:
12075 case OpImageDrefGather:
12076 case OpImageSparseRead:
12077 case OpImageSparseFetch:
12078 case OpImageSparseSampleImplicitLod:
12079 case OpImageSparseSampleProjImplicitLod:
12080 case OpImageSparseSampleDrefImplicitLod:
12081 case OpImageSparseSampleProjDrefImplicitLod:
12082 case OpImageSparseSampleExplicitLod:
12083 case OpImageSparseSampleProjExplicitLod:
12084 case OpImageSparseSampleDrefExplicitLod:
12085 case OpImageSparseSampleProjDrefExplicitLod:
12086 case OpImageSparseGather:
12087 case OpImageSparseDrefGather:
12088 arg_count = 1;
12089 return true;
12090
12091 case OpVectorShuffle:
12092 arg_count = 2;
12093 return true;
12094
12095 case OpCompositeConstruct:
12096 return true;
12097
12098 default:
12099 break;
12100 }
12101
12102 return false;
12103}
12104
12105CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Instruction &instruction)
12106{
12107 auto ops = stream_mutable(instr: instruction);
12108 auto opcode = static_cast<Op>(instruction.op);
12109 uint32_t length = instruction.length;
12110
12111 if (backend.requires_relaxed_precision_analysis)
12112 {
12113 if (length > 2)
12114 {
12115 uint32_t forwarding_length = length - 2;
12116
12117 if (opcode_is_precision_sensitive_operation(op: opcode))
12118 analyze_precision_requirements(type_id: ops[0], dst_id: ops[1], args: &ops[2], length: forwarding_length);
12119 else if (opcode == OpExtInst && length >= 5 && get<SPIRExtension>(id: ops[2]).ext == SPIRExtension::GLSL)
12120 analyze_precision_requirements(type_id: ops[0], dst_id: ops[1], args: &ops[4], length: forwarding_length - 2);
12121 else if (opcode_is_precision_forwarding_instruction(op: opcode, arg_count&: forwarding_length))
12122 forward_relaxed_precision(dst_id: ops[1], args: &ops[2], length: forwarding_length);
12123 }
12124
12125 uint32_t result_type = 0, result_id = 0;
12126 if (instruction_to_result_type(result_type, result_id, op: opcode, args: ops, length))
12127 {
12128 auto itr = temporary_to_mirror_precision_alias.find(x: ops[1]);
12129 if (itr != temporary_to_mirror_precision_alias.end())
12130 return { .dst_id: itr->second, .src_id: itr->first };
12131 }
12132 }
12133
12134 return {};
12135}
12136
12137void CompilerGLSL::emit_instruction(const Instruction &instruction)
12138{
12139 auto ops = stream(instr: instruction);
12140 auto opcode = static_cast<Op>(instruction.op);
12141 uint32_t length = instruction.length;
12142
12143#define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
12144#define GLSL_BOP_CAST(op, type) \
12145 emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, \
12146 opcode_is_sign_invariant(opcode), implicit_integer_promotion)
12147#define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
12148#define GLSL_UOP_CAST(op) emit_unary_op_cast(ops[0], ops[1], ops[2], #op)
12149#define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
12150#define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
12151#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
12152#define GLSL_BFOP_CAST(op, type) \
12153 emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
12155#define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
12156
12157 // If we need to do implicit bitcasts, make sure we do it with the correct type.
12158 uint32_t integer_width = get_integer_width_for_instruction(instr: instruction);
12159 auto int_type = to_signed_basetype(width: integer_width);
12160 auto uint_type = to_unsigned_basetype(width: integer_width);
12161
12162 // Handle C implicit integer promotion rules.
12163 // If we get implicit promotion to int, need to make sure we cast by value to intended return type,
12164 // otherwise, future sign-dependent operations and bitcasts will break.
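 // For example (sketch): for 16-bit a and b in a C-like backend, a + b is promoted to (32-bit) int,
 // so we emit something like uint16_t(a + b) to truncate back to the intended width before any
 // sign-dependent operation or bitcast sees the value.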
12165 bool implicit_integer_promotion = integer_width < 32 && backend.implicit_c_integer_promotion_rules &&
12166 opcode_can_promote_integer_implicitly(opcode) &&
12167 get<SPIRType>(id: ops[0]).vecsize == 1;
12168
12169 opcode = get_remapped_spirv_op(op: opcode);
12170
12171 switch (opcode)
12172 {
12173 // Dealing with memory
12174 case OpLoad:
12175 {
12176 uint32_t result_type = ops[0];
12177 uint32_t id = ops[1];
12178 uint32_t ptr = ops[2];
12179
12180 flush_variable_declaration(id: ptr);
12181
12182 // If we're loading from memory that cannot be changed by the shader,
12183 // just forward the expression directly to avoid needless temporaries.
12184 // If an expression is mutable and forwardable, we speculate that it is immutable.
12185 bool forward = should_forward(id: ptr) && forced_temporaries.find(x: id) == end(cont&: forced_temporaries);
12186
12187 // If loading a non-native row-major matrix, mark the expression as need_transpose.
12188 bool need_transpose = false;
12189 bool old_need_transpose = false;
12190
12191 auto *ptr_expression = maybe_get<SPIRExpression>(id: ptr);
12192
12193 if (forward)
12194 {
12195 // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
12196 // taking the expression.
12197 if (ptr_expression && ptr_expression->need_transpose)
12198 {
12199 old_need_transpose = true;
12200 ptr_expression->need_transpose = false;
12201 need_transpose = true;
12202 }
12203 else if (is_non_native_row_major_matrix(id: ptr))
12204 need_transpose = true;
12205 }
12206
12207 // If we are forwarding this load,
12208 // don't register the read to access chain here, defer that to when we actually use the expression,
12209 // using the add_implied_read_expression mechanism.
12210 string expr;
12211
12212 bool is_packed = has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypePacked);
12213 bool is_remapped = has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID);
12214 if (forward || (!is_packed && !is_remapped))
12215 {
12216 // For the simple case, we do not need to deal with repacking.
12217 expr = to_dereferenced_expression(id: ptr, register_expression_read: false);
12218 }
12219 else
12220 {
12221 // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
12222 // storing the expression to a temporary.
12223 expr = to_unpacked_expression(id: ptr);
12224 }
12225
12226 auto &type = get<SPIRType>(id: result_type);
12227 auto &expr_type = expression_type(id: ptr);
12228
12229 // If the expression has more vector components than the result type, insert
12230 // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
12231 // happen with e.g. the MSL backend replacing the type of an input variable.
12232 if (expr_type.vecsize > type.vecsize)
12233 expr = enclose_expression(expr: expr + vector_swizzle(vecsize: type.vecsize, index: 0));
12234
12235 if (forward && ptr_expression)
12236 ptr_expression->need_transpose = old_need_transpose;
12237
12238 // We might need to cast in order to load from a builtin.
12239 cast_from_variable_load(source_id: ptr, expr, expr_type: type);
12240
12241 if (forward && ptr_expression)
12242 ptr_expression->need_transpose = false;
12243
12244 // We might be trying to load a gl_Position[N], where we should be
12245 // doing float4[](gl_in[i].gl_Position, ...) instead.
12246 // Similar workarounds are required for input arrays in tessellation.
12247 // Also, loading from gl_SampleMask array needs special unroll.
12248 unroll_array_from_complex_load(target_id: id, source_id: ptr, expr);
12249
12250 if (!type_is_opaque_value(type) && has_decoration(id: ptr, decoration: DecorationNonUniform))
12251 {
12252 // If we're loading something non-opaque, we need to handle non-uniform descriptor access.
12253 convert_non_uniform_expression(expr, ptr_id: ptr);
12254 }
12255
12256 if (forward && ptr_expression)
12257 ptr_expression->need_transpose = old_need_transpose;
12258
12259 bool flattened = ptr_expression && flattened_buffer_blocks.count(x: ptr_expression->loaded_from) != 0;
12260
12261 if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(id: ptr) && !flattened)
12262 rewrite_load_for_wrapped_row_major(expr, loaded_type: result_type, ptr);
12263
12264 // By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
12265 // However, if we try to load a complex, composite object from a flattened buffer,
12266 // we should avoid emitting the same code over and over and lower the result to a temporary.
12267 bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
12268
12269 SPIRExpression *e = nullptr;
12270 if (!forward && expression_is_non_value_type_array(ptr))
12271 {
12272 // Complicated load case where we need to make a copy of ptr, but we cannot, because
12273 // it is an array, and our backend does not support arrays as value types.
12274 // Emit the temporary, and copy it explicitly.
12275 e = &emit_uninitialized_temporary_expression(type: result_type, id);
12276 emit_array_copy(expr: nullptr, lhs_id: id, rhs_id: ptr, lhs_storage: StorageClassFunction, rhs_storage: get_expression_effective_storage_class(ptr));
12277 }
12278 else
12279 e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: forward, suppress_usage_tracking: !usage_tracking);
12280
12281 e->need_transpose = need_transpose;
12282 register_read(expr: id, chain: ptr, forwarded: forward);
12283
12284 if (forward)
12285 {
12286 // Pass through whether the result is of a packed type and the physical type ID.
12287 if (has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypePacked))
12288 set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
12289 if (has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID))
12290 {
12291 set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID,
12292 value: get_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID));
12293 }
12294 }
12295 else
12296 {
12297 // This might have been set on an earlier compilation iteration, force it to be unset.
12298 unset_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
12299 unset_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID);
12300 }
12301
12302 inherit_expression_dependencies(dst: id, source: ptr);
12303 if (forward)
12304 add_implied_read_expression(e&: *e, source: ptr);
12305 break;
12306 }
12307
12308 case OpInBoundsAccessChain:
12309 case OpAccessChain:
12310 case OpPtrAccessChain:
12311 {
12312 auto *var = maybe_get<SPIRVariable>(id: ops[2]);
12313 if (var)
12314 flush_variable_declaration(id: var->self);
12315
12316 // If the base is immutable, the access chain pointer must also be.
12317 // If an expression is mutable and forwardable, we speculate that it is immutable.
12318 AccessChainMeta meta;
12319 bool ptr_chain = opcode == OpPtrAccessChain;
12320 auto &target_type = get<SPIRType>(id: ops[0]);
12321 auto e = access_chain(base: ops[2], indices: &ops[3], count: length - 3, target_type, meta: &meta, ptr_chain);
12322
12323 // If the base is flattened UBO of struct type, the expression has to be a composite.
12324 // In that case, backends which do not support inline syntax need it to be bound to a temporary.
12325 // Otherwise, invalid expressions like ({UBO[0].xyz, UBO[0].w, UBO[1]}).member are emitted.
12326 bool requires_temporary = false;
12327 if (flattened_buffer_blocks.count(x: ops[2]) && target_type.basetype == SPIRType::Struct)
12328 requires_temporary = !backend.can_declare_struct_inline;
12329
12330 auto &expr = requires_temporary ?
12331 emit_op(result_type: ops[0], result_id: ops[1], rhs: std::move(e), forwarding: false) :
12332 set<SPIRExpression>(id: ops[1], args: std::move(e), args: ops[0], args: should_forward(id: ops[2]));
12333
12334 auto *backing_variable = maybe_get_backing_variable(chain: ops[2]);
12335 expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
12336 expr.need_transpose = meta.need_transpose;
12337 expr.access_chain = true;
12338 expr.access_meshlet_position_y = meta.access_meshlet_position_y;
12339
12340  // Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones.
12341 if (meta.storage_is_packed)
12342 set_extended_decoration(id: ops[1], decoration: SPIRVCrossDecorationPhysicalTypePacked);
12343 if (meta.storage_physical_type != 0)
12344 set_extended_decoration(id: ops[1], decoration: SPIRVCrossDecorationPhysicalTypeID, value: meta.storage_physical_type);
12345 if (meta.storage_is_invariant)
12346 set_decoration(id: ops[1], decoration: DecorationInvariant);
12347 if (meta.flattened_struct)
12348 flattened_structs[ops[1]] = true;
12349 if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
12350 set_decoration(id: ops[1], decoration: DecorationRelaxedPrecision);
12351 if (meta.chain_is_builtin)
12352 set_decoration(id: ops[1], decoration: DecorationBuiltIn, argument: meta.builtin);
12353
12354 // If we have some expression dependencies in our access chain, this access chain is technically a forwarded
12355 // temporary which could be subject to invalidation.
12356  // Need to assume we're forwarded while calling inherit_expression_dependencies.
12357 forwarded_temporaries.insert(x: ops[1]);
12358 // The access chain itself is never forced to a temporary, but its dependencies might.
12359 suppressed_usage_tracking.insert(x: ops[1]);
12360
12361 for (uint32_t i = 2; i < length; i++)
12362 {
12363 inherit_expression_dependencies(dst: ops[1], source: ops[i]);
12364 add_implied_read_expression(e&: expr, source: ops[i]);
12365 }
12366
12367 // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
12368 // we're not forwarded after all.
12369 if (expr.expression_dependencies.empty())
12370 forwarded_temporaries.erase(x: ops[1]);
12371
12372 break;
12373 }
12374
12375 case OpStore:
12376 {
12377 auto *var = maybe_get<SPIRVariable>(id: ops[0]);
12378
12379 if (var && var->statically_assigned)
12380 var->static_expression = ops[1];
12381 else if (var && var->loop_variable && !var->loop_variable_enable)
12382 var->static_expression = ops[1];
12383 else if (var && var->remapped_variable && var->static_expression)
12384 {
12385 // Skip the write.
12386 }
12387 else if (flattened_structs.count(x: ops[0]))
12388 {
12389 store_flattened_struct(lhs_id: ops[0], value: ops[1]);
12390 register_write(chain: ops[0]);
12391 }
12392 else
12393 {
12394 emit_store_statement(lhs_expression: ops[0], rhs_expression: ops[1]);
12395 }
12396
12397 // Storing a pointer results in a variable pointer, so we must conservatively assume
12398 // we can write through it.
12399 if (expression_type(id: ops[1]).pointer)
12400 register_write(chain: ops[1]);
12401 break;
12402 }
12403
12404 case OpArrayLength:
12405 {
12406 uint32_t result_type = ops[0];
12407 uint32_t id = ops[1];
12408 auto e = access_chain_internal(base: ops[2], indices: &ops[3], count: length - 3, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr);
12409 if (has_decoration(id: ops[2], decoration: DecorationNonUniform))
12410 convert_non_uniform_expression(expr&: e, ptr_id: ops[2]);
12411 set<SPIRExpression>(id, args: join(ts: type_to_glsl(type: get<SPIRType>(id: result_type)), ts: "(", ts&: e, ts: ".length())"), args&: result_type,
12412 args: true);
12413 break;
12414 }
12415
12416 // Function calls
12417 case OpFunctionCall:
12418 {
12419 uint32_t result_type = ops[0];
12420 uint32_t id = ops[1];
12421 uint32_t func = ops[2];
12422 const auto *arg = &ops[3];
12423 length -= 3;
12424
12425 auto &callee = get<SPIRFunction>(id: func);
12426 auto &return_type = get<SPIRType>(id: callee.return_type);
12427 bool pure = function_is_pure(func: callee);
12428 bool control_dependent = function_is_control_dependent(func: callee);
12429
12430 bool callee_has_out_variables = false;
12431 bool emit_return_value_as_argument = false;
12432
12433 // Invalidate out variables passed to functions since they can be OpStore'd to.
12434 for (uint32_t i = 0; i < length; i++)
12435 {
12436 if (callee.arguments[i].write_count)
12437 {
12438 register_call_out_argument(id: arg[i]);
12439 callee_has_out_variables = true;
12440 }
12441
12442 flush_variable_declaration(id: arg[i]);
12443 }
12444
12445 if (!return_type.array.empty() && !backend.can_return_array)
12446 {
12447 callee_has_out_variables = true;
12448 emit_return_value_as_argument = true;
12449 }
12450
12451 if (!pure)
12452 register_impure_function_call();
12453
12454 string funexpr;
12455 SmallVector<string> arglist;
12456 funexpr += to_name(id: func) + "(";
12457
12458 if (emit_return_value_as_argument)
12459 {
12460 statement(ts: type_to_glsl(type: return_type), ts: " ", ts: to_name(id), ts: type_to_array_glsl(type: return_type, variable_id: 0), ts: ";");
12461 arglist.push_back(t: to_name(id));
12462 }
12463
12464 for (uint32_t i = 0; i < length; i++)
12465 {
12466 // Do not pass in separate images or samplers if we're remapping
12467 // to combined image samplers.
12468 if (skip_argument(id: arg[i]))
12469 continue;
12470
12471 arglist.push_back(t: to_func_call_arg(callee.arguments[i], id: arg[i]));
12472 }
12473
12474 for (auto &combined : callee.combined_parameters)
12475 {
12476 auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
12477 auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
12478 arglist.push_back(t: to_combined_image_sampler(image_id, samp_id: sampler_id));
12479 }
12480
12481 append_global_func_args(func: callee, index: length, arglist);
12482
12483 funexpr += merge(list: arglist);
12484 funexpr += ")";
12485
12486 // Check for function call constraints.
12487 check_function_call_constraints(args: arg, length);
12488
12489 if (return_type.basetype != SPIRType::Void)
12490 {
12491 // If the function actually writes to an out variable,
12492 // take the conservative route and do not forward.
12493 // The problem is that we might not read the function
12494 // result (and emit the function) before an out variable
12495   // is read (a common case when the return value is ignored!).
12496   // In order to avoid having to track invalid variables,
12497 // just avoid the forwarding problem altogether.
12498 bool forward = args_will_forward(id, args: arg, num_args: length, pure) && !callee_has_out_variables && pure &&
12499 (forced_temporaries.find(x: id) == end(cont&: forced_temporaries));
12500
12501 if (emit_return_value_as_argument)
12502 {
12503 statement(ts&: funexpr, ts: ";");
12504 set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true);
12505 }
12506 else
12507 emit_op(result_type, result_id: id, rhs: funexpr, forwarding: forward);
12508
12509 // Function calls are implicit loads from all variables in question.
12510 // Set dependencies for them.
12511 for (uint32_t i = 0; i < length; i++)
12512 register_read(expr: id, chain: arg[i], forwarded: forward);
12513
12514 // If we're going to forward the temporary result,
12515 // put dependencies on every variable that must not change.
12516 if (forward)
12517 register_global_read_dependencies(func: callee, id);
12518 }
12519 else
12520 statement(ts&: funexpr, ts: ";");
12521
12522 if (control_dependent)
12523 register_control_dependent_expression(expr: id);
12524
12525 break;
12526 }
12527
12528 // Composite munging
12529 case OpCompositeConstruct:
12530 {
12531 uint32_t result_type = ops[0];
12532 uint32_t id = ops[1];
12533 const auto *const elems = &ops[2];
12534 length -= 2;
12535
12536 bool forward = true;
12537 for (uint32_t i = 0; i < length; i++)
12538 forward = forward && should_forward(id: elems[i]);
12539
12540 auto &out_type = get<SPIRType>(id: result_type);
12541 auto *in_type = length > 0 ? &expression_type(id: elems[0]) : nullptr;
12542
12543 // Only splat if we have vector constructors.
12544 // Arrays and structs must be initialized properly in full.
12545 bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
12546
12547 bool splat = false;
12548 bool swizzle_splat = false;
12549
12550 if (in_type)
12551 {
12552 splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
12553 swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
12554
12555 if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(type: *in_type))
12556 {
12557 // Cannot swizzle literal integers as a special case.
12558 swizzle_splat = false;
12559 }
12560 }
12561
12562 if (splat || swizzle_splat)
12563 {
12564 uint32_t input = elems[0];
12565 for (uint32_t i = 0; i < length; i++)
12566 {
12567 if (input != elems[i])
12568 {
12569 splat = false;
12570 swizzle_splat = false;
12571 }
12572 }
12573 }
12574
12575 if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
12576 forward = false;
12577 if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
12578 forward = false;
12579 if (type_is_empty(type: out_type) && !backend.supports_empty_struct)
12580 forward = false;
12581
12582 string constructor_op;
12583 if (backend.use_initializer_list && composite)
12584 {
12585   bool needs_trailing_bracket = false;
12586 // Only use this path if we are building composites.
12587 // This path cannot be used for arithmetic.
12588 if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
12589 constructor_op += type_to_glsl_constructor(type: get<SPIRType>(id: result_type));
12590 else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
12591 {
12592 // MSL path. Array constructor is baked into type here, do not use _constructor variant.
12593 constructor_op += type_to_glsl_constructor(type: get<SPIRType>(id: result_type)) + "(";
12594    needs_trailing_bracket = true;
12595 }
12596 constructor_op += "{ ";
12597
12598 if (type_is_empty(type: out_type) && !backend.supports_empty_struct)
12599 constructor_op += "0";
12600 else if (splat)
12601 constructor_op += to_unpacked_expression(id: elems[0]);
12602 else
12603 constructor_op += build_composite_combiner(return_type: result_type, elems, length);
12604 constructor_op += " }";
12605   if (needs_trailing_bracket)
12606 constructor_op += ")";
12607 }
12608 else if (swizzle_splat && !composite)
12609 {
12610 constructor_op = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 1, expr: to_unpacked_expression(id: elems[0]));
12611 }
12612 else
12613 {
12614 constructor_op = type_to_glsl_constructor(type: get<SPIRType>(id: result_type)) + "(";
12615 if (type_is_empty(type: out_type) && !backend.supports_empty_struct)
12616 constructor_op += "0";
12617 else if (splat)
12618 constructor_op += to_unpacked_expression(id: elems[0]);
12619 else
12620 constructor_op += build_composite_combiner(return_type: result_type, elems, length);
12621 constructor_op += ")";
12622 }
12623
12624 if (!constructor_op.empty())
12625 {
12626 emit_op(result_type, result_id: id, rhs: constructor_op, forwarding: forward);
12627 for (uint32_t i = 0; i < length; i++)
12628 inherit_expression_dependencies(dst: id, source: elems[i]);
12629 }
12630 break;
12631 }
12632
12633 case OpVectorInsertDynamic:
12634 {
12635 uint32_t result_type = ops[0];
12636 uint32_t id = ops[1];
12637 uint32_t vec = ops[2];
12638 uint32_t comp = ops[3];
12639 uint32_t index = ops[4];
12640
12641 flush_variable_declaration(id: vec);
12642
12643 // Make a copy, then use access chain to store the variable.
12644 statement(ts: declare_temporary(result_type, result_id: id), ts: to_expression(id: vec), ts: ";");
12645 set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true);
12646 auto chain = access_chain_internal(base: id, indices: &index, count: 1, flags: 0, meta: nullptr);
12647 statement(ts&: chain, ts: " = ", ts: to_unpacked_expression(id: comp), ts: ";");
12648 break;
12649 }
12650
12651 case OpVectorExtractDynamic:
12652 {
12653 uint32_t result_type = ops[0];
12654 uint32_t id = ops[1];
12655
12656 auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: 1, flags: 0, meta: nullptr);
12657 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: ops[2]));
12658 inherit_expression_dependencies(dst: id, source: ops[2]);
12659 inherit_expression_dependencies(dst: id, source: ops[3]);
12660 break;
12661 }
12662
12663 case OpCompositeExtract:
12664 {
12665 uint32_t result_type = ops[0];
12666 uint32_t id = ops[1];
12667 length -= 3;
12668
12669 auto &type = get<SPIRType>(id: result_type);
12670
12671 // We can only split the expression here if our expression is forwarded as a temporary.
12672 bool allow_base_expression = forced_temporaries.find(x: id) == end(cont&: forced_temporaries);
12673
12674 // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
12675 auto &composite_type = expression_type(id: ops[2]);
12676 bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty();
12677 if (composite_type_is_complex)
12678 allow_base_expression = false;
12679
12680 // Packed expressions or physical ID mapped expressions cannot be split up.
12681 if (has_extended_decoration(id: ops[2], decoration: SPIRVCrossDecorationPhysicalTypePacked) ||
12682 has_extended_decoration(id: ops[2], decoration: SPIRVCrossDecorationPhysicalTypeID))
12683 allow_base_expression = false;
12684
12685 // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
12686 // into the base expression.
12687 if (is_non_native_row_major_matrix(id: ops[2]))
12688 allow_base_expression = false;
12689
12690 AccessChainMeta meta;
12691 SPIRExpression *e = nullptr;
12692 auto *c = maybe_get<SPIRConstant>(id: ops[2]);
12693
12694 if (c && !c->specialization && !composite_type_is_complex)
12695 {
12696 auto expr = to_extract_constant_composite_expression(result_type, c: *c, chain: ops + 3, length);
12697 e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: true, suppress_usage_tracking: true);
12698 }
12699 else if (allow_base_expression && should_forward(id: ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
12700 {
12701 // Only apply this optimization if result is scalar.
12702
12703 // We want to split the access chain from the base.
12704 // This is so we can later combine different CompositeExtract results
12705 // with CompositeConstruct without emitting code like
12706 //
12707 // vec3 temp = texture(...).xyz
12708 // vec4(temp.x, temp.y, temp.z, 1.0).
12709 //
12710 // when we actually wanted to emit this
12711 // vec4(texture(...).xyz, 1.0).
12712 //
12713 // Including the base will prevent this and would trigger multiple reads
12714 // from expression causing it to be forced to an actual temporary in GLSL.
12715 auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: length,
12716 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT |
12717 ACCESS_CHAIN_FORCE_COMPOSITE_BIT, meta: &meta);
12718 e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: true, suppress_usage_tracking: should_suppress_usage_tracking(id: ops[2]));
12719 inherit_expression_dependencies(dst: id, source: ops[2]);
12720 e->base_expression = ops[2];
12721
12722 if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
12723 set_decoration(id: ops[1], decoration: DecorationRelaxedPrecision);
12724 }
12725 else
12726 {
12727 auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: length,
12728 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, meta: &meta);
12729 e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: ops[2]), suppress_usage_tracking: should_suppress_usage_tracking(id: ops[2]));
12730 inherit_expression_dependencies(dst: id, source: ops[2]);
12731 }
12732
12733 // Pass through some meta information to the loaded expression.
12734 // We can still end up loading a buffer type to a variable, then CompositeExtract from it
12735 // instead of loading everything through an access chain.
12736 e->need_transpose = meta.need_transpose;
12737 if (meta.storage_is_packed)
12738 set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
12739 if (meta.storage_physical_type != 0)
12740 set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID, value: meta.storage_physical_type);
12741 if (meta.storage_is_invariant)
12742 set_decoration(id, decoration: DecorationInvariant);
12743
12744 break;
12745 }
12746
12747 case OpCompositeInsert:
12748 {
12749 uint32_t result_type = ops[0];
12750 uint32_t id = ops[1];
12751 uint32_t obj = ops[2];
12752 uint32_t composite = ops[3];
12753 const auto *elems = &ops[4];
12754 length -= 4;
12755
12756 flush_variable_declaration(id: composite);
12757
12758 // CompositeInsert requires a copy + modification, but this is very awkward code in HLL.
12759 // Speculate that the input composite is no longer used, and we can modify it in-place.
12760 // There are various scenarios where this is not possible to satisfy.
12761 bool can_modify_in_place = true;
12762 forced_temporaries.insert(x: id);
12763
12764  // Cannot safely RMW PHI variables since they have no way to be invalidated;
12765 // forcing temporaries is not going to help.
12766 // This is similar for Constant and Undef inputs.
12767 // The only safe thing to RMW is SPIRExpression.
12768 // If the expression has already been used (i.e. used in a continue block), we have to keep using
12769 // that loop variable, since we won't be able to override the expression after the fact.
12770 // If the composite is hoisted, we might never be able to properly invalidate any usage
12771 // of that composite in a subsequent loop iteration.
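  // Roughly, the two code paths below are:
  //   in-place:  composite.member = obj;                  // id then aliases the composite's expression
  //   copy:      T copy = composite; copy.member = obj;
  // where 'member' stands for the access chain built from the literal indices.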
12772 if (invalid_expressions.count(x: composite) ||
12773 block_composite_insert_overwrite.count(x: composite) ||
12774 hoisted_temporaries.count(x: id) || hoisted_temporaries.count(x: composite) ||
12775 maybe_get<SPIRExpression>(id: composite) == nullptr)
12776 {
12777 can_modify_in_place = false;
12778 }
12779 else if (backend.requires_relaxed_precision_analysis &&
12780 has_decoration(id: composite, decoration: DecorationRelaxedPrecision) !=
12781 has_decoration(id, decoration: DecorationRelaxedPrecision) &&
12782 get<SPIRType>(id: result_type).basetype != SPIRType::Struct)
12783 {
12784 // Similarly, if precision does not match for input and output,
12785 // we cannot alias them. If we write a composite into a relaxed precision
12786 // ID, we might get a false truncation.
12787 can_modify_in_place = false;
12788 }
12789
12790 if (can_modify_in_place)
12791 {
12792 // Have to make sure the modified SSA value is bound to a temporary so we can modify it in-place.
12793 if (!forced_temporaries.count(x: composite))
12794 force_temporary_and_recompile(id: composite);
12795
12796 auto chain = access_chain_internal(base: composite, indices: elems, count: length, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr);
12797 statement(ts&: chain, ts: " = ", ts: to_unpacked_expression(id: obj), ts: ";");
12798 set<SPIRExpression>(id, args: to_expression(id: composite), args&: result_type, args: true);
12799 invalid_expressions.insert(x: composite);
12800 composite_insert_overwritten.insert(x: composite);
12801 }
12802 else
12803 {
12804 if (maybe_get<SPIRUndef>(id: composite) != nullptr)
12805 {
12806 emit_uninitialized_temporary_expression(type: result_type, id);
12807 }
12808 else
12809 {
12810 // Make a copy, then use access chain to store the variable.
12811 statement(ts: declare_temporary(result_type, result_id: id), ts: to_expression(id: composite), ts: ";");
12812 set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true);
12813 }
12814
12815 auto chain = access_chain_internal(base: id, indices: elems, count: length, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr);
12816 statement(ts&: chain, ts: " = ", ts: to_unpacked_expression(id: obj), ts: ";");
12817 }
12818
12819 break;
12820 }
12821
12822 case OpCopyMemory:
12823 {
12824 uint32_t lhs = ops[0];
12825 uint32_t rhs = ops[1];
12826 if (lhs != rhs)
12827 {
12828 uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET];
12829 if (!tmp_id)
12830 tmp_id = ir.increase_bound_by(count: 1);
12831 uint32_t tmp_type_id = expression_type(id: rhs).parent_type;
12832
12833 EmbeddedInstruction fake_load, fake_store;
12834 fake_load.op = OpLoad;
12835 fake_load.length = 3;
12836 fake_load.ops.push_back(t: tmp_type_id);
12837 fake_load.ops.push_back(t: tmp_id);
12838 fake_load.ops.push_back(t: rhs);
12839
12840 fake_store.op = OpStore;
12841 fake_store.length = 2;
12842 fake_store.ops.push_back(t: lhs);
12843 fake_store.ops.push_back(t: tmp_id);
12844
12845 // Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible.
12846 // Synthesize a fake Load and Store pair for CopyMemory.
12847 emit_instruction(instruction: fake_load);
12848 emit_instruction(instruction: fake_store);
12849 }
12850 break;
12851 }
12852
12853 case OpCopyLogical:
12854 {
12855 // This is used for copying object of different types, arrays and structs.
12856 // We need to unroll the copy, element-by-element.
12857 uint32_t result_type = ops[0];
12858 uint32_t id = ops[1];
12859 uint32_t rhs = ops[2];
12860
12861 emit_uninitialized_temporary_expression(type: result_type, id);
12862 emit_copy_logical_type(lhs_id: id, lhs_type_id: result_type, rhs_id: rhs, rhs_type_id: expression_type_id(id: rhs), chain: {});
12863 break;
12864 }
12865
12866 case OpCopyObject:
12867 {
12868 uint32_t result_type = ops[0];
12869 uint32_t id = ops[1];
12870 uint32_t rhs = ops[2];
12871 bool pointer = get<SPIRType>(id: result_type).pointer;
12872
12873 auto *chain = maybe_get<SPIRAccessChain>(id: rhs);
12874 auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(id: rhs);
12875 if (chain)
12876 {
12877 // Cannot lower to a SPIRExpression, just copy the object.
12878 auto &e = set<SPIRAccessChain>(id, args&: *chain);
12879 e.self = id;
12880 }
12881 else if (imgsamp)
12882 {
12883 // Cannot lower to a SPIRExpression, just copy the object.
12884 // GLSL does not currently use this type and will never get here, but MSL does.
12885 // Handled here instead of CompilerMSL for better integration and general handling,
12886 // and in case GLSL or other subclasses require it in the future.
12887 auto &e = set<SPIRCombinedImageSampler>(id, args&: *imgsamp);
12888 e.self = id;
12889 }
12890 else if (expression_is_lvalue(id: rhs) && !pointer)
12891 {
12892 // Need a copy.
12893 // For pointer types, we copy the pointer itself.
12894 emit_op(result_type, result_id: id, rhs: to_unpacked_expression(id: rhs), forwarding: false);
12895 }
12896 else
12897 {
12898 // RHS expression is immutable, so just forward it.
12899    // Copying these things really makes no sense, but
12900    // it seems to be allowed anyway.
12901 auto &e = emit_op(result_type, result_id: id, rhs: to_expression(id: rhs), forwarding: true, suppress_usage_tracking: true);
12902 if (pointer)
12903 {
12904 auto *var = maybe_get_backing_variable(chain: rhs);
12905 e.loaded_from = var ? var->self : ID(0);
12906 }
12907
12908 // If we're copying an access chain, need to inherit the read expressions.
12909 auto *rhs_expr = maybe_get<SPIRExpression>(id: rhs);
12910 if (rhs_expr)
12911 {
12912 e.implied_read_expressions = rhs_expr->implied_read_expressions;
12913 e.expression_dependencies = rhs_expr->expression_dependencies;
12914 }
12915 }
12916 break;
12917 }
12918
12919 case OpVectorShuffle:
12920 {
12921 uint32_t result_type = ops[0];
12922 uint32_t id = ops[1];
12923 uint32_t vec0 = ops[2];
12924 uint32_t vec1 = ops[3];
12925 const auto *elems = &ops[4];
12926 length -= 4;
12927
12928 auto &type0 = expression_type(id: vec0);
12929
12930 // If we have the undefined swizzle index -1, we need to swizzle in undefined data,
12931 // or in our case, T(0).
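  // E.g. shuffling vec4 a with vec4 b using indices (0, 5, 0xffffffff) yields roughly vec3(a.x, b.y, 0.0).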
12932 bool shuffle = false;
12933 for (uint32_t i = 0; i < length; i++)
12934 if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
12935 shuffle = true;
12936
12937 // Cannot use swizzles with packed expressions, force shuffle path.
12938 if (!shuffle && has_extended_decoration(id: vec0, decoration: SPIRVCrossDecorationPhysicalTypePacked))
12939 shuffle = true;
12940
12941 string expr;
12942 bool should_fwd, trivial_forward;
12943
12944 if (shuffle)
12945 {
12946 should_fwd = should_forward(id: vec0) && should_forward(id: vec1);
12947 trivial_forward = should_suppress_usage_tracking(id: vec0) && should_suppress_usage_tracking(id: vec1);
12948
12949 // Constructor style and shuffling from two different vectors.
12950 SmallVector<string> args;
12951 for (uint32_t i = 0; i < length; i++)
12952 {
12953 if (elems[i] == 0xffffffffu)
12954 {
12955 // Use a constant 0 here.
12956 // We could use the first component or similar, but then we risk propagating
12957 // a value we might not need, and bog down codegen.
12958 SPIRConstant c;
12959 c.constant_type = type0.parent_type;
12960 assert(type0.parent_type != ID(0));
12961 args.push_back(t: constant_expression(c));
12962 }
12963 else if (elems[i] >= type0.vecsize)
12964 args.push_back(t: to_extract_component_expression(id: vec1, index: elems[i] - type0.vecsize));
12965 else
12966 args.push_back(t: to_extract_component_expression(id: vec0, index: elems[i]));
12967 }
12968 expr += join(ts: type_to_glsl_constructor(type: get<SPIRType>(id: result_type)), ts: "(", ts: merge(list: args), ts: ")");
12969 }
12970 else
12971 {
12972 should_fwd = should_forward(id: vec0);
12973 trivial_forward = should_suppress_usage_tracking(id: vec0);
12974
12975 // We only source from first vector, so can use swizzle.
12976 // If the vector is packed, unpack it before applying a swizzle (needed for MSL)
12977 expr += to_enclosed_unpacked_expression(id: vec0);
12978 expr += ".";
12979 for (uint32_t i = 0; i < length; i++)
12980 {
12981 assert(elems[i] != 0xffffffffu);
12982 expr += index_to_swizzle(index: elems[i]);
12983 }
12984
12985 if (backend.swizzle_is_function && length > 1)
12986 expr += "()";
12987 }
12988
12989 // A shuffle is trivial in that it doesn't actually *do* anything.
12990 // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
12991
12992 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_fwd, suppress_usage_tracking: trivial_forward);
12993
12994 inherit_expression_dependencies(dst: id, source: vec0);
12995 if (vec0 != vec1)
12996 inherit_expression_dependencies(dst: id, source: vec1);
12997 break;
12998 }
12999
13000 // ALU
13001 case OpIsNan:
13002 if (!is_legacy())
13003 GLSL_UFOP(isnan);
13004 else
13005 {
13006 // Check if the number doesn't equal itself
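   // NaN is the only IEEE 754 value that compares unequal to itself, so x != x
   // (or notEqual(x, x) per component) is true exactly for NaN.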
13007 auto &type = get<SPIRType>(id: ops[0]);
13008 if (type.vecsize > 1)
13009 emit_binary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[2], op: "notEqual");
13010 else
13011 emit_binary_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[2], op: "!=");
13012 }
13013 break;
13014
13015 case OpIsInf:
13016 if (!is_legacy())
13017 GLSL_UFOP(isinf);
13018 else
13019 {
13020   // By IEEE 754, 2.0 * inf == inf; note that 2.0 * 0.0 == 0.0 as well, hence the explicit != 0.0 check.
13021   // This is more reliable than checking whether a product with zero is NaN.
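   // For a vector input this emits roughly:
   //   bvec3(x.x != 0.0 && 2.0 * x.x == x.x, x.y != 0.0 && 2.0 * x.y == x.y, x.z != 0.0 && 2.0 * x.z == x.z)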
13022 uint32_t result_type = ops[0];
13023 uint32_t result_id = ops[1];
13024 uint32_t operand = ops[2];
13025
13026 auto &type = get<SPIRType>(id: result_type);
13027 std::string expr;
13028 if (type.vecsize > 1)
13029 {
13030 expr = type_to_glsl_constructor(type);
13031 expr += '(';
13032 for (uint32_t i = 0; i < type.vecsize; i++)
13033 {
13034 auto comp = to_extract_component_expression(id: operand, index: i);
13035 expr += join(ts&: comp, ts: " != 0.0 && 2.0 * ", ts&: comp, ts: " == ", ts&: comp);
13036
13037 if (i + 1 < type.vecsize)
13038 expr += ", ";
13039 }
13040 expr += ')';
13041 }
13042 else
13043 {
13044 // Register an extra read to force writing out a temporary
13045 auto oper = to_enclosed_expression(id: operand);
13046 track_expression_read(id: operand);
13047 expr += join(ts&: oper, ts: " != 0.0 && 2.0 * ", ts&: oper, ts: " == ", ts&: oper);
13048 }
13049 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: operand));
13050
13051 inherit_expression_dependencies(dst: result_id, source: operand);
13052 }
13053 break;
13054
13055 case OpSNegate:
13056 if (implicit_integer_promotion || expression_type_id(id: ops[2]) != ops[0])
13057 GLSL_UOP_CAST(-);
13058 else
13059 GLSL_UOP(-);
13060 break;
13061
13062 case OpFNegate:
13063 GLSL_UOP(-);
13064 break;
13065
13066 case OpIAdd:
13067 {
13068 // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
13069 auto type = get<SPIRType>(id: ops[0]).basetype;
13070 GLSL_BOP_CAST(+, type);
13071 break;
13072 }
13073
13074 case OpFAdd:
13075 GLSL_BOP(+);
13076 break;
13077
13078 case OpISub:
13079 {
13080 auto type = get<SPIRType>(id: ops[0]).basetype;
13081 GLSL_BOP_CAST(-, type);
13082 break;
13083 }
13084
13085 case OpFSub:
13086 GLSL_BOP(-);
13087 break;
13088
13089 case OpIMul:
13090 {
13091 auto type = get<SPIRType>(id: ops[0]).basetype;
13092 GLSL_BOP_CAST(*, type);
13093 break;
13094 }
13095
13096 case OpVectorTimesMatrix:
13097 case OpMatrixTimesVector:
13098 {
13099 // If the matrix needs transpose, just flip the multiply order.
13100 auto *e = maybe_get<SPIRExpression>(id: ops[opcode == OpMatrixTimesVector ? 2 : 3]);
13101 if (e && e->need_transpose)
13102 {
13103 e->need_transpose = false;
13104 string expr;
13105
13106 if (opcode == OpMatrixTimesVector)
13107 expr = join(ts: to_enclosed_unpacked_expression(id: ops[3]), ts: " * ",
13108 ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[2])));
13109 else
13110 expr = join(ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[3])), ts: " * ",
13111 ts: to_enclosed_unpacked_expression(id: ops[2]));
13112
13113 bool forward = should_forward(id: ops[2]) && should_forward(id: ops[3]);
13114 emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: forward);
13115 e->need_transpose = true;
13116 inherit_expression_dependencies(dst: ops[1], source: ops[2]);
13117 inherit_expression_dependencies(dst: ops[1], source: ops[3]);
13118 }
13119 else
13120 GLSL_BOP(*);
13121 break;
13122 }
13123
13124 case OpMatrixTimesMatrix:
13125 {
13126 auto *a = maybe_get<SPIRExpression>(id: ops[2]);
13127 auto *b = maybe_get<SPIRExpression>(id: ops[3]);
13128
13129 // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
13130 // a^T * b^T = (b * a)^T.
13131 if (a && b && a->need_transpose && b->need_transpose)
13132 {
13133 a->need_transpose = false;
13134 b->need_transpose = false;
13135 auto expr = join(ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[3])), ts: " * ",
13136 ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[2])));
13137 bool forward = should_forward(id: ops[2]) && should_forward(id: ops[3]);
13138 auto &e = emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: forward);
13139 e.need_transpose = true;
13140 a->need_transpose = true;
13141 b->need_transpose = true;
13142 inherit_expression_dependencies(dst: ops[1], source: ops[2]);
13143 inherit_expression_dependencies(dst: ops[1], source: ops[3]);
13144 }
13145 else
13146 GLSL_BOP(*);
13147
13148 break;
13149 }
13150
13151 case OpMatrixTimesScalar:
13152 {
13153 auto *a = maybe_get<SPIRExpression>(id: ops[2]);
13154
13155  // If the matrix needs a transpose, just mark the result as needing one as well.
13156 if (a && a->need_transpose)
13157 {
13158 a->need_transpose = false;
13159 auto expr = join(ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[2])), ts: " * ",
13160 ts: to_enclosed_unpacked_expression(id: ops[3]));
13161 bool forward = should_forward(id: ops[2]) && should_forward(id: ops[3]);
13162 auto &e = emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: forward);
13163 e.need_transpose = true;
13164 a->need_transpose = true;
13165 inherit_expression_dependencies(dst: ops[1], source: ops[2]);
13166 inherit_expression_dependencies(dst: ops[1], source: ops[3]);
13167 }
13168 else
13169 GLSL_BOP(*);
13170 break;
13171 }
13172
13173 case OpFMul:
13174 case OpVectorTimesScalar:
13175 GLSL_BOP(*);
13176 break;
13177
13178 case OpOuterProduct:
13179 if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
13180 {
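  // outerProduct() requires GLSL 1.20 / ESSL 3.00, so build the matrix column by column;
  // e.g. a mat3 result becomes roughly mat3(a * b.x, a * b.y, a * b.z).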
13181 uint32_t result_type = ops[0];
13182 uint32_t id = ops[1];
13183 uint32_t a = ops[2];
13184 uint32_t b = ops[3];
13185
13186 auto &type = get<SPIRType>(id: result_type);
13187 string expr = type_to_glsl_constructor(type);
13188 expr += "(";
13189 for (uint32_t col = 0; col < type.columns; col++)
13190 {
13191 expr += to_enclosed_expression(id: a);
13192 expr += " * ";
13193 expr += to_extract_component_expression(id: b, index: col);
13194 if (col + 1 < type.columns)
13195 expr += ", ";
13196 }
13197 expr += ")";
13198 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: a) && should_forward(id: b));
13199 inherit_expression_dependencies(dst: id, source: a);
13200 inherit_expression_dependencies(dst: id, source: b);
13201 }
13202 else
13203 GLSL_BFOP(outerProduct);
13204 break;
13205
13206 case OpDot:
13207 GLSL_BFOP(dot);
13208 break;
13209
13210 case OpTranspose:
13211 if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
13212 {
13213 // transpose() is not available, so instead, flip need_transpose,
13214 // which can later be turned into an emulated transpose op by
13215 // convert_row_major_matrix(), if necessary.
13216 uint32_t result_type = ops[0];
13217 uint32_t result_id = ops[1];
13218 uint32_t input = ops[2];
13219
13220 // Force need_transpose to false temporarily to prevent
13221 // to_expression() from doing the transpose.
13222 bool need_transpose = false;
13223 auto *input_e = maybe_get<SPIRExpression>(id: input);
13224 if (input_e)
13225 swap(a&: need_transpose, b&: input_e->need_transpose);
13226
13227 bool forward = should_forward(id: input);
13228 auto &e = emit_op(result_type, result_id, rhs: to_expression(id: input), forwarding: forward);
13229 e.need_transpose = !need_transpose;
13230
13231 // Restore the old need_transpose flag.
13232 if (input_e)
13233 input_e->need_transpose = need_transpose;
13234 }
13235 else
13236 GLSL_UFOP(transpose);
13237 break;
13238
13239 case OpSRem:
13240 {
13241 uint32_t result_type = ops[0];
13242 uint32_t result_id = ops[1];
13243 uint32_t op0 = ops[2];
13244 uint32_t op1 = ops[3];
13245
13246 auto &out_type = get<SPIRType>(id: result_type);
13247
13248 bool forward = should_forward(id: op0) && should_forward(id: op1);
13249 string cast_op0, cast_op1;
13250 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type&: int_type, op0, op1, skip_cast_if_equal_type: false);
13251
13252 // Needs special handling.
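  // OpSRem takes the sign of the dividend (like C's %), so it can be written as a - b * (a / b)
  // with truncating integer division; e.g. SRem(-7, 3) == -1, whereas OpSMod(-7, 3) == 2.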
13253 auto expr = join(ts&: cast_op0, ts: " - ", ts&: cast_op1, ts: " * ", ts: "(", ts&: cast_op0, ts: " / ", ts&: cast_op1, ts: ")");
13254
13255 if (implicit_integer_promotion)
13256 {
13257 expr = join(ts: type_to_glsl(type: get<SPIRType>(id: result_type)), ts: '(', ts&: expr, ts: ')');
13258 }
13259 else if (out_type.basetype != int_type)
13260 {
13261 expected_type.basetype = int_type;
13262 expr = join(ts: bitcast_glsl_op(out_type, in_type: expected_type), ts: '(', ts&: expr, ts: ')');
13263 }
13264
13265 emit_op(result_type, result_id, rhs: expr, forwarding: forward);
13266 inherit_expression_dependencies(dst: result_id, source: op0);
13267 inherit_expression_dependencies(dst: result_id, source: op1);
13268 break;
13269 }
13270
13271 case OpSDiv:
13272 GLSL_BOP_CAST(/, int_type);
13273 break;
13274
13275 case OpUDiv:
13276 GLSL_BOP_CAST(/, uint_type);
13277 break;
13278
13279 case OpIAddCarry:
13280 case OpISubBorrow:
13281 {
13282 if (options.es && options.version < 310)
13283 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
13284 else if (!options.es && options.version < 400)
13285 SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
13286
13287 uint32_t result_type = ops[0];
13288 uint32_t result_id = ops[1];
13289 uint32_t op0 = ops[2];
13290 uint32_t op1 = ops[3];
13291 auto &type = get<SPIRType>(id: result_type);
13292 emit_uninitialized_temporary_expression(type: result_type, id: result_id);
13293 const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
13294
13295 statement(ts: to_expression(id: result_id), ts: ".", ts: to_member_name(type, index: 0), ts: " = ", ts&: op, ts: "(", ts: to_expression(id: op0), ts: ", ",
13296 ts: to_expression(id: op1), ts: ", ", ts: to_expression(id: result_id), ts: ".", ts: to_member_name(type, index: 1), ts: ");");
13297 break;
13298 }
13299
13300 case OpUMulExtended:
13301 case OpSMulExtended:
13302 {
13303 if (options.es && options.version < 310)
13304 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
13305 else if (!options.es && options.version < 400)
13306   SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
13307
13308 uint32_t result_type = ops[0];
13309 uint32_t result_id = ops[1];
13310 uint32_t op0 = ops[2];
13311 uint32_t op1 = ops[3];
13312 auto &type = get<SPIRType>(id: result_type);
13313 emit_uninitialized_temporary_expression(type: result_type, id: result_id);
13314 const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
13315
13316 statement(ts&: op, ts: "(", ts: to_expression(id: op0), ts: ", ", ts: to_expression(id: op1), ts: ", ", ts: to_expression(id: result_id), ts: ".",
13317 ts: to_member_name(type, index: 1), ts: ", ", ts: to_expression(id: result_id), ts: ".", ts: to_member_name(type, index: 0), ts: ");");
13318 break;
13319 }
13320
13321 case OpFDiv:
13322 GLSL_BOP(/);
13323 break;
13324
13325 case OpShiftRightLogical:
13326 GLSL_BOP_CAST(>>, uint_type);
13327 break;
13328
13329 case OpShiftRightArithmetic:
13330 GLSL_BOP_CAST(>>, int_type);
13331 break;
13332
13333 case OpShiftLeftLogical:
13334 {
13335 auto type = get<SPIRType>(id: ops[0]).basetype;
13336 GLSL_BOP_CAST(<<, type);
13337 break;
13338 }
13339
13340 case OpBitwiseOr:
13341 {
13342 auto type = get<SPIRType>(id: ops[0]).basetype;
13343 GLSL_BOP_CAST(|, type);
13344 break;
13345 }
13346
13347 case OpBitwiseXor:
13348 {
13349 auto type = get<SPIRType>(id: ops[0]).basetype;
13350 GLSL_BOP_CAST(^, type);
13351 break;
13352 }
13353
13354 case OpBitwiseAnd:
13355 {
13356 auto type = get<SPIRType>(id: ops[0]).basetype;
13357 GLSL_BOP_CAST(&, type);
13358 break;
13359 }
13360
13361 case OpNot:
13362 if (implicit_integer_promotion || expression_type_id(id: ops[2]) != ops[0])
13363 GLSL_UOP_CAST(~);
13364 else
13365 GLSL_UOP(~);
13366 break;
13367
13368 case OpUMod:
13369 GLSL_BOP_CAST(%, uint_type);
13370 break;
13371
13372 case OpSMod:
13373 GLSL_BOP_CAST(%, int_type);
13374 break;
13375
13376 case OpFMod:
13377 GLSL_BFOP(mod);
13378 break;
13379
13380 case OpFRem:
13381 {
13382 uint32_t result_type = ops[0];
13383 uint32_t result_id = ops[1];
13384 uint32_t op0 = ops[2];
13385 uint32_t op1 = ops[3];
13386
13387 // Needs special handling.
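  // OpFRem takes the sign of the dividend: a - b * trunc(a / b);
  // e.g. FRem(-5.5, 2.0) == -1.5, while GLSL mod() (OpFMod) would give 0.5.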
13388 bool forward = should_forward(id: op0) && should_forward(id: op1);
13389 std::string expr;
13390 if (!is_legacy())
13391 {
13392 expr = join(ts: to_enclosed_expression(id: op0), ts: " - ", ts: to_enclosed_expression(id: op1), ts: " * ", ts: "trunc(",
13393 ts: to_enclosed_expression(id: op0), ts: " / ", ts: to_enclosed_expression(id: op1), ts: ")");
13394 }
13395 else
13396 {
13397   // Legacy GLSL has no trunc(); emulate by casting to int and back.
13398 auto &op0_type = expression_type(id: op0);
13399 auto via_type = op0_type;
13400 via_type.basetype = SPIRType::Int;
13401 expr = join(ts: to_enclosed_expression(id: op0), ts: " - ", ts: to_enclosed_expression(id: op1), ts: " * ",
13402 ts: type_to_glsl(type: op0_type), ts: "(", ts: type_to_glsl(type: via_type), ts: "(",
13403 ts: to_enclosed_expression(id: op0), ts: " / ", ts: to_enclosed_expression(id: op1), ts: "))");
13404 }
13405
13406 emit_op(result_type, result_id, rhs: expr, forwarding: forward);
13407 inherit_expression_dependencies(dst: result_id, source: op0);
13408 inherit_expression_dependencies(dst: result_id, source: op1);
13409 break;
13410 }
13411
13412 // Relational
13413 case OpAny:
13414 GLSL_UFOP(any);
13415 break;
13416
13417 case OpAll:
13418 GLSL_UFOP(all);
13419 break;
13420
13421 case OpSelect:
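  // OpSelect operands are (condition, value-if-true, value-if-false); emit_mix_op takes
  // (left = false case, right = true case, lerp = condition), hence the swapped argument order.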
13422 emit_mix_op(result_type: ops[0], id: ops[1], left: ops[4], right: ops[3], lerp: ops[2]);
13423 break;
13424
13425 case OpLogicalOr:
13426 {
13427 // No vector variant in GLSL for logical OR.
13428 auto result_type = ops[0];
13429 auto id = ops[1];
13430 auto &type = get<SPIRType>(id: result_type);
13431
13432 if (type.vecsize > 1)
13433 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "||", negate: false, expected_type: SPIRType::Unknown);
13434 else
13435 GLSL_BOP(||);
13436 break;
13437 }
13438
13439 case OpLogicalAnd:
13440 {
13441 // No vector variant in GLSL for logical AND.
13442 auto result_type = ops[0];
13443 auto id = ops[1];
13444 auto &type = get<SPIRType>(id: result_type);
13445
13446 if (type.vecsize > 1)
13447 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "&&", negate: false, expected_type: SPIRType::Unknown);
13448 else
13449 GLSL_BOP(&&);
13450 break;
13451 }
13452
13453 case OpLogicalNot:
13454 {
13455 auto &type = get<SPIRType>(id: ops[0]);
13456 if (type.vecsize > 1)
13457 GLSL_UFOP(not );
13458 else
13459 GLSL_UOP(!);
13460 break;
13461 }
13462
13463 case OpIEqual:
13464 {
13465 if (expression_type(id: ops[2]).vecsize > 1)
13466 GLSL_BFOP_CAST(equal, int_type);
13467 else
13468 GLSL_BOP_CAST(==, int_type);
13469 break;
13470 }
13471
13472 case OpLogicalEqual:
13473 case OpFOrdEqual:
13474 {
13475 if (expression_type(id: ops[2]).vecsize > 1)
13476 GLSL_BFOP(equal);
13477 else
13478 GLSL_BOP(==);
13479 break;
13480 }
13481
13482 case OpINotEqual:
13483 {
13484 if (expression_type(id: ops[2]).vecsize > 1)
13485 GLSL_BFOP_CAST(notEqual, int_type);
13486 else
13487 GLSL_BOP_CAST(!=, int_type);
13488 break;
13489 }
13490
13491 case OpLogicalNotEqual:
13492 case OpFOrdNotEqual:
13493 case OpFUnordNotEqual:
13494 {
13495 // GLSL is fuzzy on what to do with ordered vs unordered not equal.
13496 // glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE,
13497 // but this means we have no easy way of implementing ordered not equal.
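		// In practice both variants therefore emit the same comparison here, e.g. "notEqual(a, b)" for vectors or "a != b" for scalars.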
13498 if (expression_type(id: ops[2]).vecsize > 1)
13499 GLSL_BFOP(notEqual);
13500 else
13501 GLSL_BOP(!=);
13502 break;
13503 }
13504
13505 case OpUGreaterThan:
13506 case OpSGreaterThan:
13507 {
13508 auto type = opcode == OpUGreaterThan ? uint_type : int_type;
13509 if (expression_type(id: ops[2]).vecsize > 1)
13510 GLSL_BFOP_CAST(greaterThan, type);
13511 else
13512 GLSL_BOP_CAST(>, type);
13513 break;
13514 }
13515
13516 case OpFOrdGreaterThan:
13517 {
13518 if (expression_type(id: ops[2]).vecsize > 1)
13519 GLSL_BFOP(greaterThan);
13520 else
13521 GLSL_BOP(>);
13522 break;
13523 }
13524
13525 case OpUGreaterThanEqual:
13526 case OpSGreaterThanEqual:
13527 {
13528 auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
13529 if (expression_type(id: ops[2]).vecsize > 1)
13530 GLSL_BFOP_CAST(greaterThanEqual, type);
13531 else
13532 GLSL_BOP_CAST(>=, type);
13533 break;
13534 }
13535
13536 case OpFOrdGreaterThanEqual:
13537 {
13538 if (expression_type(id: ops[2]).vecsize > 1)
13539 GLSL_BFOP(greaterThanEqual);
13540 else
13541 GLSL_BOP(>=);
13542 break;
13543 }
13544
13545 case OpULessThan:
13546 case OpSLessThan:
13547 {
13548 auto type = opcode == OpULessThan ? uint_type : int_type;
13549 if (expression_type(id: ops[2]).vecsize > 1)
13550 GLSL_BFOP_CAST(lessThan, type);
13551 else
13552 GLSL_BOP_CAST(<, type);
13553 break;
13554 }
13555
13556 case OpFOrdLessThan:
13557 {
13558 if (expression_type(id: ops[2]).vecsize > 1)
13559 GLSL_BFOP(lessThan);
13560 else
13561 GLSL_BOP(<);
13562 break;
13563 }
13564
13565 case OpULessThanEqual:
13566 case OpSLessThanEqual:
13567 {
13568 auto type = opcode == OpULessThanEqual ? uint_type : int_type;
13569 if (expression_type(id: ops[2]).vecsize > 1)
13570 GLSL_BFOP_CAST(lessThanEqual, type);
13571 else
13572 GLSL_BOP_CAST(<=, type);
13573 break;
13574 }
13575
13576 case OpFOrdLessThanEqual:
13577 {
13578 if (expression_type(id: ops[2]).vecsize > 1)
13579 GLSL_BFOP(lessThanEqual);
13580 else
13581 GLSL_BOP(<=);
13582 break;
13583 }
13584
13585 // Conversion
13586 case OpSConvert:
13587 case OpConvertSToF:
13588 case OpUConvert:
13589 case OpConvertUToF:
13590 {
13591 auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
13592 uint32_t result_type = ops[0];
13593 uint32_t id = ops[1];
13594
13595 auto &type = get<SPIRType>(id: result_type);
13596 auto &arg_type = expression_type(id: ops[2]);
13597 auto func = type_to_glsl_constructor(type);
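		// e.g. OpConvertUToF on a uint operand emits roughly "float(x)"; widening integer conversions
		// additionally reinterpret the operand in the expected signedness before applying the constructor.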
13598
13599 if (arg_type.width < type.width || type_is_floating_point(type))
13600 emit_unary_func_op_cast(result_type, result_id: id, op0: ops[2], op: func.c_str(), input_type, expected_result_type: type.basetype);
13601 else
13602 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: func.c_str());
13603 break;
13604 }
13605
13606 case OpConvertFToU:
13607 case OpConvertFToS:
13608 {
13609 // Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
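		// e.g. OpConvertFToU emits roughly "uint(x)" (or a uvec constructor), and if the SPIR-V result type
		// is declared with the opposite signedness, the helper bitcasts that result afterwards.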
13610 uint32_t result_type = ops[0];
13611 uint32_t id = ops[1];
13612 auto &type = get<SPIRType>(id: result_type);
13613 auto expected_type = type;
13614 auto &float_type = expression_type(id: ops[2]);
13615 expected_type.basetype =
13616 opcode == OpConvertFToS ? to_signed_basetype(width: type.width) : to_unsigned_basetype(width: type.width);
13617
13618 auto func = type_to_glsl_constructor(type: expected_type);
13619 emit_unary_func_op_cast(result_type, result_id: id, op0: ops[2], op: func.c_str(), input_type: float_type.basetype, expected_result_type: expected_type.basetype);
13620 break;
13621 }
13622
13623 case OpFConvert:
13624 {
13625 uint32_t result_type = ops[0];
13626 uint32_t id = ops[1];
13627
13628 auto func = type_to_glsl_constructor(type: get<SPIRType>(id: result_type));
13629 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: func.c_str());
13630 break;
13631 }
13632
13633 case OpBitcast:
13634 {
13635 uint32_t result_type = ops[0];
13636 uint32_t id = ops[1];
13637 uint32_t arg = ops[2];
13638
13639 if (!emit_complex_bitcast(result_type, id, op0: arg))
13640 {
13641 auto op = bitcast_glsl_op(out_type: get<SPIRType>(id: result_type), in_type: expression_type(id: arg));
13642 emit_unary_func_op(result_type, result_id: id, op0: arg, op: op.c_str());
13643 }
13644 break;
13645 }
13646
13647 case OpQuantizeToF16:
13648 {
13649 uint32_t result_type = ops[0];
13650 uint32_t id = ops[1];
13651 uint32_t arg = ops[2];
13652
13653 string op;
13654 auto &type = get<SPIRType>(id: result_type);
13655
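		// Each case below round-trips the value through 16-bit storage, e.g. for a scalar:
		// "unpackHalf2x16(packHalf2x16(vec2(x))).x".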
13656 switch (type.vecsize)
13657 {
13658 case 1:
13659 op = join(ts: "unpackHalf2x16(packHalf2x16(vec2(", ts: to_expression(id: arg), ts: "))).x");
13660 break;
13661 case 2:
13662 op = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: "))");
13663 break;
13664 case 3:
13665 {
13666 auto op0 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".xy))");
13667 auto op1 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".zz)).x");
13668 op = join(ts: "vec3(", ts&: op0, ts: ", ", ts&: op1, ts: ")");
13669 break;
13670 }
13671 case 4:
13672 {
13673 auto op0 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".xy))");
13674 auto op1 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".zw))");
13675 op = join(ts: "vec4(", ts&: op0, ts: ", ", ts&: op1, ts: ")");
13676 break;
13677 }
13678 default:
13679 SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
13680 }
13681
13682 emit_op(result_type, result_id: id, rhs: op, forwarding: should_forward(id: arg));
13683 inherit_expression_dependencies(dst: id, source: arg);
13684 break;
13685 }
13686
13687 // Derivatives
13688 case OpDPdx:
13689 GLSL_UFOP(dFdx);
13690 if (is_legacy_es())
13691 require_extension_internal(ext: "GL_OES_standard_derivatives");
13692 register_control_dependent_expression(expr: ops[1]);
13693 break;
13694
13695 case OpDPdy:
13696 GLSL_UFOP(dFdy);
13697 if (is_legacy_es())
13698 require_extension_internal(ext: "GL_OES_standard_derivatives");
13699 register_control_dependent_expression(expr: ops[1]);
13700 break;
13701
13702 case OpDPdxFine:
13703 GLSL_UFOP(dFdxFine);
13704 if (options.es)
13705 {
13706 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
13707 }
13708 if (options.version < 450)
13709 require_extension_internal(ext: "GL_ARB_derivative_control");
13710 register_control_dependent_expression(expr: ops[1]);
13711 break;
13712
13713 case OpDPdyFine:
13714 GLSL_UFOP(dFdyFine);
13715 if (options.es)
13716 {
13717 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
13718 }
13719 if (options.version < 450)
13720 require_extension_internal(ext: "GL_ARB_derivative_control");
13721 register_control_dependent_expression(expr: ops[1]);
13722 break;
13723
13724 case OpDPdxCoarse:
13725 if (options.es)
13726 {
13727 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
13728 }
13729 GLSL_UFOP(dFdxCoarse);
13730 if (options.version < 450)
13731 require_extension_internal(ext: "GL_ARB_derivative_control");
13732 register_control_dependent_expression(expr: ops[1]);
13733 break;
13734
13735 case OpDPdyCoarse:
13736 GLSL_UFOP(dFdyCoarse);
13737 if (options.es)
13738 {
13739 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
13740 }
13741 if (options.version < 450)
13742 require_extension_internal(ext: "GL_ARB_derivative_control");
13743 register_control_dependent_expression(expr: ops[1]);
13744 break;
13745
13746 case OpFwidth:
13747 GLSL_UFOP(fwidth);
13748 if (is_legacy_es())
13749 require_extension_internal(ext: "GL_OES_standard_derivatives");
13750 register_control_dependent_expression(expr: ops[1]);
13751 break;
13752
13753 case OpFwidthCoarse:
13754 GLSL_UFOP(fwidthCoarse);
13755 if (options.es)
13756 {
13757 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
13758 }
13759 if (options.version < 450)
13760 require_extension_internal(ext: "GL_ARB_derivative_control");
13761 register_control_dependent_expression(expr: ops[1]);
13762 break;
13763
13764 case OpFwidthFine:
13765 GLSL_UFOP(fwidthFine);
13766 if (options.es)
13767 {
13768 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
13769 }
13770 if (options.version < 450)
13771 require_extension_internal(ext: "GL_ARB_derivative_control");
13772 register_control_dependent_expression(expr: ops[1]);
13773 break;
13774
13775 // Bitfield
13776 case OpBitFieldInsert:
13777 {
13778 emit_bitfield_insert_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op3: ops[5], op: "bitfieldInsert", offset_count_type: SPIRType::Int);
13779 break;
13780 }
13781
13782 case OpBitFieldSExtract:
13783 {
13784 emit_trinary_func_op_bitextract(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op: "bitfieldExtract", expected_result_type: int_type, input_type0: int_type,
13785 input_type1: SPIRType::Int, input_type2: SPIRType::Int);
13786 break;
13787 }
13788
13789 case OpBitFieldUExtract:
13790 {
13791 emit_trinary_func_op_bitextract(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op: "bitfieldExtract", expected_result_type: uint_type, input_type0: uint_type,
13792 input_type1: SPIRType::Int, input_type2: SPIRType::Int);
13793 break;
13794 }
13795
13796 case OpBitReverse:
13797 // BitReverse does not have issues with sign since result type must match input type.
13798 GLSL_UFOP(bitfieldReverse);
13799 break;
13800
13801 case OpBitCount:
13802 {
13803 auto basetype = expression_type(id: ops[2]).basetype;
13804 emit_unary_func_op_cast(result_type: ops[0], result_id: ops[1], op0: ops[2], op: "bitCount", input_type: basetype, expected_result_type: int_type);
13805 break;
13806 }
13807
13808 // Atomics
13809 case OpAtomicExchange:
13810 {
13811 uint32_t result_type = ops[0];
13812 uint32_t id = ops[1];
13813 uint32_t ptr = ops[2];
13814 // Ignore semantics for now, probably only relevant to CL.
13815 uint32_t val = ops[5];
13816 const char *op = check_atomic_image(id: ptr) ? "imageAtomicExchange" : "atomicExchange";
13817
13818 emit_atomic_func_op(result_type, result_id: id, op0: ptr, op1: val, op);
13819 break;
13820 }
13821
13822 case OpAtomicCompareExchange:
13823 {
13824 uint32_t result_type = ops[0];
13825 uint32_t id = ops[1];
13826 uint32_t ptr = ops[2];
13827 uint32_t val = ops[6];
13828 uint32_t comp = ops[7];
13829 const char *op = check_atomic_image(id: ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
13830
13831 emit_atomic_func_op(result_type, result_id: id, op0: ptr, op1: comp, op2: val, op);
13832 break;
13833 }
13834
13835 case OpAtomicLoad:
13836 {
13837 		// In plain GLSL, we have no atomic loads, so emulate this by fetch-adding 0 and hoping the compiler optimizes it away.
13838 // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
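		// e.g. a uint buffer member becomes roughly "atomicAdd(ptr, 0u)", and an image texel pointer becomes
		// "imageAtomicAdd(image, coord, 0u)".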
13839 auto &type = expression_type(id: ops[2]);
13840 forced_temporaries.insert(x: ops[1]);
13841 bool atomic_image = check_atomic_image(id: ops[2]);
13842 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
13843 (atomic_image && get<SPIRType>(id: type.image.type).basetype == SPIRType::UInt);
13844 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
13845 const char *increment = unsigned_type ? "0u" : "0";
13846 emit_op(result_type: ops[0], result_id: ops[1],
13847 rhs: join(ts&: op, ts: "(",
13848 ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: increment, ts: ")"), forwarding: false);
13849 flush_all_atomic_capable_variables();
13850 break;
13851 }
13852
13853 case OpAtomicStore:
13854 {
13855 // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
13856 // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
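		// i.e. this emits a statement such as "atomicExchange(ptr, value);" (or imageAtomicExchange for images)
		// and simply discards the return value.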
13857 uint32_t ptr = ops[0];
13858 // Ignore semantics for now, probably only relevant to CL.
13859 uint32_t val = ops[3];
13860 const char *op = check_atomic_image(id: ptr) ? "imageAtomicExchange" : "atomicExchange";
13861 statement(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: ptr), ts: ", ", ts: to_expression(id: val), ts: ");");
13862 flush_all_atomic_capable_variables();
13863 break;
13864 }
13865
13866 case OpAtomicIIncrement:
13867 case OpAtomicIDecrement:
13868 {
13869 forced_temporaries.insert(x: ops[1]);
13870 auto &type = expression_type(id: ops[2]);
13871 if (type.storage == StorageClassAtomicCounter)
13872 {
13873 // Legacy GLSL stuff, not sure if this is relevant to support.
13874 if (opcode == OpAtomicIIncrement)
13875 GLSL_UFOP(atomicCounterIncrement);
13876 else
13877 GLSL_UFOP(atomicCounterDecrement);
13878 }
13879 else
13880 {
13881 bool atomic_image = check_atomic_image(id: ops[2]);
13882 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
13883 (atomic_image && get<SPIRType>(id: type.image.type).basetype == SPIRType::UInt);
13884 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
13885
13886 const char *increment = nullptr;
13887 if (opcode == OpAtomicIIncrement && unsigned_type)
13888 increment = "1u";
13889 else if (opcode == OpAtomicIIncrement)
13890 increment = "1";
13891 else if (unsigned_type)
13892 increment = "uint(-1)";
13893 else
13894 increment = "-1";
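			// e.g. OpAtomicIDecrement on an unsigned pointer becomes "atomicAdd(ptr, uint(-1))",
			// relying on unsigned wrap-around to subtract one.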
13895
13896 emit_op(result_type: ops[0], result_id: ops[1],
13897 rhs: join(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: increment, ts: ")"), forwarding: false);
13898 }
13899
13900 flush_all_atomic_capable_variables();
13901 break;
13902 }
13903
13904 case OpAtomicIAdd:
13905 case OpAtomicFAddEXT:
13906 {
13907 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicAdd" : "atomicAdd";
13908 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
13909 break;
13910 }
13911
13912 case OpAtomicISub:
13913 {
13914 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicAdd" : "atomicAdd";
13915 forced_temporaries.insert(x: ops[1]);
13916 auto expr = join(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", -", ts: to_enclosed_expression(id: ops[5]), ts: ")");
13917 emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: should_forward(id: ops[2]) && should_forward(id: ops[5]));
13918 flush_all_atomic_capable_variables();
13919 break;
13920 }
13921
13922 case OpAtomicSMin:
13923 case OpAtomicUMin:
13924 {
13925 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicMin" : "atomicMin";
13926 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
13927 break;
13928 }
13929
13930 case OpAtomicSMax:
13931 case OpAtomicUMax:
13932 {
13933 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicMax" : "atomicMax";
13934 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
13935 break;
13936 }
13937
13938 case OpAtomicAnd:
13939 {
13940 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicAnd" : "atomicAnd";
13941 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
13942 break;
13943 }
13944
13945 case OpAtomicOr:
13946 {
13947 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicOr" : "atomicOr";
13948 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
13949 break;
13950 }
13951
13952 case OpAtomicXor:
13953 {
13954 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicXor" : "atomicXor";
13955 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
13956 break;
13957 }
13958
13959 // Geometry shaders
13960 case OpEmitVertex:
13961 statement(ts: "EmitVertex();");
13962 break;
13963
13964 case OpEndPrimitive:
13965 statement(ts: "EndPrimitive();");
13966 break;
13967
13968 case OpEmitStreamVertex:
13969 {
13970 if (options.es)
13971 SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
13972 else if (!options.es && options.version < 400)
13973 SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
13974
13975 auto stream_expr = to_expression(id: ops[0]);
13976 if (expression_type(id: ops[0]).basetype != SPIRType::Int)
13977 stream_expr = join(ts: "int(", ts&: stream_expr, ts: ")");
13978 statement(ts: "EmitStreamVertex(", ts&: stream_expr, ts: ");");
13979 break;
13980 }
13981
13982 case OpEndStreamPrimitive:
13983 {
13984 if (options.es)
13985 SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
13986 else if (!options.es && options.version < 400)
13987 SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
13988
13989 auto stream_expr = to_expression(id: ops[0]);
13990 if (expression_type(id: ops[0]).basetype != SPIRType::Int)
13991 stream_expr = join(ts: "int(", ts&: stream_expr, ts: ")");
13992 statement(ts: "EndStreamPrimitive(", ts&: stream_expr, ts: ");");
13993 break;
13994 }
13995
13996 // Textures
13997 case OpImageSampleExplicitLod:
13998 case OpImageSampleProjExplicitLod:
13999 case OpImageSampleDrefExplicitLod:
14000 case OpImageSampleProjDrefExplicitLod:
14001 case OpImageSampleImplicitLod:
14002 case OpImageSampleProjImplicitLod:
14003 case OpImageSampleDrefImplicitLod:
14004 case OpImageSampleProjDrefImplicitLod:
14005 case OpImageFetch:
14006 case OpImageGather:
14007 case OpImageDrefGather:
14008 	// Gets a bit hairy, so this is handled in a separate function.
14009 emit_texture_op(i: instruction, sparse: false);
14010 break;
14011
14012 case OpImageSparseSampleExplicitLod:
14013 case OpImageSparseSampleProjExplicitLod:
14014 case OpImageSparseSampleDrefExplicitLod:
14015 case OpImageSparseSampleProjDrefExplicitLod:
14016 case OpImageSparseSampleImplicitLod:
14017 case OpImageSparseSampleProjImplicitLod:
14018 case OpImageSparseSampleDrefImplicitLod:
14019 case OpImageSparseSampleProjDrefImplicitLod:
14020 case OpImageSparseFetch:
14021 case OpImageSparseGather:
14022 case OpImageSparseDrefGather:
14023 	// Gets a bit hairy, so this is handled in a separate function.
14024 emit_texture_op(i: instruction, sparse: true);
14025 break;
14026
14027 case OpImageSparseTexelsResident:
14028 if (options.es)
14029 SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL.");
14030 require_extension_internal(ext: "GL_ARB_sparse_texture2");
14031 emit_unary_func_op_cast(result_type: ops[0], result_id: ops[1], op0: ops[2], op: "sparseTexelsResidentARB", input_type: int_type, expected_result_type: SPIRType::Boolean);
14032 break;
14033
14034 case OpImage:
14035 {
14036 uint32_t result_type = ops[0];
14037 uint32_t id = ops[1];
14038
14039 // Suppress usage tracking.
14040 auto &e = emit_op(result_type, result_id: id, rhs: to_expression(id: ops[2]), forwarding: true, suppress_usage_tracking: true);
14041
14042 // When using the image, we need to know which variable it is actually loaded from.
14043 auto *var = maybe_get_backing_variable(chain: ops[2]);
14044 e.loaded_from = var ? var->self : ID(0);
14045 break;
14046 }
14047
14048 case OpImageQueryLod:
14049 {
14050 const char *op = nullptr;
14051 if (!options.es && options.version < 400)
14052 {
14053 require_extension_internal(ext: "GL_ARB_texture_query_lod");
14054 // For some reason, the ARB spec is all-caps.
14055 op = "textureQueryLOD";
14056 }
14057 else if (options.es)
14058 {
14059 if (options.version < 300)
14060 SPIRV_CROSS_THROW("textureQueryLod not supported in legacy ES");
14061 require_extension_internal(ext: "GL_EXT_texture_query_lod");
14062 op = "textureQueryLOD";
14063 }
14064 else
14065 op = "textureQueryLod";
14066
14067 auto sampler_expr = to_expression(id: ops[2]);
14068 if (has_decoration(id: ops[2], decoration: DecorationNonUniform))
14069 {
14070 if (maybe_get_backing_variable(chain: ops[2]))
14071 convert_non_uniform_expression(expr&: sampler_expr, ptr_id: ops[2]);
14072 else if (*backend.nonuniform_qualifier != '\0')
14073 sampler_expr = join(ts&: backend.nonuniform_qualifier, ts: "(", ts&: sampler_expr, ts: ")");
14074 }
14075
14076 bool forward = should_forward(id: ops[3]);
14077 emit_op(result_type: ops[0], result_id: ops[1],
14078 rhs: join(ts&: op, ts: "(", ts&: sampler_expr, ts: ", ", ts: to_unpacked_expression(id: ops[3]), ts: ")"),
14079 forwarding: forward);
14080 inherit_expression_dependencies(dst: ops[1], source: ops[2]);
14081 inherit_expression_dependencies(dst: ops[1], source: ops[3]);
14082 register_control_dependent_expression(expr: ops[1]);
14083 break;
14084 }
14085
14086 case OpImageQueryLevels:
14087 {
14088 uint32_t result_type = ops[0];
14089 uint32_t id = ops[1];
14090
14091 if (!options.es && options.version < 430)
14092 require_extension_internal(ext: "GL_ARB_texture_query_levels");
14093 if (options.es)
14094 SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
14095
14096 auto expr = join(ts: "textureQueryLevels(", ts: convert_separate_image_to_expression(id: ops[2]), ts: ")");
14097 auto &restype = get<SPIRType>(id: ops[0]);
14098 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr);
14099 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
14100 break;
14101 }
14102
14103 case OpImageQuerySamples:
14104 {
14105 auto &type = expression_type(id: ops[2]);
14106 uint32_t result_type = ops[0];
14107 uint32_t id = ops[1];
14108
14109 if (options.es)
14110 SPIRV_CROSS_THROW("textureSamples and imageSamples not supported in ES profile.");
14111 else if (options.version < 450)
14112 require_extension_internal(ext: "GL_ARB_texture_query_samples");
14113
14114 string expr;
14115 if (type.image.sampled == 2)
14116 expr = join(ts: "imageSamples(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ")");
14117 else
14118 expr = join(ts: "textureSamples(", ts: convert_separate_image_to_expression(id: ops[2]), ts: ")");
14119
14120 auto &restype = get<SPIRType>(id: ops[0]);
14121 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr);
14122 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
14123 break;
14124 }
14125
14126 case OpSampledImage:
14127 {
14128 uint32_t result_type = ops[0];
14129 uint32_t id = ops[1];
14130 emit_sampled_image_op(result_type, result_id: id, image_id: ops[2], samp_id: ops[3]);
14131 inherit_expression_dependencies(dst: id, source: ops[2]);
14132 inherit_expression_dependencies(dst: id, source: ops[3]);
14133 break;
14134 }
14135
14136 case OpImageQuerySizeLod:
14137 {
14138 uint32_t result_type = ops[0];
14139 uint32_t id = ops[1];
14140 uint32_t img = ops[2];
14141 auto &type = expression_type(id: img);
14142 auto &imgtype = get<SPIRType>(id: type.self);
14143
14144 std::string fname = "textureSize";
14145 if (is_legacy_desktop())
14146 {
14147 fname = legacy_tex_op(op: fname, imgtype, tex: img);
14148 }
14149 else if (is_legacy_es())
14150 SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");
14151
14152 auto expr = join(ts&: fname, ts: "(", ts: convert_separate_image_to_expression(id: img), ts: ", ",
14153 ts: bitcast_expression(target_type: SPIRType::Int, arg: ops[3]), ts: ")");
14154
14155 // ES needs to emulate 1D images as 2D.
14156 if (type.image.dim == Dim1D && options.es)
14157 expr = join(ts&: expr, ts: ".x");
14158
14159 auto &restype = get<SPIRType>(id: ops[0]);
14160 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr);
14161 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
14162 break;
14163 }
14164
14165 // Image load/store
14166 case OpImageRead:
14167 case OpImageSparseRead:
14168 {
14169 // We added Nonreadable speculatively to the OpImage variable due to glslangValidator
14170 // not adding the proper qualifiers.
14171 // If it turns out we need to read the image after all, remove the qualifier and recompile.
14172 auto *var = maybe_get_backing_variable(chain: ops[2]);
14173 if (var)
14174 {
14175 auto &flags = get_decoration_bitset(id: var->self);
14176 if (flags.get(bit: DecorationNonReadable))
14177 {
14178 unset_decoration(id: var->self, decoration: DecorationNonReadable);
14179 force_recompile();
14180 }
14181 }
14182
14183 uint32_t result_type = ops[0];
14184 uint32_t id = ops[1];
14185
14186 bool pure;
14187 string imgexpr;
14188 auto &type = expression_type(id: ops[2]);
14189
14190 if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
14191 {
14192 if (type.image.ms)
14193 SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
14194
14195 auto itr =
14196 find_if(first: begin(cont&: pls_inputs), last: end(cont&: pls_inputs), pred: [var](const PlsRemap &pls) { return pls.id == var->self; });
14197
14198 if (itr == end(cont&: pls_inputs))
14199 {
14200 // For non-PLS inputs, we rely on subpass type remapping information to get it right
14201 // since ImageRead always returns 4-component vectors and the backing type is opaque.
14202 if (!var->remapped_components)
14203 SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
14204 imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: var->remapped_components, expr: to_expression(id: ops[2]));
14205 }
14206 else
14207 {
14208 				// A PLS input could have a different number of components than what the SPIR-V expects,
14209 				// so swizzle to the appropriate vector size.
14210 uint32_t components = pls_format_to_components(format: itr->format);
14211 imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: components, expr: to_expression(id: ops[2]));
14212 }
14213 pure = true;
14214 }
14215 else if (type.image.dim == DimSubpassData)
14216 {
14217 if (var && subpass_input_is_framebuffer_fetch(id: var->self))
14218 {
14219 imgexpr = to_expression(id: var->self);
14220 }
14221 else if (options.vulkan_semantics)
14222 {
14223 // With Vulkan semantics, use the proper Vulkan GLSL construct.
14224 if (type.image.ms)
14225 {
14226 uint32_t operands = ops[4];
14227 if (operands != ImageOperandsSampleMask || length != 6)
14228 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
14229 "operand mask was used.");
14230
14231 uint32_t samples = ops[5];
14232 imgexpr = join(ts: "subpassLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts: to_expression(id: samples), ts: ")");
14233 }
14234 else
14235 imgexpr = join(ts: "subpassLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ")");
14236 }
14237 else
14238 {
14239 if (type.image.ms)
14240 {
14241 uint32_t operands = ops[4];
14242 if (operands != ImageOperandsSampleMask || length != 6)
14243 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
14244 "operand mask was used.");
14245
14246 uint32_t samples = ops[5];
14247 imgexpr = join(ts: "texelFetch(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ivec2(gl_FragCoord.xy), ",
14248 ts: to_expression(id: samples), ts: ")");
14249 }
14250 else
14251 {
14252 // Implement subpass loads via texture barrier style sampling.
14253 imgexpr = join(ts: "texelFetch(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ivec2(gl_FragCoord.xy), 0)");
14254 }
14255 }
14256 imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 4, expr: imgexpr);
14257 pure = true;
14258 }
14259 else
14260 {
14261 bool sparse = opcode == OpImageSparseRead;
14262 uint32_t sparse_code_id = 0;
14263 uint32_t sparse_texel_id = 0;
14264 if (sparse)
14265 emit_sparse_feedback_temporaries(result_type_id: ops[0], id: ops[1], feedback_id&: sparse_code_id, texel_id&: sparse_texel_id);
14266
14267 // imageLoad only accepts int coords, not uint.
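			// e.g. a uvec2 coordinate is rewritten as "ivec2(coord)" before being passed to imageLoad.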
14268 auto coord_expr = to_expression(id: ops[3]);
14269 auto target_coord_type = expression_type(id: ops[3]);
14270 target_coord_type.basetype = SPIRType::Int;
14271 coord_expr = bitcast_expression(target_type: target_coord_type, expr_type: expression_type(id: ops[3]).basetype, expr: coord_expr);
14272
14273 // ES needs to emulate 1D images as 2D.
14274 if (type.image.dim == Dim1D && options.es)
14275 coord_expr = join(ts: "ivec2(", ts&: coord_expr, ts: ", 0)");
14276
14277 // Plain image load/store.
14278 if (sparse)
14279 {
14280 if (type.image.ms)
14281 {
14282 uint32_t operands = ops[4];
14283 if (operands != ImageOperandsSampleMask || length != 6)
14284 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
14285 "operand mask was used.");
14286
14287 uint32_t samples = ops[5];
14288 statement(ts: to_expression(id: sparse_code_id), ts: " = sparseImageLoadARB(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ",
14289 ts&: coord_expr, ts: ", ", ts: to_expression(id: samples), ts: ", ", ts: to_expression(id: sparse_texel_id), ts: ");");
14290 }
14291 else
14292 {
14293 statement(ts: to_expression(id: sparse_code_id), ts: " = sparseImageLoadARB(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ",
14294 ts&: coord_expr, ts: ", ", ts: to_expression(id: sparse_texel_id), ts: ");");
14295 }
14296 imgexpr = join(ts: type_to_glsl(type: get<SPIRType>(id: result_type)), ts: "(", ts: to_expression(id: sparse_code_id), ts: ", ",
14297 ts: to_expression(id: sparse_texel_id), ts: ")");
14298 }
14299 else
14300 {
14301 if (type.image.ms)
14302 {
14303 uint32_t operands = ops[4];
14304 if (operands != ImageOperandsSampleMask || length != 6)
14305 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
14306 "operand mask was used.");
14307
14308 uint32_t samples = ops[5];
14309 imgexpr =
14310 join(ts: "imageLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: coord_expr, ts: ", ", ts: to_expression(id: samples), ts: ")");
14311 }
14312 else
14313 imgexpr = join(ts: "imageLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: coord_expr, ts: ")");
14314 }
14315
14316 if (!sparse)
14317 imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 4, expr: imgexpr);
14318 pure = false;
14319 }
14320
14321 if (var)
14322 {
14323 bool forward = forced_temporaries.find(x: id) == end(cont&: forced_temporaries);
14324 auto &e = emit_op(result_type, result_id: id, rhs: imgexpr, forwarding: forward);
14325
14326 // We only need to track dependencies if we're reading from image load/store.
14327 if (!pure)
14328 {
14329 e.loaded_from = var->self;
14330 if (forward)
14331 var->dependees.push_back(t: id);
14332 }
14333 }
14334 else
14335 emit_op(result_type, result_id: id, rhs: imgexpr, forwarding: false);
14336
14337 inherit_expression_dependencies(dst: id, source: ops[2]);
14338 if (type.image.ms)
14339 inherit_expression_dependencies(dst: id, source: ops[5]);
14340 break;
14341 }
14342
14343 case OpImageTexelPointer:
14344 {
14345 uint32_t result_type = ops[0];
14346 uint32_t id = ops[1];
14347
14348 auto coord_expr = to_expression(id: ops[3]);
14349 auto target_coord_type = expression_type(id: ops[3]);
14350 target_coord_type.basetype = SPIRType::Int;
14351 coord_expr = bitcast_expression(target_type: target_coord_type, expr_type: expression_type(id: ops[3]).basetype, expr: coord_expr);
14352
14353 auto expr = join(ts: to_expression(id: ops[2]), ts: ", ", ts&: coord_expr);
14354 auto &e = set<SPIRExpression>(id, args&: expr, args&: result_type, args: true);
14355
14356 // When using the pointer, we need to know which variable it is actually loaded from.
14357 auto *var = maybe_get_backing_variable(chain: ops[2]);
14358 e.loaded_from = var ? var->self : ID(0);
14359 inherit_expression_dependencies(dst: id, source: ops[3]);
14360 break;
14361 }
14362
14363 case OpImageWrite:
14364 {
14365 // We added Nonwritable speculatively to the OpImage variable due to glslangValidator
14366 // not adding the proper qualifiers.
14367 // If it turns out we need to write to the image after all, remove the qualifier and recompile.
14368 auto *var = maybe_get_backing_variable(chain: ops[0]);
14369 if (var)
14370 {
14371 if (has_decoration(id: var->self, decoration: DecorationNonWritable))
14372 {
14373 unset_decoration(id: var->self, decoration: DecorationNonWritable);
14374 force_recompile();
14375 }
14376 }
14377
14378 auto &type = expression_type(id: ops[0]);
14379 auto &value_type = expression_type(id: ops[2]);
14380 auto store_type = value_type;
14381 store_type.vecsize = 4;
14382
14383 // imageStore only accepts int coords, not uint.
14384 auto coord_expr = to_expression(id: ops[1]);
14385 auto target_coord_type = expression_type(id: ops[1]);
14386 target_coord_type.basetype = SPIRType::Int;
14387 coord_expr = bitcast_expression(target_type: target_coord_type, expr_type: expression_type(id: ops[1]).basetype, expr: coord_expr);
14388
14389 // ES needs to emulate 1D images as 2D.
14390 if (type.image.dim == Dim1D && options.es)
14391 coord_expr = join(ts: "ivec2(", ts&: coord_expr, ts: ", 0)");
14392
14393 if (type.image.ms)
14394 {
14395 uint32_t operands = ops[3];
14396 if (operands != ImageOperandsSampleMask || length != 5)
14397 SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
14398 uint32_t samples = ops[4];
14399 statement(ts: "imageStore(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts&: coord_expr, ts: ", ", ts: to_expression(id: samples), ts: ", ",
14400 ts: remap_swizzle(out_type: store_type, input_components: value_type.vecsize, expr: to_expression(id: ops[2])), ts: ");");
14401 }
14402 else
14403 statement(ts: "imageStore(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts&: coord_expr, ts: ", ",
14404 ts: remap_swizzle(out_type: store_type, input_components: value_type.vecsize, expr: to_expression(id: ops[2])), ts: ");");
14405
14406 if (var && variable_storage_is_aliased(var: *var))
14407 flush_all_aliased_variables();
14408 break;
14409 }
14410
14411 case OpImageQuerySize:
14412 {
14413 auto &type = expression_type(id: ops[2]);
14414 uint32_t result_type = ops[0];
14415 uint32_t id = ops[1];
14416
14417 if (type.basetype == SPIRType::Image)
14418 {
14419 string expr;
14420 if (type.image.sampled == 2)
14421 {
14422 if (!options.es && options.version < 430)
14423 require_extension_internal(ext: "GL_ARB_shader_image_size");
14424 else if (options.es && options.version < 310)
14425 SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
14426
14427 // The size of an image is always constant.
14428 expr = join(ts: "imageSize(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ")");
14429 }
14430 else
14431 {
14432 // This path is hit for samplerBuffers and multisampled images which do not have LOD.
14433 std::string fname = "textureSize";
14434 if (is_legacy())
14435 {
14436 auto &imgtype = get<SPIRType>(id: type.self);
14437 fname = legacy_tex_op(op: fname, imgtype, tex: ops[2]);
14438 }
14439 expr = join(ts&: fname, ts: "(", ts: convert_separate_image_to_expression(id: ops[2]), ts: ")");
14440 }
14441
14442 auto &restype = get<SPIRType>(id: ops[0]);
14443 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr);
14444 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
14445 }
14446 else
14447 SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
14448 break;
14449 }
14450
14451 case OpImageSampleWeightedQCOM:
14452 case OpImageBoxFilterQCOM:
14453 case OpImageBlockMatchSSDQCOM:
14454 case OpImageBlockMatchSADQCOM:
14455 {
14456 require_extension_internal(ext: "GL_QCOM_image_processing");
14457 uint32_t result_type_id = ops[0];
14458 uint32_t id = ops[1];
14459 string expr;
14460 switch (opcode)
14461 {
14462 case OpImageSampleWeightedQCOM:
14463 expr = "textureWeightedQCOM";
14464 break;
14465 case OpImageBoxFilterQCOM:
14466 expr = "textureBoxFilterQCOM";
14467 break;
14468 case OpImageBlockMatchSSDQCOM:
14469 expr = "textureBlockMatchSSDQCOM";
14470 break;
14471 case OpImageBlockMatchSADQCOM:
14472 expr = "textureBlockMatchSADQCOM";
14473 break;
14474 default:
14475 SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing.");
14476 }
14477 expr += "(";
14478
14479 bool forward = false;
14480 expr += to_expression(id: ops[2]);
14481 expr += ", " + to_expression(id: ops[3]);
14482
14483 switch (opcode)
14484 {
14485 case OpImageSampleWeightedQCOM:
14486 expr += ", " + to_non_uniform_aware_expression(id: ops[4]);
14487 break;
14488 case OpImageBoxFilterQCOM:
14489 expr += ", " + to_expression(id: ops[4]);
14490 break;
14491 case OpImageBlockMatchSSDQCOM:
14492 case OpImageBlockMatchSADQCOM:
14493 expr += ", " + to_non_uniform_aware_expression(id: ops[4]);
14494 expr += ", " + to_expression(id: ops[5]);
14495 expr += ", " + to_expression(id: ops[6]);
14496 break;
14497 default:
14498 SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing.");
14499 }
14500
14501 expr += ")";
14502 emit_op(result_type: result_type_id, result_id: id, rhs: expr, forwarding: forward);
14503
14504 inherit_expression_dependencies(dst: id, source: ops[3]);
14505 if (opcode == OpImageBlockMatchSSDQCOM || opcode == OpImageBlockMatchSADQCOM)
14506 inherit_expression_dependencies(dst: id, source: ops[5]);
14507
14508 break;
14509 }
14510
14511 case OpImageBlockMatchWindowSSDQCOM:
14512 case OpImageBlockMatchWindowSADQCOM:
14513 case OpImageBlockMatchGatherSSDQCOM:
14514 case OpImageBlockMatchGatherSADQCOM:
14515 {
14516 require_extension_internal(ext: "GL_QCOM_image_processing2");
14517 uint32_t result_type_id = ops[0];
14518 uint32_t id = ops[1];
14519 string expr;
14520 switch (opcode)
14521 {
14522 case OpImageBlockMatchWindowSSDQCOM:
14523 expr = "textureBlockMatchWindowSSDQCOM";
14524 break;
14525 case OpImageBlockMatchWindowSADQCOM:
14526 expr = "textureBlockMatchWindowSADQCOM";
14527 break;
14528 case OpImageBlockMatchGatherSSDQCOM:
14529 expr = "textureBlockMatchGatherSSDQCOM";
14530 break;
14531 case OpImageBlockMatchGatherSADQCOM:
14532 expr = "textureBlockMatchGatherSADQCOM";
14533 break;
14534 default:
14535 SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing2.");
14536 }
14537 expr += "(";
14538
14539 bool forward = false;
14540 expr += to_expression(id: ops[2]);
14541 expr += ", " + to_expression(id: ops[3]);
14542
14543 expr += ", " + to_non_uniform_aware_expression(id: ops[4]);
14544 expr += ", " + to_expression(id: ops[5]);
14545 expr += ", " + to_expression(id: ops[6]);
14546
14547 expr += ")";
14548 emit_op(result_type: result_type_id, result_id: id, rhs: expr, forwarding: forward);
14549
14550 inherit_expression_dependencies(dst: id, source: ops[3]);
14551 inherit_expression_dependencies(dst: id, source: ops[5]);
14552 break;
14553 }
14554
14555 // Compute
14556 case OpControlBarrier:
14557 case OpMemoryBarrier:
14558 {
14559 uint32_t execution_scope = 0;
14560 uint32_t memory;
14561 uint32_t semantics;
14562
14563 if (opcode == OpMemoryBarrier)
14564 {
14565 memory = evaluate_constant_u32(id: ops[0]);
14566 semantics = evaluate_constant_u32(id: ops[1]);
14567 }
14568 else
14569 {
14570 execution_scope = evaluate_constant_u32(id: ops[0]);
14571 memory = evaluate_constant_u32(id: ops[1]);
14572 semantics = evaluate_constant_u32(id: ops[2]);
14573 }
14574
14575 if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
14576 {
14577 // OpControlBarrier with ScopeSubgroup is subgroupBarrier()
14578 if (opcode != OpControlBarrier)
14579 {
14580 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMemBarrier);
14581 }
14582 else
14583 {
14584 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBarrier);
14585 }
14586 }
14587
14588 if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
14589 {
14590 			// Tessellation control shaders only have barrier(), which also implies the memory barriers we need.
14591 if (opcode == OpControlBarrier)
14592 statement(ts: "barrier();");
14593 break;
14594 }
14595
14596 // We only care about these flags, acquire/release and friends are not relevant to GLSL.
14597 semantics = mask_relevant_memory_semantics(semantics);
14598
14599 if (opcode == OpMemoryBarrier)
14600 {
14601 			// If we are a memory barrier, and the next instruction is a control barrier, check whether that
14602 			// control barrier's memory semantics already cover what we need, so we can skip this redundant memory barrier.
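			// e.g. OpMemoryBarrier(Workgroup, WorkgroupMemory) immediately followed by
			// OpControlBarrier(Workgroup, Workgroup, WorkgroupMemory | AcquireRelease) collapses into a single barrier().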
14603 const Instruction *next = get_next_instruction_in_block(instr: instruction);
14604 if (next && next->op == OpControlBarrier)
14605 {
14606 auto *next_ops = stream(instr: *next);
14607 uint32_t next_memory = evaluate_constant_u32(id: next_ops[1]);
14608 uint32_t next_semantics = evaluate_constant_u32(id: next_ops[2]);
14609 next_semantics = mask_relevant_memory_semantics(semantics: next_semantics);
14610
14611 bool memory_scope_covered = false;
14612 if (next_memory == memory)
14613 memory_scope_covered = true;
14614 else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
14615 {
14616 // If we only care about workgroup memory, either Device or Workgroup scope is fine,
14617 // scope does not have to match.
14618 if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
14619 (memory == ScopeDevice || memory == ScopeWorkgroup))
14620 {
14621 memory_scope_covered = true;
14622 }
14623 }
14624 else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
14625 {
14626 // The control barrier has device scope, but the memory barrier just has workgroup scope.
14627 memory_scope_covered = true;
14628 }
14629
14630 // If we have the same memory scope, and all memory types are covered, we're good.
14631 if (memory_scope_covered && (semantics & next_semantics) == semantics)
14632 break;
14633 }
14634 }
14635
14636 // We are synchronizing some memory or syncing execution,
14637 // so we cannot forward any loads beyond the memory barrier.
14638 if (semantics || opcode == OpControlBarrier)
14639 {
14640 assert(current_emitting_block);
14641 flush_control_dependent_expressions(block: current_emitting_block->self);
14642 flush_all_active_variables();
14643 }
14644
14645 if (memory == ScopeWorkgroup) // Only need to consider memory within a group
14646 {
14647 if (semantics == MemorySemanticsWorkgroupMemoryMask)
14648 {
14649 // OpControlBarrier implies a memory barrier for shared memory as well.
14650 bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup;
14651 if (!implies_shared_barrier)
14652 statement(ts: "memoryBarrierShared();");
14653 }
14654 else if (semantics != 0)
14655 statement(ts: "groupMemoryBarrier();");
14656 }
14657 else if (memory == ScopeSubgroup)
14658 {
14659 const uint32_t all_barriers =
14660 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
14661
14662 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
14663 {
14664 				// These are not relevant for GLSL, but assume they map to a full subgroupMemoryBarrier().
14665 				// subgroupMemoryBarrier() covers everything, so no need to test anything else.
14666 statement(ts: "subgroupMemoryBarrier();");
14667 }
14668 else if ((semantics & all_barriers) == all_barriers)
14669 {
14670 // Short-hand instead of emitting 3 barriers.
14671 statement(ts: "subgroupMemoryBarrier();");
14672 }
14673 else
14674 {
14675 // Pick out individual barriers.
14676 if (semantics & MemorySemanticsWorkgroupMemoryMask)
14677 statement(ts: "subgroupMemoryBarrierShared();");
14678 if (semantics & MemorySemanticsUniformMemoryMask)
14679 statement(ts: "subgroupMemoryBarrierBuffer();");
14680 if (semantics & MemorySemanticsImageMemoryMask)
14681 statement(ts: "subgroupMemoryBarrierImage();");
14682 }
14683 }
14684 else
14685 {
14686 const uint32_t all_barriers =
14687 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
14688
14689 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
14690 {
14691 // These are not relevant for GLSL, but assume it means memoryBarrier().
14692 // memoryBarrier() does everything, so no need to test anything else.
14693 statement(ts: "memoryBarrier();");
14694 }
14695 else if ((semantics & all_barriers) == all_barriers)
14696 {
14697 				// Short-hand instead of emitting 3 barriers.
14698 statement(ts: "memoryBarrier();");
14699 }
14700 else
14701 {
14702 // Pick out individual barriers.
14703 if (semantics & MemorySemanticsWorkgroupMemoryMask)
14704 statement(ts: "memoryBarrierShared();");
14705 if (semantics & MemorySemanticsUniformMemoryMask)
14706 statement(ts: "memoryBarrierBuffer();");
14707 if (semantics & MemorySemanticsImageMemoryMask)
14708 statement(ts: "memoryBarrierImage();");
14709 }
14710 }
14711
14712 if (opcode == OpControlBarrier)
14713 {
14714 if (execution_scope == ScopeSubgroup)
14715 statement(ts: "subgroupBarrier();");
14716 else
14717 statement(ts: "barrier();");
14718 }
14719 break;
14720 }
14721
14722 case OpExtInst:
14723 {
14724 uint32_t extension_set = ops[2];
14725 auto ext = get<SPIRExtension>(id: extension_set).ext;
14726
14727 if (ext == SPIRExtension::GLSL)
14728 {
14729 emit_glsl_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length: length - 4);
14730 }
14731 else if (ext == SPIRExtension::SPV_AMD_shader_ballot)
14732 {
14733 emit_spv_amd_shader_ballot_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4);
14734 }
14735 else if (ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
14736 {
14737 emit_spv_amd_shader_explicit_vertex_parameter_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4);
14738 }
14739 else if (ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
14740 {
14741 emit_spv_amd_shader_trinary_minmax_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4);
14742 }
14743 else if (ext == SPIRExtension::SPV_AMD_gcn_shader)
14744 {
14745 emit_spv_amd_gcn_shader_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4);
14746 }
14747 else if (ext == SPIRExtension::SPV_debug_info ||
14748 ext == SPIRExtension::NonSemanticShaderDebugInfo ||
14749 ext == SPIRExtension::NonSemanticGeneric)
14750 {
14751 break; // Ignore SPIR-V debug information extended instructions.
14752 }
14753 else if (ext == SPIRExtension::NonSemanticDebugPrintf)
14754 {
14755 // Operation 1 is printf.
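			// e.g. this reconstructs a call like debugPrintfEXT("value = %d", x); from the SPIR-V format string
			// and the trailing operands.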
14756 if (ops[3] == 1)
14757 {
14758 if (!options.vulkan_semantics)
14759 SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.\n");
14760 require_extension_internal(ext: "GL_EXT_debug_printf");
14761 auto &format_string = get<SPIRString>(id: ops[4]).str;
14762 string expr = join(ts: "debugPrintfEXT(\"", ts&: format_string, ts: "\"");
14763 for (uint32_t i = 5; i < length; i++)
14764 {
14765 expr += ", ";
14766 expr += to_expression(id: ops[i]);
14767 }
14768 statement(ts&: expr, ts: ");");
14769 }
14770 }
14771 else
14772 {
14773 statement(ts: "// unimplemented ext op ", ts: instruction.op);
14774 break;
14775 }
14776
14777 break;
14778 }
14779
14780 // Legacy sub-group stuff ...
14781 case OpSubgroupBallotKHR:
14782 {
14783 uint32_t result_type = ops[0];
14784 uint32_t id = ops[1];
14785 string expr;
14786 expr = join(ts: "uvec4(unpackUint2x32(ballotARB(" + to_expression(id: ops[2]) + ")), 0u, 0u)");
14787 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: ops[2]));
14788
14789 require_extension_internal(ext: "GL_ARB_shader_ballot");
14790 inherit_expression_dependencies(dst: id, source: ops[2]);
14791 register_control_dependent_expression(expr: ops[1]);
14792 break;
14793 }
14794
14795 case OpSubgroupFirstInvocationKHR:
14796 {
14797 uint32_t result_type = ops[0];
14798 uint32_t id = ops[1];
14799 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "readFirstInvocationARB");
14800
14801 require_extension_internal(ext: "GL_ARB_shader_ballot");
14802 register_control_dependent_expression(expr: ops[1]);
14803 break;
14804 }
14805
14806 case OpSubgroupReadInvocationKHR:
14807 {
14808 uint32_t result_type = ops[0];
14809 uint32_t id = ops[1];
14810 emit_binary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "readInvocationARB");
14811
14812 require_extension_internal(ext: "GL_ARB_shader_ballot");
14813 register_control_dependent_expression(expr: ops[1]);
14814 break;
14815 }
14816
14817 case OpSubgroupAllKHR:
14818 {
14819 uint32_t result_type = ops[0];
14820 uint32_t id = ops[1];
14821 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "allInvocationsARB");
14822
14823 require_extension_internal(ext: "GL_ARB_shader_group_vote");
14824 register_control_dependent_expression(expr: ops[1]);
14825 break;
14826 }
14827
14828 case OpSubgroupAnyKHR:
14829 {
14830 uint32_t result_type = ops[0];
14831 uint32_t id = ops[1];
14832 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "anyInvocationARB");
14833
14834 require_extension_internal(ext: "GL_ARB_shader_group_vote");
14835 register_control_dependent_expression(expr: ops[1]);
14836 break;
14837 }
14838
14839 case OpSubgroupAllEqualKHR:
14840 {
14841 uint32_t result_type = ops[0];
14842 uint32_t id = ops[1];
14843 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "allInvocationsEqualARB");
14844
14845 require_extension_internal(ext: "GL_ARB_shader_group_vote");
14846 register_control_dependent_expression(expr: ops[1]);
14847 break;
14848 }
14849
14850 case OpGroupIAddNonUniformAMD:
14851 case OpGroupFAddNonUniformAMD:
14852 {
14853 uint32_t result_type = ops[0];
14854 uint32_t id = ops[1];
14855 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "addInvocationsNonUniformAMD");
14856
14857 require_extension_internal(ext: "GL_AMD_shader_ballot");
14858 register_control_dependent_expression(expr: ops[1]);
14859 break;
14860 }
14861
14862 case OpGroupFMinNonUniformAMD:
14863 case OpGroupUMinNonUniformAMD:
14864 case OpGroupSMinNonUniformAMD:
14865 {
14866 uint32_t result_type = ops[0];
14867 uint32_t id = ops[1];
14868 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "minInvocationsNonUniformAMD");
14869
14870 require_extension_internal(ext: "GL_AMD_shader_ballot");
14871 register_control_dependent_expression(expr: ops[1]);
14872 break;
14873 }
14874
14875 case OpGroupFMaxNonUniformAMD:
14876 case OpGroupUMaxNonUniformAMD:
14877 case OpGroupSMaxNonUniformAMD:
14878 {
14879 uint32_t result_type = ops[0];
14880 uint32_t id = ops[1];
14881 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "maxInvocationsNonUniformAMD");
14882
14883 require_extension_internal(ext: "GL_AMD_shader_ballot");
14884 register_control_dependent_expression(expr: ops[1]);
14885 break;
14886 }
14887
14888 case OpFragmentMaskFetchAMD:
14889 {
14890 auto &type = expression_type(id: ops[2]);
14891 uint32_t result_type = ops[0];
14892 uint32_t id = ops[1];
14893
14894 if (type.image.dim == spv::DimSubpassData)
14895 {
14896 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "fragmentMaskFetchAMD");
14897 }
14898 else
14899 {
14900 emit_binary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "fragmentMaskFetchAMD");
14901 }
14902
14903 require_extension_internal(ext: "GL_AMD_shader_fragment_mask");
14904 break;
14905 }
14906
14907 case OpFragmentFetchAMD:
14908 {
14909 auto &type = expression_type(id: ops[2]);
14910 uint32_t result_type = ops[0];
14911 uint32_t id = ops[1];
14912
14913 if (type.image.dim == spv::DimSubpassData)
14914 {
14915 emit_binary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[4], op: "fragmentFetchAMD");
14916 }
14917 else
14918 {
14919 emit_trinary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op2: ops[4], op: "fragmentFetchAMD");
14920 }
14921
14922 require_extension_internal(ext: "GL_AMD_shader_fragment_mask");
14923 break;
14924 }
14925
14926 // Vulkan 1.1 sub-group stuff ...
14927 case OpGroupNonUniformElect:
14928 case OpGroupNonUniformBroadcast:
14929 case OpGroupNonUniformBroadcastFirst:
14930 case OpGroupNonUniformBallot:
14931 case OpGroupNonUniformInverseBallot:
14932 case OpGroupNonUniformBallotBitExtract:
14933 case OpGroupNonUniformBallotBitCount:
14934 case OpGroupNonUniformBallotFindLSB:
14935 case OpGroupNonUniformBallotFindMSB:
14936 case OpGroupNonUniformShuffle:
14937 case OpGroupNonUniformShuffleXor:
14938 case OpGroupNonUniformShuffleUp:
14939 case OpGroupNonUniformShuffleDown:
14940 case OpGroupNonUniformAll:
14941 case OpGroupNonUniformAny:
14942 case OpGroupNonUniformAllEqual:
14943 case OpGroupNonUniformFAdd:
14944 case OpGroupNonUniformIAdd:
14945 case OpGroupNonUniformFMul:
14946 case OpGroupNonUniformIMul:
14947 case OpGroupNonUniformFMin:
14948 case OpGroupNonUniformFMax:
14949 case OpGroupNonUniformSMin:
14950 case OpGroupNonUniformSMax:
14951 case OpGroupNonUniformUMin:
14952 case OpGroupNonUniformUMax:
14953 case OpGroupNonUniformBitwiseAnd:
14954 case OpGroupNonUniformBitwiseOr:
14955 case OpGroupNonUniformBitwiseXor:
14956 case OpGroupNonUniformLogicalAnd:
14957 case OpGroupNonUniformLogicalOr:
14958 case OpGroupNonUniformLogicalXor:
14959 case OpGroupNonUniformQuadSwap:
14960 case OpGroupNonUniformQuadBroadcast:
14961 emit_subgroup_op(i: instruction);
14962 break;
14963
14964 case OpFUnordEqual:
14965 case OpFUnordLessThan:
14966 case OpFUnordGreaterThan:
14967 case OpFUnordLessThanEqual:
14968 case OpFUnordGreaterThanEqual:
14969 {
14970 // GLSL doesn't specify if floating point comparisons are ordered or unordered,
14971 // but glslang always emits ordered floating point compares for GLSL.
14972 // To get unordered compares, we can test the opposite thing and invert the result.
14973 // This way, we force true when there is any NaN present.
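		// e.g. OpFUnordLessThan(a, b) becomes "!(a >= b)" for scalars or "not(greaterThanEqual(a, b))" for vectors;
		// any NaN operand makes the inner compare false, so the unordered result is true.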
14974 uint32_t op0 = ops[2];
14975 uint32_t op1 = ops[3];
14976
14977 string expr;
14978 if (expression_type(id: op0).vecsize > 1)
14979 {
14980 const char *comp_op = nullptr;
14981 switch (opcode)
14982 {
14983 case OpFUnordEqual:
14984 comp_op = "notEqual";
14985 break;
14986
14987 case OpFUnordLessThan:
14988 comp_op = "greaterThanEqual";
14989 break;
14990
14991 case OpFUnordLessThanEqual:
14992 comp_op = "greaterThan";
14993 break;
14994
14995 case OpFUnordGreaterThan:
14996 comp_op = "lessThanEqual";
14997 break;
14998
14999 case OpFUnordGreaterThanEqual:
15000 comp_op = "lessThan";
15001 break;
15002
15003 default:
15004 assert(0);
15005 break;
15006 }
15007
15008 expr = join(ts: "not(", ts&: comp_op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ", ", ts: to_unpacked_expression(id: op1), ts: "))");
15009 }
15010 else
15011 {
15012 const char *comp_op = nullptr;
15013 switch (opcode)
15014 {
15015 case OpFUnordEqual:
15016 comp_op = " != ";
15017 break;
15018
15019 case OpFUnordLessThan:
15020 comp_op = " >= ";
15021 break;
15022
15023 case OpFUnordLessThanEqual:
15024 comp_op = " > ";
15025 break;
15026
15027 case OpFUnordGreaterThan:
15028 comp_op = " <= ";
15029 break;
15030
15031 case OpFUnordGreaterThanEqual:
15032 comp_op = " < ";
15033 break;
15034
15035 default:
15036 assert(0);
15037 break;
15038 }
15039
15040 expr = join(ts: "!(", ts: to_enclosed_unpacked_expression(id: op0), ts&: comp_op, ts: to_enclosed_unpacked_expression(id: op1), ts: ")");
15041 }
15042
15043 emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1));
15044 inherit_expression_dependencies(dst: ops[1], source: op0);
15045 inherit_expression_dependencies(dst: ops[1], source: op1);
15046 break;
15047 }
15048
15049 case OpReportIntersectionKHR:
15050 // NV is same opcode.
15051 forced_temporaries.insert(x: ops[1]);
15052 if (ray_tracing_is_khr)
15053 GLSL_BFOP(reportIntersectionEXT);
15054 else
15055 GLSL_BFOP(reportIntersectionNV);
15056 flush_control_dependent_expressions(block: current_emitting_block->self);
15057 break;
15058 case OpIgnoreIntersectionNV:
15059 // KHR variant is a terminator.
15060 statement(ts: "ignoreIntersectionNV();");
15061 flush_control_dependent_expressions(block: current_emitting_block->self);
15062 break;
15063 case OpTerminateRayNV:
15064 // KHR variant is a terminator.
15065 statement(ts: "terminateRayNV();");
15066 flush_control_dependent_expressions(block: current_emitting_block->self);
15067 break;
15068 case OpTraceNV:
15069 statement(ts: "traceNV(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ", ", ts: to_expression(id: ops[2]), ts: ", ",
15070 ts: to_expression(id: ops[3]), ts: ", ", ts: to_expression(id: ops[4]), ts: ", ", ts: to_expression(id: ops[5]), ts: ", ",
15071 ts: to_expression(id: ops[6]), ts: ", ", ts: to_expression(id: ops[7]), ts: ", ", ts: to_expression(id: ops[8]), ts: ", ",
15072 ts: to_expression(id: ops[9]), ts: ", ", ts: to_expression(id: ops[10]), ts: ");");
15073 flush_control_dependent_expressions(block: current_emitting_block->self);
15074 break;
15075 case OpTraceRayKHR:
15076 if (!has_decoration(id: ops[10], decoration: DecorationLocation))
15077 SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
15078 statement(ts: "traceRayEXT(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ", ", ts: to_expression(id: ops[2]), ts: ", ",
15079 ts: to_expression(id: ops[3]), ts: ", ", ts: to_expression(id: ops[4]), ts: ", ", ts: to_expression(id: ops[5]), ts: ", ",
15080 ts: to_expression(id: ops[6]), ts: ", ", ts: to_expression(id: ops[7]), ts: ", ", ts: to_expression(id: ops[8]), ts: ", ",
15081 ts: to_expression(id: ops[9]), ts: ", ", ts: get_decoration(id: ops[10], decoration: DecorationLocation), ts: ");");
15082 flush_control_dependent_expressions(block: current_emitting_block->self);
15083 break;
15084 case OpExecuteCallableNV:
15085 statement(ts: "executeCallableNV(", ts: to_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ");");
15086 flush_control_dependent_expressions(block: current_emitting_block->self);
15087 break;
15088 case OpExecuteCallableKHR:
15089 if (!has_decoration(id: ops[1], decoration: DecorationLocation))
15090 SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
15091 statement(ts: "executeCallableEXT(", ts: to_expression(id: ops[0]), ts: ", ", ts: get_decoration(id: ops[1], decoration: DecorationLocation), ts: ");");
15092 flush_control_dependent_expressions(block: current_emitting_block->self);
15093 break;
15094
15095 // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects.
15096 case OpRayQueryInitializeKHR:
15097 flush_variable_declaration(id: ops[0]);
15098 statement(ts: "rayQueryInitializeEXT(",
15099 ts: to_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ", ",
15100 ts: to_expression(id: ops[2]), ts: ", ", ts: to_expression(id: ops[3]), ts: ", ",
15101 ts: to_expression(id: ops[4]), ts: ", ", ts: to_expression(id: ops[5]), ts: ", ",
15102 ts: to_expression(id: ops[6]), ts: ", ", ts: to_expression(id: ops[7]), ts: ");");
15103 break;
15104 case OpRayQueryProceedKHR:
15105 flush_variable_declaration(id: ops[0]);
15106 emit_op(result_type: ops[0], result_id: ops[1], rhs: join(ts: "rayQueryProceedEXT(", ts: to_expression(id: ops[2]), ts: ")"), forwarding: false);
15107 break;
15108 case OpRayQueryTerminateKHR:
15109 flush_variable_declaration(id: ops[0]);
15110 statement(ts: "rayQueryTerminateEXT(", ts: to_expression(id: ops[0]), ts: ");");
15111 break;
15112 case OpRayQueryGenerateIntersectionKHR:
15113 flush_variable_declaration(id: ops[0]);
15114 statement(ts: "rayQueryGenerateIntersectionEXT(", ts: to_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ");");
15115 break;
15116 case OpRayQueryConfirmIntersectionKHR:
15117 flush_variable_declaration(id: ops[0]);
15118 statement(ts: "rayQueryConfirmIntersectionEXT(", ts: to_expression(id: ops[0]), ts: ");");
15119 break;
15120#define GLSL_RAY_QUERY_GET_OP(op) \
15121 case OpRayQueryGet##op##KHR: \
15122 flush_variable_declaration(ops[2]); \
15123 emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \
15124 break
15125#define GLSL_RAY_QUERY_GET_OP2(op) \
15126 case OpRayQueryGet##op##KHR: \
15127 flush_variable_declaration(ops[2]); \
15128 emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \
15129 break
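	// For example (illustrative): GLSL_RAY_QUERY_GET_OP(RayTMin) expands to a case which emits
	// rayQueryGetRayTMinEXT(q), while the OP2 variant additionally passes bool(committed) as
	// the second argument.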
15130 GLSL_RAY_QUERY_GET_OP(RayTMin);
15131 GLSL_RAY_QUERY_GET_OP(RayFlags);
15132 GLSL_RAY_QUERY_GET_OP(WorldRayOrigin);
15133 GLSL_RAY_QUERY_GET_OP(WorldRayDirection);
15134 GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque);
15135 GLSL_RAY_QUERY_GET_OP2(IntersectionType);
15136 GLSL_RAY_QUERY_GET_OP2(IntersectionT);
15137 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex);
15138 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId);
15139 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset);
15140 GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex);
15141 GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex);
15142 GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics);
15143 GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace);
15144 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection);
15145 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin);
15146 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld);
15147 GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject);
15148#undef GLSL_RAY_QUERY_GET_OP
15149#undef GLSL_RAY_QUERY_GET_OP2
15150
15151 case OpConvertUToAccelerationStructureKHR:
15152 {
15153 require_extension_internal(ext: "GL_EXT_ray_tracing");
15154
15155 bool elide_temporary = should_forward(id: ops[2]) && forced_temporaries.count(x: ops[1]) == 0 &&
15156 !hoisted_temporaries.count(x: ops[1]);
15157
15158 if (elide_temporary)
15159 {
15160 GLSL_UFOP(accelerationStructureEXT);
15161 }
15162 else
15163 {
15164 // Force this path in subsequent iterations.
15165 forced_temporaries.insert(x: ops[1]);
15166
15167 // We cannot declare a temporary acceleration structure in GLSL.
15168 // If we get to this point, we'll have to emit a temporary uvec2,
15169 // and cast to RTAS on demand.
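			// For example (illustrative), the emitted GLSL ends up roughly as:
			//   uvec2 _temp = handle_expr;
			//   ... accelerationStructureEXT(_temp) ...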
15170 statement(ts: declare_temporary(result_type: expression_type_id(id: ops[2]), result_id: ops[1]), ts: to_unpacked_expression(id: ops[2]), ts: ";");
15171 // Use raw SPIRExpression interface to block all usage tracking.
15172 set<SPIRExpression>(id: ops[1], args: join(ts: "accelerationStructureEXT(", ts: to_name(id: ops[1]), ts: ")"), args: ops[0], args: true);
15173 }
15174 break;
15175 }
15176
15177 case OpConvertUToPtr:
15178 {
15179 auto &type = get<SPIRType>(id: ops[0]);
15180 if (type.storage != StorageClassPhysicalStorageBufferEXT)
15181 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
15182
15183 auto &in_type = expression_type(id: ops[2]);
15184 if (in_type.vecsize == 2)
15185 require_extension_internal(ext: "GL_EXT_buffer_reference_uvec2");
15186
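		// GL_EXT_buffer_reference converts with constructor-style casts in both directions,
		// e.g. (illustrative) BlockRefType(uvec2_value) here, and uvec2(ptr) for OpConvertPtrToU below.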
15187 auto op = type_to_glsl(type);
15188 emit_unary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op: op.c_str());
15189 break;
15190 }
15191
15192 case OpConvertPtrToU:
15193 {
15194 auto &type = get<SPIRType>(id: ops[0]);
15195 auto &ptr_type = expression_type(id: ops[2]);
15196 if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
15197 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
15198
15199 if (type.vecsize == 2)
15200 require_extension_internal(ext: "GL_EXT_buffer_reference_uvec2");
15201
15202 auto op = type_to_glsl(type);
15203 emit_unary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op: op.c_str());
15204 break;
15205 }
15206
15207 case OpUndef:
15208 // Undefined value has been declared.
15209 break;
15210
15211 case OpLine:
15212 {
15213 emit_line_directive(file_id: ops[0], line_literal: ops[1]);
15214 break;
15215 }
15216
15217 case OpNoLine:
15218 break;
15219
15220 case OpDemoteToHelperInvocationEXT:
15221 if (!options.vulkan_semantics)
15222 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
15223 require_extension_internal(ext: "GL_EXT_demote_to_helper_invocation");
15224 statement(ts&: backend.demote_literal, ts: ";");
15225 break;
15226
15227 case OpIsHelperInvocationEXT:
15228 if (!options.vulkan_semantics)
15229 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
15230 require_extension_internal(ext: "GL_EXT_demote_to_helper_invocation");
15231 // Helper lane state with demote is volatile by nature.
15232 // Do not forward this.
15233 emit_op(result_type: ops[0], result_id: ops[1], rhs: "helperInvocationEXT()", forwarding: false);
15234 break;
15235
15236 case OpBeginInvocationInterlockEXT:
15237 // If the interlock is complex, we emit this elsewhere.
15238 if (!interlocked_is_complex)
15239 {
15240 statement(ts: "SPIRV_Cross_beginInvocationInterlock();");
15241 flush_all_active_variables();
15242 // Make sure forwarding doesn't propagate outside interlock region.
15243 }
15244 break;
15245
15246 case OpEndInvocationInterlockEXT:
15247 // If the interlock is complex, we emit this elsewhere.
15248 if (!interlocked_is_complex)
15249 {
15250 statement(ts: "SPIRV_Cross_endInvocationInterlock();");
15251 flush_all_active_variables();
15252 // Make sure forwarding doesn't propagate outside interlock region.
15253 }
15254 break;
15255
15256 case OpSetMeshOutputsEXT:
15257 statement(ts: "SetMeshOutputsEXT(", ts: to_unpacked_expression(id: ops[0]), ts: ", ", ts: to_unpacked_expression(id: ops[1]), ts: ");");
15258 break;
15259
15260 case OpReadClockKHR:
15261 {
15262 auto &type = get<SPIRType>(id: ops[0]);
15263 auto scope = static_cast<Scope>(evaluate_constant_u32(id: ops[2]));
15264 const char *op = nullptr;
15265 // Forwarding clock statements leads to a scenario where an SSA value can take on different
15266 // values every time it's evaluated. Block any forwarding attempt.
		// We might also want to invalidate all expressions to function as a sort of optimization
		// barrier, but that might be overkill for now.
15269 if (scope == ScopeDevice)
15270 {
15271 require_extension_internal(ext: "GL_EXT_shader_realtime_clock");
15272 if (type.basetype == SPIRType::BaseType::UInt64)
15273 op = "clockRealtimeEXT()";
15274 else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2)
15275 op = "clockRealtime2x32EXT()";
15276 else
15277 SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode.");
15278 }
15279 else if (scope == ScopeSubgroup)
15280 {
15281 require_extension_internal(ext: "GL_ARB_shader_clock");
15282 if (type.basetype == SPIRType::BaseType::UInt64)
15283 op = "clockARB()";
15284 else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2)
15285 op = "clock2x32ARB()";
15286 else
15287 SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode.");
15288 }
15289 else
15290 SPIRV_CROSS_THROW("Unsupported scope for OpReadClockKHR opcode.");
15291
15292 emit_op(result_type: ops[0], result_id: ops[1], rhs: op, forwarding: false);
15293 break;
15294 }
15295
15296 default:
15297 statement(ts: "// unimplemented op ", ts: instruction.op);
15298 break;
15299 }
15300}
15301
// Appends function arguments, mapped from global variables, beyond the specified arg index.
// This is used when a function call uses fewer arguments than the function defines.
// This situation may occur if the function signature has been dynamically modified to
// extract global variables referenced from within the function and convert them to
// function arguments. This is necessary for shader languages that do not support global
// access to shader input content from within a function (e.g. Metal). Each additional
// function argument uses the name of the global variable. Function nesting will modify the
// functions and function calls all the way up the nesting chain.
15310void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
15311{
15312 auto &args = func.arguments;
15313 uint32_t arg_cnt = uint32_t(args.size());
15314 for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
15315 {
15316 auto &arg = args[arg_idx];
15317 assert(arg.alias_global_variable);
15318
15319 // If the underlying variable needs to be declared
		// (i.e. a local variable with deferred declaration), do so now.
15321 uint32_t var_id = get<SPIRVariable>(id: arg.id).basevariable;
15322 if (var_id)
15323 flush_variable_declaration(id: var_id);
15324
15325 arglist.push_back(t: to_func_call_arg(arg, id: arg.id));
15326 }
15327}
15328
15329string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
15330{
15331 if (type.type_alias != TypeID(0) &&
15332 !has_extended_decoration(id: type.type_alias, decoration: SPIRVCrossDecorationBufferBlockRepacked))
15333 {
15334 return to_member_name(type: get<SPIRType>(id: type.type_alias), index);
15335 }
15336
15337 auto &memb = ir.meta[type.self].members;
15338 if (index < memb.size() && !memb[index].alias.empty())
15339 return memb[index].alias;
15340 else
15341 return join(ts: "_m", ts&: index);
15342}
15343
15344string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
15345{
15346 return join(ts: ".", ts: to_member_name(type, index));
15347}
15348
15349string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
15350{
15351 string ret;
15352 auto *member_type = &type;
15353 for (auto &index : indices)
15354 {
15355 ret += join(ts: ".", ts: to_member_name(type: *member_type, index));
15356 member_type = &get<SPIRType>(id: member_type->member_types[index]);
15357 }
15358 return ret;
15359}
15360
15361void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
15362{
15363 auto &memb = ir.meta[type.self].members;
15364 if (index < memb.size() && !memb[index].alias.empty())
15365 {
15366 auto &name = memb[index].alias;
15367 if (name.empty())
15368 return;
15369
15370 ParsedIR::sanitize_identifier(str&: name, member: true, allow_reserved_prefixes: true);
15371 update_name_cache(cache&: type.member_name_cache, name);
15372 }
15373}
15374
15375// Checks whether the ID is a row_major matrix that requires conversion before use
15376bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
15377{
15378 // Natively supported row-major matrices do not need to be converted.
15379 // Legacy targets do not support row major.
15380 if (backend.native_row_major_matrix && !is_legacy())
15381 return false;
15382
15383 auto *e = maybe_get<SPIRExpression>(id);
15384 if (e)
15385 return e->need_transpose;
15386 else
15387 return has_decoration(id, decoration: DecorationRowMajor);
15388}
15389
15390// Checks whether the member is a row_major matrix that requires conversion before use
15391bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
15392{
15393 // Natively supported row-major matrices do not need to be converted.
15394 if (backend.native_row_major_matrix && !is_legacy())
15395 return false;
15396
15397 // Non-matrix or column-major matrix types do not need to be converted.
15398 if (!has_member_decoration(id: type.self, index, decoration: DecorationRowMajor))
15399 return false;
15400
15401 // Only square row-major matrices can be converted at this time.
	// Converting non-square matrices would require defining a custom GLSL function that
15403 // swaps matrix elements while retaining the original dimensional form of the matrix.
15404 const auto mbr_type = get<SPIRType>(id: type.member_types[index]);
15405 if (mbr_type.columns != mbr_type.vecsize)
15406 SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
15407
15408 return true;
15409}
15410
15411// Checks if we need to remap physical type IDs when declaring the type in a buffer.
15412bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
15413{
15414 return has_extended_member_decoration(type: type.self, index, decoration: SPIRVCrossDecorationPhysicalTypeID);
15415}
15416
// Checks whether the member is a packed physical type that might need to be unpacked.
15418bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
15419{
15420 return has_extended_member_decoration(type: type.self, index, decoration: SPIRVCrossDecorationPhysicalTypePacked);
15421}
15422
15423// Wraps the expression string in a function call that converts the
15424// row_major matrix result of the expression to a column_major matrix.
15425// Base implementation uses the standard library transpose() function.
15426// Subclasses may override to use a different function.
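// For example (illustrative), a row-major mat4 expression is wrapped as transpose(expr),
// or spvTranspose(expr) on legacy targets where transpose() is unavailable.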
15427string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
15428 bool /*is_packed*/, bool relaxed)
15429{
15430 strip_enclosed_expression(expr&: exp_str);
15431 if (!is_matrix(type: exp_type))
15432 {
15433 auto column_index = exp_str.find_last_of(c: '[');
15434 if (column_index == string::npos)
15435 return exp_str;
15436
15437 auto column_expr = exp_str.substr(pos: column_index);
15438 exp_str.resize(n: column_index);
15439
15440 auto end_deferred_index = column_expr.find_last_of(c: ']');
15441 if (end_deferred_index != string::npos && end_deferred_index + 1 != column_expr.size())
15442 {
15443 // If we have any data member fixups, it must be transposed so that it refers to this index.
15444 // E.g. [0].data followed by [1] would be shuffled to [1][0].data which is wrong,
15445 // and needs to be [1].data[0] instead.
15446 end_deferred_index++;
15447 column_expr = column_expr.substr(pos: end_deferred_index) +
15448 column_expr.substr(pos: 0, n: end_deferred_index);
15449 }
15450
15451 auto transposed_expr = type_to_glsl_constructor(type: exp_type) + "(";
15452
15453 // Loading a column from a row-major matrix. Unroll the load.
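		// For example (illustrative): reading column 1 of a row-major mat3 M becomes
		// vec3(M[0][1], M[1][1], M[2][1]).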
15454 for (uint32_t c = 0; c < exp_type.vecsize; c++)
15455 {
15456 transposed_expr += join(ts&: exp_str, ts: '[', ts&: c, ts: ']', ts&: column_expr);
15457 if (c + 1 < exp_type.vecsize)
15458 transposed_expr += ", ";
15459 }
15460
15461 transposed_expr += ")";
15462 return transposed_expr;
15463 }
15464 else if (options.version < 120)
15465 {
15466 // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that
15467 // these GLSL versions do not support non-square matrices.
15468 if (exp_type.vecsize == 2 && exp_type.columns == 2)
15469 require_polyfill(polyfill: PolyfillTranspose2x2, relaxed);
15470 else if (exp_type.vecsize == 3 && exp_type.columns == 3)
15471 require_polyfill(polyfill: PolyfillTranspose3x3, relaxed);
15472 else if (exp_type.vecsize == 4 && exp_type.columns == 4)
15473 require_polyfill(polyfill: PolyfillTranspose4x4, relaxed);
15474 else
15475 SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose.");
15476 return join(ts: "spvTranspose", ts: (options.es && relaxed) ? "MP" : "", ts: "(", ts&: exp_str, ts: ")");
15477 }
15478 else
15479 return join(ts: "transpose(", ts&: exp_str, ts: ")");
15480}
15481
15482string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
15483{
15484 string type_name = type_to_glsl(type, id);
15485 remap_variable_type_name(type, var_name: name, type_name);
15486 return join(ts&: type_name, ts: " ", ts: name, ts: type_to_array_glsl(type, variable_id: id));
15487}
15488
15489bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const
15490{
15491 return var.storage == storage;
15492}
15493
15494// Emit a structure member. Subclasses may override to modify output,
15495// or to dynamically add a padding member if needed.
15496void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
15497 const string &qualifier, uint32_t)
15498{
15499 auto &membertype = get<SPIRType>(id: member_type_id);
15500
15501 Bitset memberflags;
15502 auto &memb = ir.meta[type.self].members;
15503 if (index < memb.size())
15504 memberflags = memb[index].decoration_flags;
15505
15506 string qualifiers;
15507 bool is_block = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock) ||
15508 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
15509
15510 if (is_block)
15511 qualifiers = to_interpolation_qualifiers(flags: memberflags);
15512
15513 statement(ts: layout_for_member(type, index), ts&: qualifiers, ts: qualifier, ts: flags_to_qualifiers_glsl(type: membertype, flags: memberflags),
15514 ts: variable_decl(type: membertype, name: to_member_name(type, index)), ts: ";");
15515}
15516
15517void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
15518{
15519}
15520
15521string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
15522{
15523 // GL_EXT_buffer_reference variables can be marked as restrict.
15524 if (flags.get(bit: DecorationRestrictPointerEXT))
15525 return "restrict ";
15526
15527 string qual;
15528
15529 if (type_is_floating_point(type) && flags.get(bit: DecorationNoContraction) && backend.support_precise_qualifier)
15530 qual = "precise ";
15531
	// Structs do not have precision qualifiers, and neither do doubles (desktop-only anyway, so no mediump/highp).
15533 bool type_supports_precision =
15534 type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt ||
15535 type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
15536 type.basetype == SPIRType::Sampler;
15537
15538 if (!type_supports_precision)
15539 return qual;
15540
15541 if (options.es)
15542 {
15543 auto &execution = get_entry_point();
15544
15545 if (type.basetype == SPIRType::UInt && is_legacy_es())
15546 {
15547 // HACK: This is a bool. See comment in type_to_glsl().
15548 qual += "lowp ";
15549 }
15550 else if (flags.get(bit: DecorationRelaxedPrecision))
15551 {
15552 bool implied_fmediump = type.basetype == SPIRType::Float &&
15553 options.fragment.default_float_precision == Options::Mediump &&
15554 execution.model == ExecutionModelFragment;
15555
15556 bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
15557 options.fragment.default_int_precision == Options::Mediump &&
15558 execution.model == ExecutionModelFragment;
15559
15560 qual += (implied_fmediump || implied_imediump) ? "" : "mediump ";
15561 }
15562 else
15563 {
15564 bool implied_fhighp =
15565 type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
15566 execution.model == ExecutionModelFragment) ||
15567 (execution.model != ExecutionModelFragment));
15568
15569 bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
15570 ((options.fragment.default_int_precision == Options::Highp &&
15571 execution.model == ExecutionModelFragment) ||
15572 (execution.model != ExecutionModelFragment));
15573
15574 qual += (implied_fhighp || implied_ihighp) ? "" : "highp ";
15575 }
15576 }
15577 else if (backend.allow_precision_qualifiers)
15578 {
15579 // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
15580 // The default is highp however, so only emit mediump in the rare case that a shader has these.
15581 if (flags.get(bit: DecorationRelaxedPrecision))
15582 qual += "mediump ";
15583 }
15584
15585 return qual;
15586}
15587
15588string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
15589{
15590 auto &type = expression_type(id);
15591 bool use_precision_qualifiers = backend.allow_precision_qualifiers;
15592 if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
15593 {
15594 // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
15595 auto &result_type = get<SPIRType>(id: type.image.type);
15596 if (result_type.width < 32)
15597 return "mediump ";
15598 }
15599 return flags_to_qualifiers_glsl(type, flags: ir.meta[id].decoration.decoration_flags);
15600}
15601
15602void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var)
15603{
15604 // Works around weird behavior in glslangValidator where
15605 // a patch out block is translated to just block members getting the decoration.
15606 // To make glslang not complain when we compile again, we have to transform this back to a case where
15607 // the variable itself has Patch decoration, and not members.
15608 // Same for perprimitiveEXT.
15609 auto &type = get<SPIRType>(id: var.basetype);
15610 if (has_decoration(id: type.self, decoration: DecorationBlock))
15611 {
15612 uint32_t member_count = uint32_t(type.member_types.size());
15613 Decoration promoted_decoration = {};
15614 bool do_promote_decoration = false;
15615 for (uint32_t i = 0; i < member_count; i++)
15616 {
15617 if (has_member_decoration(id: type.self, index: i, decoration: DecorationPatch))
15618 {
15619 promoted_decoration = DecorationPatch;
15620 do_promote_decoration = true;
15621 break;
15622 }
15623 else if (has_member_decoration(id: type.self, index: i, decoration: DecorationPerPrimitiveEXT))
15624 {
15625 promoted_decoration = DecorationPerPrimitiveEXT;
15626 do_promote_decoration = true;
15627 break;
15628 }
15629 }
15630
15631 if (do_promote_decoration)
15632 {
15633 set_decoration(id: var.self, decoration: promoted_decoration);
15634 for (uint32_t i = 0; i < member_count; i++)
15635 unset_member_decoration(id: type.self, index: i, decoration: promoted_decoration);
15636 }
15637 }
15638}
15639
15640string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
15641{
15642 auto &flags = get_decoration_bitset(id);
15643 string res;
15644
15645 auto *var = maybe_get<SPIRVariable>(id);
15646
15647 if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
15648 res += "shared ";
15649 else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT && !backend.shared_is_implied)
15650 res += "taskPayloadSharedEXT ";
15651
15652 res += to_interpolation_qualifiers(flags);
15653 if (var)
15654 res += to_storage_qualifiers_glsl(var: *var);
15655
15656 auto &type = expression_type(id);
15657 if (type.image.dim != DimSubpassData && type.image.sampled == 2)
15658 {
15659 if (flags.get(bit: DecorationCoherent))
15660 res += "coherent ";
15661 if (flags.get(bit: DecorationRestrict))
15662 res += "restrict ";
15663
15664 if (flags.get(bit: DecorationNonWritable))
15665 res += "readonly ";
15666
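		// Reading from an image with unknown format requires GL_EXT_shader_image_load_formatted,
		// unless the image is effectively write-only and thus never read.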
15667 bool formatted_load = type.image.format == ImageFormatUnknown;
15668 if (flags.get(bit: DecorationNonReadable))
15669 {
15670 res += "writeonly ";
15671 formatted_load = false;
15672 }
15673
15674 if (formatted_load)
15675 {
15676 if (!options.es)
15677 require_extension_internal(ext: "GL_EXT_shader_image_load_formatted");
15678 else
15679 SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL.");
15680 }
15681 }
15682
15683 res += to_precision_qualifiers_glsl(id);
15684
15685 return res;
15686}
15687
15688string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
15689{
	// glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
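	// For example (illustrative), a parameter both read and written becomes roughly "inout float foo".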
15691 auto &type = expression_type(id: arg.id);
15692 const char *direction = "";
15693
15694 if (type.pointer)
15695 {
15696 // If we're passing around block types to function, we really mean reference in a pointer sense,
15697 // but DXC does not like inout for mesh blocks, so workaround that. out is technically not correct,
15698 // but it works in practice due to legalization. It's ... not great, but you gotta do what you gotta do.
15699 // GLSL will never hit this case since it's not valid.
15700 if (type.storage == StorageClassOutput && get_execution_model() == ExecutionModelMeshEXT &&
15701 has_decoration(id: type.self, decoration: DecorationBlock) && is_builtin_type(type) && arg.write_count)
15702 {
15703 direction = "out ";
15704 }
15705 else if (arg.write_count && arg.read_count)
15706 direction = "inout ";
15707 else if (arg.write_count)
15708 direction = "out ";
15709 }
15710
15711 return join(ts&: direction, ts: to_qualifiers_glsl(id: arg.id), ts: variable_decl(type, name: to_name(id: arg.id), id: arg.id));
15712}
15713
15714string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
15715{
15716 return to_unpacked_expression(id: var.initializer);
15717}
15718
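// Synthesizes a null constant of the given type and returns its expression
// (e.g., illustratively, vec4(0.0) for a vec4). Used when forcing zero-initialized variables.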
15719string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id)
15720{
15721#ifndef NDEBUG
15722 auto &type = get<SPIRType>(id: type_id);
15723 assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction ||
15724 type.storage == StorageClassGeneric);
15725#endif
15726 uint32_t id = ir.increase_bound_by(count: 1);
15727 ir.make_constant_null(id, type: type_id, add_to_typed_id_set: false);
15728 return constant_expression(c: get<SPIRConstant>(id));
15729}
15730
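// A type can be zero-initialized with a synthesized null constant only if it is not a pointer,
// is not an array while flatten_multidimensional_arrays is enabled, has no array dimensions
// sized by specialization constants, and all of its members satisfy the same rules.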
15731bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const
15732{
15733 if (type.pointer)
15734 return false;
15735
15736 if (!type.array.empty() && options.flatten_multidimensional_arrays)
15737 return false;
15738
15739 for (auto &literal : type.array_size_literal)
15740 if (!literal)
15741 return false;
15742
15743 for (auto &memb : type.member_types)
15744 if (!type_can_zero_initialize(type: get<SPIRType>(id: memb)))
15745 return false;
15746
15747 return true;
15748}
15749
15750string CompilerGLSL::variable_decl(const SPIRVariable &variable)
15751{
15752 // Ignore the pointer type since GLSL doesn't have pointers.
15753 auto &type = get_variable_data_type(var: variable);
15754
15755 if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer)
15756 SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");
15757
15758 auto res = join(ts: to_qualifiers_glsl(id: variable.self), ts: variable_decl(type, name: to_name(id: variable.self), id: variable.self));
15759
15760 if (variable.loop_variable && variable.static_expression)
15761 {
15762 uint32_t expr = variable.static_expression;
15763 if (ir.ids[expr].get_type() != TypeUndef)
15764 res += join(ts: " = ", ts: to_unpacked_expression(id: variable.static_expression));
15765 else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
15766 res += join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var: variable)));
15767 }
15768 else if (variable.initializer && !variable_decl_is_remapped_storage(var: variable, storage: StorageClassWorkgroup))
15769 {
15770 uint32_t expr = variable.initializer;
15771 if (ir.ids[expr].get_type() != TypeUndef)
15772 res += join(ts: " = ", ts: to_initializer_expression(var: variable));
15773 else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
15774 res += join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var: variable)));
15775 }
15776
15777 return res;
15778}
15779
15780const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
15781{
15782 auto &flags = get_decoration_bitset(id: variable.self);
15783 if (flags.get(bit: DecorationRelaxedPrecision))
15784 return "mediump ";
15785 else
15786 return "highp ";
15787}
15788
15789string CompilerGLSL::pls_decl(const PlsRemap &var)
15790{
15791 auto &variable = get<SPIRVariable>(id: var.id);
15792
15793 auto op_and_basetype = pls_format_to_basetype(format: var.format);
15794
15795 SPIRType type { op_and_basetype.first };
15796 type.basetype = op_and_basetype.second;
15797 auto vecsize = pls_format_to_components(format: var.format);
15798 if (vecsize > 1)
15799 {
15800 type.op = OpTypeVector;
15801 type.vecsize = vecsize;
15802 }
15803
15804 return join(ts: to_pls_layout(format: var.format), ts: to_pls_qualifiers_glsl(variable), ts: type_to_glsl(type), ts: " ",
15805 ts: to_name(id: variable.self));
15806}
15807
15808uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
15809{
15810 return to_array_size_literal(type, index: uint32_t(type.array.size() - 1));
15811}
15812
15813uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
15814{
15815 assert(type.array.size() == type.array_size_literal.size());
15816
15817 if (type.array_size_literal[index])
15818 {
15819 return type.array[index];
15820 }
15821 else
15822 {
15823 // Use the default spec constant value.
15824 // This is the best we can do.
15825 return evaluate_constant_u32(id: type.array[index]);
15826 }
15827}
15828
15829string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
15830{
15831 assert(type.array.size() == type.array_size_literal.size());
15832
15833 auto &size = type.array[index];
15834 if (!type.array_size_literal[index])
15835 return to_expression(id: size);
15836 else if (size)
15837 return convert_to_string(t: size);
15838 else if (!backend.unsized_array_supported)
15839 {
15840 // For runtime-sized arrays, we can work around
15841 // lack of standard support for this by simply having
15842 // a single element array.
15843 //
		// Runtime-sized arrays must always be the last member
		// of an interface block.
15846 return "1";
15847 }
15848 else
15849 return "";
15850}
15851
15852string CompilerGLSL::type_to_array_glsl(const SPIRType &type, uint32_t)
15853{
15854 if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
15855 {
15856 // We are using a wrapped pointer type, and we should not emit any array declarations here.
15857 return "";
15858 }
15859
15860 if (type.array.empty())
15861 return "";
15862
15863 if (options.flatten_multidimensional_arrays)
15864 {
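		// All dimensions collapse into a single flattened dimension whose size is the
		// product of every array dimension.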
15865 string res;
15866 res += "[";
15867 for (auto i = uint32_t(type.array.size()); i; i--)
15868 {
15869 res += enclose_expression(expr: to_array_size(type, index: i - 1));
15870 if (i > 1)
15871 res += " * ";
15872 }
15873 res += "]";
15874 return res;
15875 }
15876 else
15877 {
15878 if (type.array.size() > 1)
15879 {
15880 if (!options.es && options.version < 430)
15881 require_extension_internal(ext: "GL_ARB_arrays_of_arrays");
15882 else if (options.es && options.version < 310)
15883 SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
15884 "Try using --flatten-multidimensional-arrays or set "
15885 "options.flatten_multidimensional_arrays to true.");
15886 }
15887
15888 string res;
15889 for (auto i = uint32_t(type.array.size()); i; i--)
15890 {
15891 res += "[";
15892 res += to_array_size(type, index: i - 1);
15893 res += "]";
15894 }
15895 return res;
15896 }
15897}
15898
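// Builds the GLSL sampler/image/texture type name for an image or sampled image type.
// For example (illustrative), a combined 2D depth-comparison sampler becomes sampler2DShadow,
// and a 2D storage image with signed integer components becomes iimage2D.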
15899string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id, bool /*member*/)
15900{
15901 auto &imagetype = get<SPIRType>(id: type.image.type);
15902 string res;
15903
15904 switch (imagetype.basetype)
15905 {
15906 case SPIRType::Int64:
15907 res = "i64";
15908 require_extension_internal(ext: "GL_EXT_shader_image_int64");
15909 break;
15910 case SPIRType::UInt64:
15911 res = "u64";
15912 require_extension_internal(ext: "GL_EXT_shader_image_int64");
15913 break;
15914 case SPIRType::Int:
15915 case SPIRType::Short:
15916 case SPIRType::SByte:
15917 res = "i";
15918 break;
15919 case SPIRType::UInt:
15920 case SPIRType::UShort:
15921 case SPIRType::UByte:
15922 res = "u";
15923 break;
15924 default:
15925 break;
15926 }
15927
15928 // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
	// We cannot express a true half texture type in GLSL, nor short integer formats for that matter.
15930
15931 if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
15932 return res + "subpassInput" + (type.image.ms ? "MS" : "");
15933 else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
15934 subpass_input_is_framebuffer_fetch(id))
15935 {
15936 SPIRType sampled_type = get<SPIRType>(id: type.image.type);
15937 sampled_type.vecsize = 4;
15938 return type_to_glsl(type: sampled_type);
15939 }
15940
15941 // If we're emulating subpassInput with samplers, force sampler2D
15942 // so we don't have to specify format.
15943 if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
15944 {
15945 // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
15946 if (type.image.dim == DimBuffer && type.image.sampled == 1)
15947 res += "sampler";
15948 else
15949 res += type.image.sampled == 2 ? "image" : "texture";
15950 }
15951 else
15952 res += "sampler";
15953
15954 switch (type.image.dim)
15955 {
15956 case Dim1D:
15957 // ES doesn't support 1D. Fake it with 2D.
15958 res += options.es ? "2D" : "1D";
15959 break;
15960 case Dim2D:
15961 res += "2D";
15962 break;
15963 case Dim3D:
15964 res += "3D";
15965 break;
15966 case DimCube:
15967 res += "Cube";
15968 break;
15969 case DimRect:
15970 if (options.es)
15971 SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");
15972
15973 if (is_legacy_desktop())
15974 require_extension_internal(ext: "GL_ARB_texture_rectangle");
15975
15976 res += "2DRect";
15977 break;
15978
15979 case DimBuffer:
15980 if (options.es && options.version < 320)
15981 require_extension_internal(ext: "GL_EXT_texture_buffer");
15982 else if (!options.es && options.version < 140)
15983 require_extension_internal(ext: "GL_EXT_texture_buffer_object");
15984 res += "Buffer";
15985 break;
15986
15987 case DimSubpassData:
15988 res += "2D";
15989 break;
15990 default:
15991 SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
15992 }
15993
15994 if (type.image.ms)
15995 res += "MS";
15996 if (type.image.arrayed)
15997 {
15998 if (is_legacy_desktop())
15999 require_extension_internal(ext: "GL_EXT_texture_array");
16000 res += "Array";
16001 }
16002
16003 // "Shadow" state in GLSL only exists for samplers and combined image samplers.
16004 if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
16005 is_depth_image(type, id))
16006 {
16007 res += "Shadow";
16008
16009 if (type.image.dim == DimCube && is_legacy())
16010 {
16011 if (!options.es)
16012 require_extension_internal(ext: "GL_EXT_gpu_shader4");
16013 else
16014 {
16015 require_extension_internal(ext: "GL_NV_shadow_samplers_cube");
16016 res += "NV";
16017 }
16018 }
16019 }
16020
16021 return res;
16022}
16023
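// Returns the constructor name for a type. When the backend uses array constructor syntax,
// one empty bracket pair is appended per array dimension, e.g. (illustrative) float[][]
// for a two-dimensional float array.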
16024string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
16025{
16026 if (backend.use_array_constructor && type.array.size() > 1)
16027 {
16028 if (options.flatten_multidimensional_arrays)
16029 SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, "
16030 "e.g. float[][]().");
16031 else if (!options.es && options.version < 430)
16032 require_extension_internal(ext: "GL_ARB_arrays_of_arrays");
16033 else if (options.es && options.version < 310)
16034 SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
16035 }
16036
16037 auto e = type_to_glsl(type);
16038 if (backend.use_array_constructor)
16039 {
16040 for (uint32_t i = 0; i < type.array.size(); i++)
16041 e += "[]";
16042 }
16043 return e;
16044}
16045
// The optional id parameter indicates the object whose type we are trying
// to find the description for. Most type descriptions do not depend on a
// specific object's use of that type.
16049string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
16050{
16051 if (is_physical_pointer(type) && !is_physical_pointer_to_buffer_block(type))
16052 {
16053 // Need to create a magic type name which compacts the entire type information.
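		// The name concatenates the pointee type name, each array dimension together with its
		// ArrayStride, and a "Pointer" suffix, so distinct layouts map to distinct type names.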
16054 auto *parent = &get_pointee_type(type);
16055 string name = type_to_glsl(type: *parent);
16056
16057 uint32_t array_stride = get_decoration(id: type.parent_type, decoration: DecorationArrayStride);
16058
16059 // Resolve all array dimensions in one go since once we lose the pointer type,
		// array information is left to type_to_array_glsl. The base type loses array information.
16061 while (is_array(type: *parent))
16062 {
16063 if (parent->array_size_literal.back())
16064 name += join(ts: type.array.back(), ts: "_");
16065 else
16066 name += join(ts: "id", ts: type.array.back(), ts: "_");
16067
16068 name += "stride_" + std::to_string(val: array_stride);
16069
16070 array_stride = get_decoration(id: parent->parent_type, decoration: DecorationArrayStride);
16071 parent = &get<SPIRType>(id: parent->parent_type);
16072 }
16073
16074 name += "Pointer";
16075 return name;
16076 }
16077
16078 switch (type.basetype)
16079 {
16080 case SPIRType::Struct:
16081 // Need OpName lookup here to get a "sensible" name for a struct.
16082 if (backend.explicit_struct_type)
16083 return join(ts: "struct ", ts: to_name(id: type.self));
16084 else
16085 return to_name(id: type.self);
16086
16087 case SPIRType::Image:
16088 case SPIRType::SampledImage:
16089 return image_type_glsl(type, id);
16090
16091 case SPIRType::Sampler:
16092 // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
16093 // this distinction into the type system.
16094 return comparison_ids.count(x: id) ? "samplerShadow" : "sampler";
16095
16096 case SPIRType::AccelerationStructure:
16097 return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";
16098
16099 case SPIRType::RayQuery:
16100 return "rayQueryEXT";
16101
16102 case SPIRType::Void:
16103 return "void";
16104
16105 default:
16106 break;
16107 }
16108
16109 if (type.basetype == SPIRType::UInt && is_legacy())
16110 {
16111 if (options.es)
16112 // HACK: spirv-cross changes bools into uints and generates code which compares them to
			// zero. Input code will already have been validated not to contain any uints,
16114 // so any remaining uints must in fact be bools. However, simply returning "bool" here
16115 // will result in invalid code. Instead, return an int.
16116 return backend.basic_int_type;
16117 else
16118 require_extension_internal(ext: "GL_EXT_gpu_shader4");
16119 }
16120
16121 if (type.basetype == SPIRType::AtomicCounter)
16122 {
16123 if (options.es && options.version < 310)
16124 SPIRV_CROSS_THROW("At least ESSL 3.10 required for atomic counters.");
16125 else if (!options.es && options.version < 420)
16126 require_extension_internal(ext: "GL_ARB_shader_atomic_counters");
16127 }
16128
16129 if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
16130 {
16131 switch (type.basetype)
16132 {
16133 case SPIRType::Boolean:
16134 return "bool";
16135 case SPIRType::SByte:
16136 return backend.basic_int8_type;
16137 case SPIRType::UByte:
16138 return backend.basic_uint8_type;
16139 case SPIRType::Short:
16140 return backend.basic_int16_type;
16141 case SPIRType::UShort:
16142 return backend.basic_uint16_type;
16143 case SPIRType::Int:
16144 return backend.basic_int_type;
16145 case SPIRType::UInt:
16146 return backend.basic_uint_type;
16147 case SPIRType::AtomicCounter:
16148 return "atomic_uint";
16149 case SPIRType::Half:
16150 return "float16_t";
16151 case SPIRType::Float:
16152 return "float";
16153 case SPIRType::Double:
16154 return "double";
16155 case SPIRType::Int64:
16156 return "int64_t";
16157 case SPIRType::UInt64:
16158 return "uint64_t";
16159 default:
16160 return "???";
16161 }
16162 }
16163 else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
16164 {
16165 switch (type.basetype)
16166 {
16167 case SPIRType::Boolean:
16168 return join(ts: "bvec", ts: type.vecsize);
16169 case SPIRType::SByte:
16170 return join(ts: "i8vec", ts: type.vecsize);
16171 case SPIRType::UByte:
16172 return join(ts: "u8vec", ts: type.vecsize);
16173 case SPIRType::Short:
16174 return join(ts: "i16vec", ts: type.vecsize);
16175 case SPIRType::UShort:
16176 return join(ts: "u16vec", ts: type.vecsize);
16177 case SPIRType::Int:
16178 return join(ts: "ivec", ts: type.vecsize);
16179 case SPIRType::UInt:
16180 return join(ts: "uvec", ts: type.vecsize);
16181 case SPIRType::Half:
16182 return join(ts: "f16vec", ts: type.vecsize);
16183 case SPIRType::Float:
16184 return join(ts: "vec", ts: type.vecsize);
16185 case SPIRType::Double:
16186 return join(ts: "dvec", ts: type.vecsize);
16187 case SPIRType::Int64:
16188 return join(ts: "i64vec", ts: type.vecsize);
16189 case SPIRType::UInt64:
16190 return join(ts: "u64vec", ts: type.vecsize);
16191 default:
16192 return "???";
16193 }
16194 }
16195 else if (type.vecsize == type.columns) // Simple Matrix builtin
16196 {
16197 switch (type.basetype)
16198 {
16199 case SPIRType::Boolean:
16200 return join(ts: "bmat", ts: type.vecsize);
16201 case SPIRType::Int:
16202 return join(ts: "imat", ts: type.vecsize);
16203 case SPIRType::UInt:
16204 return join(ts: "umat", ts: type.vecsize);
16205 case SPIRType::Half:
16206 return join(ts: "f16mat", ts: type.vecsize);
16207 case SPIRType::Float:
16208 return join(ts: "mat", ts: type.vecsize);
16209 case SPIRType::Double:
16210 return join(ts: "dmat", ts: type.vecsize);
16211 // Matrix types not supported for int64/uint64.
16212 default:
16213 return "???";
16214 }
16215 }
16216 else
16217 {
16218 switch (type.basetype)
16219 {
16220 case SPIRType::Boolean:
16221 return join(ts: "bmat", ts: type.columns, ts: "x", ts: type.vecsize);
16222 case SPIRType::Int:
16223 return join(ts: "imat", ts: type.columns, ts: "x", ts: type.vecsize);
16224 case SPIRType::UInt:
16225 return join(ts: "umat", ts: type.columns, ts: "x", ts: type.vecsize);
16226 case SPIRType::Half:
16227 return join(ts: "f16mat", ts: type.columns, ts: "x", ts: type.vecsize);
16228 case SPIRType::Float:
16229 return join(ts: "mat", ts: type.columns, ts: "x", ts: type.vecsize);
16230 case SPIRType::Double:
16231 return join(ts: "dmat", ts: type.columns, ts: "x", ts: type.vecsize);
16232 // Matrix types not supported for int64/uint64.
16233 default:
16234 return "???";
16235 }
16236 }
16237}
16238
16239void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
16240 const unordered_set<string> &variables_secondary, string &name)
16241{
16242 if (name.empty())
16243 return;
16244
16245 ParsedIR::sanitize_underscores(str&: name);
16246 if (ParsedIR::is_globally_reserved_identifier(str&: name, allow_reserved_prefixes: true))
16247 {
16248 name.clear();
16249 return;
16250 }
16251
16252 update_name_cache(cache_primary&: variables_primary, cache_secondary: variables_secondary, name);
16253}
16254
16255void CompilerGLSL::add_local_variable_name(uint32_t id)
16256{
16257 add_variable(variables_primary&: local_variable_names, variables_secondary: block_names, name&: ir.meta[id].decoration.alias);
16258}
16259
16260void CompilerGLSL::add_resource_name(uint32_t id)
16261{
16262 add_variable(variables_primary&: resource_names, variables_secondary: block_names, name&: ir.meta[id].decoration.alias);
16263}
16264
16265void CompilerGLSL::add_header_line(const std::string &line)
16266{
16267 header_lines.push_back(t: line);
16268}
16269
16270bool CompilerGLSL::has_extension(const std::string &ext) const
16271{
16272 auto itr = find(first: begin(cont: forced_extensions), last: end(cont: forced_extensions), val: ext);
16273 return itr != end(cont: forced_extensions);
16274}
16275
16276void CompilerGLSL::require_extension(const std::string &ext)
16277{
16278 if (!has_extension(ext))
16279 forced_extensions.push_back(t: ext);
16280}
16281
16282const SmallVector<std::string> &CompilerGLSL::get_required_extensions() const
16283{
16284 return forced_extensions;
16285}
16286
16287void CompilerGLSL::require_extension_internal(const string &ext)
16288{
16289 if (backend.supports_extensions && !has_extension(ext))
16290 {
16291 forced_extensions.push_back(t: ext);
16292 force_recompile();
16293 }
16294}
16295
16296void CompilerGLSL::flatten_buffer_block(VariableID id)
16297{
16298 auto &var = get<SPIRVariable>(id);
16299 auto &type = get<SPIRType>(id: var.basetype);
16300 auto name = to_name(id: type.self, allow_alias: false);
16301 auto &flags = get_decoration_bitset(id: type.self);
16302
16303 if (!type.array.empty())
16304 SPIRV_CROSS_THROW(name + " is an array of UBOs.");
16305 if (type.basetype != SPIRType::Struct)
16306 SPIRV_CROSS_THROW(name + " is not a struct.");
16307 if (!flags.get(bit: DecorationBlock))
16308 SPIRV_CROSS_THROW(name + " is not a block.");
16309 if (type.member_types.empty())
16310 SPIRV_CROSS_THROW(name + " is an empty struct.");
16311
16312 flattened_buffer_blocks.insert(x: id);
16313}
16314
16315bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
16316{
16317 return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
16318}
16319
16320bool CompilerGLSL::is_user_type_structured(uint32_t /*id*/) const
16321{
16322 return false; // GLSL itself does not have structured user type, but HLSL does with StructuredBuffer and RWStructuredBuffer resources.
16323}
16324
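// Atomic image operations require the image to be both readable and writable, so if the
// backing variable was declared readonly or writeonly, strip those decorations and recompile.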
16325bool CompilerGLSL::check_atomic_image(uint32_t id)
16326{
16327 auto &type = expression_type(id);
16328 if (type.storage == StorageClassImage)
16329 {
16330 if (options.es && options.version < 320)
16331 require_extension_internal(ext: "GL_OES_shader_image_atomic");
16332
16333 auto *var = maybe_get_backing_variable(chain: id);
16334 if (var)
16335 {
16336 if (has_decoration(id: var->self, decoration: DecorationNonWritable) || has_decoration(id: var->self, decoration: DecorationNonReadable))
16337 {
16338 unset_decoration(id: var->self, decoration: DecorationNonWritable);
16339 unset_decoration(id: var->self, decoration: DecorationNonReadable);
16340 force_recompile();
16341 }
16342 }
16343 return true;
16344 }
16345 else
16346 return false;
16347}
16348
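// GLSL cannot declare two functions with the same name and identical parameter types, so we
// hash each function's argument types (with pointers stripped); if a previously seen function
// already uses this name with the same hash, the new function is renamed.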
16349void CompilerGLSL::add_function_overload(const SPIRFunction &func)
16350{
16351 Hasher hasher;
16352 for (auto &arg : func.arguments)
16353 {
16354 // Parameters can vary with pointer type or not,
16355 // but that will not change the signature in GLSL/HLSL,
16356 // so strip the pointer type before hashing.
16357 uint32_t type_id = get_pointee_type_id(type_id: arg.type);
16358 auto &type = get<SPIRType>(id: type_id);
16359
16360 if (!combined_image_samplers.empty())
16361 {
16362 // If we have combined image samplers, we cannot really trust the image and sampler arguments
16363 // we pass down to callees, because they may be shuffled around.
16364 // Ignore these arguments, to make sure that functions need to differ in some other way
16365 // to be considered different overloads.
16366 if (type.basetype == SPIRType::SampledImage ||
16367 (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
16368 {
16369 continue;
16370 }
16371 }
16372
16373 hasher.u32(value: type_id);
16374 }
16375 uint64_t types_hash = hasher.get();
16376
16377 auto function_name = to_name(id: func.self);
16378 auto itr = function_overloads.find(x: function_name);
16379 if (itr != end(cont&: function_overloads))
16380 {
16381 // There exists a function with this name already.
16382 auto &overloads = itr->second;
16383 if (overloads.count(x: types_hash) != 0)
16384 {
16385 // Overload conflict, assign a new name.
16386 add_resource_name(id: func.self);
16387 function_overloads[to_name(id: func.self)].insert(x: types_hash);
16388 }
16389 else
16390 {
16391 // Can reuse the name.
16392 overloads.insert(x: types_hash);
16393 }
16394 }
16395 else
16396 {
16397 // First time we see this function name.
16398 add_resource_name(id: func.self);
16399 function_overloads[to_name(id: func.self)].insert(x: types_hash);
16400 }
16401}
16402
16403void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
16404{
16405 if (func.self != ir.default_entry_point)
16406 add_function_overload(func);
16407
16408 // Avoid shadow declarations.
16409 local_variable_names = resource_names;
16410
16411 string decl;
16412
16413 auto &type = get<SPIRType>(id: func.return_type);
16414 decl += flags_to_qualifiers_glsl(type, flags: return_flags);
16415 decl += type_to_glsl(type);
16416 decl += type_to_array_glsl(type, 0);
16417 decl += " ";
16418
16419 if (func.self == ir.default_entry_point)
16420 {
16421 // If we need complex fallback in GLSL, we just wrap main() in a function
16422 // and interlock the entire shader ...
16423 if (interlocked_is_complex)
16424 decl += "spvMainInterlockedBody";
16425 else
16426 decl += "main";
16427
16428 processing_entry_point = true;
16429 }
16430 else
16431 decl += to_name(id: func.self);
16432
16433 decl += "(";
16434 SmallVector<string> arglist;
16435 for (auto &arg : func.arguments)
16436 {
16437 // Do not pass in separate images or samplers if we're remapping
16438 // to combined image samplers.
16439 if (skip_argument(id: arg.id))
16440 continue;
16441
16442 // Might change the variable name if it already exists in this function.
16443 // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for variables.
16446 add_local_variable_name(id: arg.id);
16447
16448 arglist.push_back(t: argument_decl(arg));
16449
16450 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
16451 auto *var = maybe_get<SPIRVariable>(id: arg.id);
16452 if (var)
16453 var->parameter = &arg;
16454 }
16455
16456 for (auto &arg : func.shadow_arguments)
16457 {
16458 // Might change the variable name if it already exists in this function.
16459 // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for variables.
16462 add_local_variable_name(id: arg.id);
16463
16464 arglist.push_back(t: argument_decl(arg));
16465
16466 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
16467 auto *var = maybe_get<SPIRVariable>(id: arg.id);
16468 if (var)
16469 var->parameter = &arg;
16470 }
16471
16472 decl += merge(list: arglist);
16473 decl += ")";
16474 statement(ts&: decl);
16475}
16476
16477void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
16478{
16479 // Avoid potential cycles.
16480 if (func.active)
16481 return;
16482 func.active = true;
16483
16484 // If we depend on a function, emit that function before we emit our own function.
16485 for (auto block : func.blocks)
16486 {
16487 auto &b = get<SPIRBlock>(id: block);
16488 for (auto &i : b.ops)
16489 {
16490 auto ops = stream(instr: i);
16491 auto op = static_cast<Op>(i.op);
16492
16493 if (op == OpFunctionCall)
16494 {
16495 // Recursively emit functions which are called.
16496 uint32_t id = ops[2];
16497 emit_function(func&: get<SPIRFunction>(id), return_flags: ir.meta[ops[1]].decoration.decoration_flags);
16498 }
16499 }
16500 }
16501
16502 if (func.entry_line.file_id != 0)
16503 emit_line_directive(file_id: func.entry_line.file_id, line_literal: func.entry_line.line_literal);
16504 emit_function_prototype(func, return_flags);
16505 begin_scope();
16506
16507 if (func.self == ir.default_entry_point)
16508 emit_entry_point_declarations();
16509
16510 current_function = &func;
16511 auto &entry_block = get<SPIRBlock>(id: func.entry_block);
16512
16513 sort(first: begin(cont&: func.constant_arrays_needed_on_stack), last: end(cont&: func.constant_arrays_needed_on_stack));
16514 for (auto &array : func.constant_arrays_needed_on_stack)
16515 {
16516 auto &c = get<SPIRConstant>(id: array);
16517 auto &type = get<SPIRType>(id: c.constant_type);
16518 statement(ts: variable_decl(type, name: join(ts: "_", ts&: array, ts: "_array_copy")), ts: " = ", ts: constant_expression(c), ts: ";");
16519 }
16520
16521 for (auto &v : func.local_variables)
16522 {
16523 auto &var = get<SPIRVariable>(id: v);
16524 var.deferred_declaration = false;
16525 if (var.storage == StorageClassTaskPayloadWorkgroupEXT)
16526 continue;
16527
16528 if (variable_decl_is_remapped_storage(var, storage: StorageClassWorkgroup))
16529 {
			// Special variable types which cannot have initializers
			// need to be declared as standalone variables.
16532 // Comes from MSL which can push global variables as local variables in main function.
16533 add_local_variable_name(id: var.self);
16534 statement(ts: variable_decl(variable: var), ts: ";");
16535 var.deferred_declaration = false;
16536 }
16537 else if (var.storage == StorageClassPrivate)
16538 {
			// These variables will not have had their CFG usage analyzed, so move them to the entry block.
			// Comes from MSL, which can push global variables as local variables in the main function.
			// We could just declare them right now, but we would miss out on an important initialization case,
			// which is LUT declaration in MSL.
			// If we don't declare the variable when it is assigned, we're forced to go through a helper function
			// which copies elements one by one.
16545 add_local_variable_name(id: var.self);
16546
16547 if (var.initializer)
16548 {
16549 statement(ts: variable_decl(variable: var), ts: ";");
16550 var.deferred_declaration = false;
16551 }
16552 else
16553 {
16554 auto &dominated = entry_block.dominated_variables;
16555 if (find(first: begin(cont&: dominated), last: end(cont&: dominated), val: var.self) == end(cont&: dominated))
16556 entry_block.dominated_variables.push_back(t: var.self);
16557 var.deferred_declaration = true;
16558 }
16559 }
16560 else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
16561 {
16562 // No need to declare this variable, it has a static expression.
16563 var.deferred_declaration = false;
16564 }
16565 else if (expression_is_lvalue(id: v))
16566 {
16567 add_local_variable_name(id: var.self);
16568
16569 // Loop variables should never be declared early, they are explicitly emitted in a loop.
16570 if (var.initializer && !var.loop_variable)
16571 statement(ts: variable_decl_function_local(var), ts: ";");
16572 else
16573 {
16574 // Don't declare variable until first use to declutter the GLSL output quite a lot.
16575 // If we don't touch the variable before first branch,
16576 // declare it then since we need variable declaration to be in top scope.
16577 var.deferred_declaration = true;
16578 }
16579 }
16580 else
16581 {
16582 // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
16583 // For these types (non-lvalue), we enforce forwarding through a shadowed variable.
16584 // This means that when we OpStore to these variables, we just write in the expression ID directly.
16585 // This breaks any kind of branching, since the variable must be statically assigned.
16586 // Branching on samplers and images would be pretty much impossible to fake in GLSL.
16587 var.statically_assigned = true;
16588 }
16589
16590 var.loop_variable_enable = false;
16591
16592 // Loop variables are never declared outside their for-loop, so block any implicit declaration.
16593 if (var.loop_variable)
16594 {
16595 var.deferred_declaration = false;
			// Need to reset the static expression so we can fall back to the initializer if need be.
16597 var.static_expression = 0;
16598 }
16599 }
16600
16601 // Enforce declaration order for regression testing purposes.
16602 for (auto &block_id : func.blocks)
16603 {
16604 auto &block = get<SPIRBlock>(id: block_id);
16605 sort(first: begin(cont&: block.dominated_variables), last: end(cont&: block.dominated_variables));
16606 }
16607
16608 for (auto &line : current_function->fixup_hooks_in)
16609 line();
16610
16611 emit_block_chain(block&: entry_block);
16612
16613 end_scope();
16614 processing_entry_point = false;
16615 statement(ts: "");
16616
	// Make sure deferred declaration state for local variables is cleared when we are done with the function.
	// Otherwise we risk declaring Private/Workgroup variables in places we are not supposed to.
16619 for (auto &v : func.local_variables)
16620 {
16621 auto &var = get<SPIRVariable>(id: v);
16622 var.deferred_declaration = false;
16623 }
16624}
16625
16626void CompilerGLSL::emit_fixup()
16627{
16628 if (is_vertex_like_shader())
16629 {
16630 if (options.vertex.fixup_clipspace)
16631 {
16632 const char *suffix = backend.float_literal_suffix ? "f" : "";
16633 statement(ts: "gl_Position.z = 2.0", ts&: suffix, ts: " * gl_Position.z - gl_Position.w;");
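			// A minimal illustration of the fixup above: z is remapped from the [0, w] depth convention
			// (Vulkan/D3D style) to GL's [-w, w], e.g. z = 0.25 with w = 1.0 becomes 2.0 * 0.25 - 1.0 = -0.5.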
16634 }
16635
16636 if (options.vertex.flip_vert_y)
16637 statement(ts: "gl_Position.y = -gl_Position.y;");
16638 }
16639}
16640
16641void CompilerGLSL::flush_phi(BlockID from, BlockID to)
16642{
16643 auto &child = get<SPIRBlock>(id: to);
16644 if (child.ignore_phi_from_block == from)
16645 return;
16646
16647 unordered_set<uint32_t> temporary_phi_variables;
16648
16649 for (auto itr = begin(cont&: child.phi_variables); itr != end(cont&: child.phi_variables); ++itr)
16650 {
16651 auto &phi = *itr;
16652
16653 if (phi.parent == from)
16654 {
16655 auto &var = get<SPIRVariable>(id: phi.function_variable);
16656
16657 // A Phi variable might be a loop variable, so flush to static expression.
16658 if (var.loop_variable && !var.loop_variable_enable)
16659 var.static_expression = phi.local_variable;
16660 else
16661 {
16662 flush_variable_declaration(id: phi.function_variable);
16663
16664 // Check if we are going to write to a Phi variable that another statement will read from
16665 // as part of another Phi node in our target block.
16666 // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
16667 // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
16668 bool need_saved_temporary =
16669 find_if(first: itr + 1, last: end(cont&: child.phi_variables), pred: [&](const SPIRBlock::Phi &future_phi) -> bool {
16670 return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
16671 }) != end(cont&: child.phi_variables);
16672
16673 if (need_saved_temporary)
16674 {
16675 // Need to make sure we declare the phi variable with a copy at the right scope.
16676 // We cannot safely declare a temporary here since we might be inside a continue block.
16677 if (!var.allocate_temporary_copy)
16678 {
16679 var.allocate_temporary_copy = true;
16680 force_recompile();
16681 }
16682 statement(ts: "_", ts&: phi.function_variable, ts: "_copy", ts: " = ", ts: to_name(id: phi.function_variable), ts: ";");
16683 temporary_phi_variables.insert(x: phi.function_variable);
16684 }
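					// For illustration only (variable names are made up): if two variables swap values via Phi,
					// the emitted GLSL ends up roughly as:
					//   _10_copy = _10;
					//   _10 = _11;
					//   _11 = _10_copy;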
16685
					// This might be called in a continue block, so make sure we
					// use this to emit ESSL 1.0-compliant increments/decrements.
16688 auto lhs = to_expression(id: phi.function_variable);
16689
16690 string rhs;
16691 if (temporary_phi_variables.count(x: phi.local_variable))
16692 rhs = join(ts: "_", ts&: phi.local_variable, ts: "_copy");
16693 else
16694 rhs = to_pointer_expression(id: phi.local_variable);
16695
16696 if (!optimize_read_modify_write(type: get<SPIRType>(id: var.basetype), lhs, rhs))
16697 statement(ts&: lhs, ts: " = ", ts&: rhs, ts: ";");
16698 }
16699
16700 register_write(chain: phi.function_variable);
16701 }
16702 }
16703}
16704
16705void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
16706{
16707 auto &to_block = get<SPIRBlock>(id: to);
16708 if (from == to)
16709 return;
16710
16711 assert(is_continue(to));
16712 if (to_block.complex_continue)
16713 {
16714 // Just emit the whole block chain as is.
16715 auto usage_counts = expression_usage_counts;
16716
16717 emit_block_chain(block&: to_block);
16718
16719 // Expression usage counts are moot after returning from the continue block.
16720 expression_usage_counts = usage_counts;
16721 }
16722 else
16723 {
16724 auto &from_block = get<SPIRBlock>(id: from);
16725 bool outside_control_flow = false;
16726 uint32_t loop_dominator = 0;
16727
16728 // FIXME: Refactor this to not use the old loop_dominator tracking.
16729 if (from_block.merge_block)
16730 {
16731 // If we are a loop header, we don't set the loop dominator,
16732 // so just use "self" here.
16733 loop_dominator = from;
16734 }
16735 else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
16736 {
16737 loop_dominator = from_block.loop_dominator;
16738 }
16739
16740 if (loop_dominator != 0)
16741 {
16742 auto &cfg = get_cfg_for_current_function();
16743
16744 // For non-complex continue blocks, we implicitly branch to the continue block
16745 // by having the continue block be part of the loop header in for (; ; continue-block).
16746 outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from: loop_dominator, to: from);
16747 }
16748
		// Some simplification for for-loops. We always end up with a useless continue;
		// statement since we branch to a loop block anyway.
		// Walk the CFG: if the block calling continue executes unconditionally once we have entered the loop block,
		// we can avoid writing out an explicit continue statement.
		// This is similar to the optimization applied to return statements when we know we're outside control flow.
16754 if (!outside_control_flow)
16755 statement(ts: "continue;");
16756 }
16757}
16758
16759void CompilerGLSL::branch(BlockID from, BlockID to)
16760{
16761 flush_phi(from, to);
16762 flush_control_dependent_expressions(block: from);
16763
16764 bool to_is_continue = is_continue(next: to);
16765
16766 // This is only a continue if we branch to our loop dominator.
16767 if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(id: from).loop_dominator == to)
16768 {
16769 // This can happen if we had a complex continue block which was emitted.
16770 // Once the continue block tries to branch to the loop header, just emit continue;
16771 // and end the chain here.
16772 statement(ts: "continue;");
16773 }
16774 else if (from != to && is_break(next: to))
16775 {
16776 // We cannot break to ourselves, so check explicitly for from != to.
16777 // This case can trigger if a loop header is all three of these things:
16778 // - Continue block
16779 // - Loop header
16780 // - Break merge target all at once ...
16781
16782 // Very dirty workaround.
16783 // Switch constructs are able to break, but they cannot break out of a loop at the same time,
16784 // yet SPIR-V allows it.
16785 // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
16786 // write to the ladder here, and defer the break.
16787 // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
16788 if (is_loop_break(next: to))
16789 {
16790 for (size_t n = current_emitting_switch_stack.size(); n; n--)
16791 {
16792 auto *current_emitting_switch = current_emitting_switch_stack[n - 1];
16793
16794 if (current_emitting_switch &&
16795 current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
16796 get<SPIRBlock>(id: current_emitting_switch->loop_dominator).merge_block == to)
16797 {
16798 if (!current_emitting_switch->need_ladder_break)
16799 {
16800 force_recompile();
16801 current_emitting_switch->need_ladder_break = true;
16802 }
16803
16804 statement(ts: "_", ts&: current_emitting_switch->self, ts: "_ladder_break = true;");
16805 }
16806 else
16807 break;
16808 }
16809 }
16810 statement(ts: "break;");
16811 }
16812 else if (to_is_continue || from == to)
16813 {
		// The from == to case can happen for a do-while loop which branches into itself.
		// We don't mark these cases as continue blocks, but the only possible way to branch into
		// ourselves is by means of a continue block.
16817
16818 // If we are merging to a continue block, there is no need to emit the block chain for continue here.
16819 // We can branch to the continue block after we merge execution.
16820
16821 // Here we make use of structured control flow rules from spec:
16822 // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
16823 // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
16824 // If we are branching to a merge block, we must be inside a construct which dominates the merge block.
16825 auto &block_meta = ir.block_meta[to];
16826 bool branching_to_merge =
16827 (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
16828 ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
16829 if (!to_is_continue || !branching_to_merge)
16830 branch_to_continue(from, to);
16831 }
16832 else if (!is_conditional(next: to))
16833 emit_block_chain(block&: get<SPIRBlock>(id: to));
16834
16835 // It is important that we check for break before continue.
16836 // A block might serve two purposes, a break block for the inner scope, and
16837 // a continue block in the outer scope.
16838 // Inner scope always takes precedence.
16839}
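// For illustration of the ladder-break workaround above (IDs are made up): the switch handling in
// emit_block_chain declares and tests the ladder variable, so the generated GLSL ends up roughly as:
//   bool _20_ladder_break = false;
//   switch (sel) { case 0: { _20_ladder_break = true; break; } ... }
//   if (_20_ladder_break) break;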
16840
16841void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
16842{
16843 auto &from_block = get<SPIRBlock>(id: from);
16844 BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
16845
16846 // If we branch directly to our selection merge target, we don't need a code path.
16847 bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, to: true_block);
16848 bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, to: false_block);
16849
16850 if (!true_block_needs_code && !false_block_needs_code)
16851 return;
16852
16853 // We might have a loop merge here. Only consider selection flattening constructs.
16854 // Loop hints are handled explicitly elsewhere.
16855 if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten)
16856 emit_block_hints(block: from_block);
16857
16858 if (true_block_needs_code)
16859 {
16860 statement(ts: "if (", ts: to_expression(id: cond), ts: ")");
16861 begin_scope();
16862 branch(from, to: true_block);
16863 end_scope();
16864
16865 if (false_block_needs_code)
16866 {
16867 statement(ts: "else");
16868 begin_scope();
16869 branch(from, to: false_block);
16870 end_scope();
16871 }
16872 }
16873 else if (false_block_needs_code)
16874 {
16875 // Only need false path, use negative conditional.
16876 statement(ts: "if (!", ts: to_enclosed_expression(id: cond), ts: ")");
16877 begin_scope();
16878 branch(from, to: false_block);
16879 end_scope();
16880 }
16881}
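// For illustration, the two-way branch above emits either a full if/else pair, just
// "if (cond) { ... }" when only the true path needs code, or the negated form
// "if (!cond) { ... }" when only the false path does.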
16882
16883// FIXME: This currently cannot handle complex continue blocks
16884// as in do-while.
16885// This should be seen as a "trivial" continue block.
16886string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
16887{
16888 auto *block = &get<SPIRBlock>(id: continue_block);
16889
16890 // While emitting the continue block, declare_temporary will check this
16891 // if we have to emit temporaries.
16892 current_continue_block = block;
16893
16894 SmallVector<string> statements;
16895
16896 // Capture all statements into our list.
16897 auto *old = redirect_statement;
16898 redirect_statement = &statements;
16899
16900 // Stamp out all blocks one after each other.
16901 while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
16902 {
16903 // Write out all instructions we have in this block.
16904 emit_block_instructions(block&: *block);
16905
16906 // For plain branchless for/while continue blocks.
16907 if (block->next_block)
16908 {
16909 flush_phi(from: continue_block, to: block->next_block);
16910 block = &get<SPIRBlock>(id: block->next_block);
16911 }
16912 // For do while blocks. The last block will be a select block.
16913 else if (block->true_block && follow_true_block)
16914 {
16915 flush_phi(from: continue_block, to: block->true_block);
16916 block = &get<SPIRBlock>(id: block->true_block);
16917 }
16918 else if (block->false_block && follow_false_block)
16919 {
16920 flush_phi(from: continue_block, to: block->false_block);
16921 block = &get<SPIRBlock>(id: block->false_block);
16922 }
16923 else
16924 {
16925 SPIRV_CROSS_THROW("Invalid continue block detected!");
16926 }
16927 }
16928
16929 // Restore old pointer.
16930 redirect_statement = old;
16931
16932 // Somewhat ugly, strip off the last ';' since we use ',' instead.
16933 // Ideally, we should select this behavior in statement().
16934 for (auto &s : statements)
16935 {
16936 if (!s.empty() && s.back() == ';')
16937 s.erase(pos: s.size() - 1, n: 1);
16938 }
16939
16940 current_continue_block = nullptr;
16941 return merge(list: statements);
16942}
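// For illustration (names are made up): if the continue block increments two counters, the string
// returned above becomes something like "i++, j += 2", which the caller drops into the third clause
// of the loop header, e.g. "for (init; cond; i++, j += 2)".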
16943
16944void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
16945{
16946 // While loops do not take initializers, so declare all of them outside.
16947 for (auto &loop_var : block.loop_variables)
16948 {
16949 auto &var = get<SPIRVariable>(id: loop_var);
16950 statement(ts: variable_decl(variable: var), ts: ";");
16951 }
16952}
16953
16954string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
16955{
16956 if (block.loop_variables.empty())
16957 return "";
16958
16959 bool same_types = for_loop_initializers_are_same_type(block);
16960 // We can only declare for loop initializers if all variables are of same type.
16961 // If we cannot do this, declare individual variables before the loop header.
16962
16963 // We might have a loop variable candidate which was not assigned to for some reason.
16964 uint32_t missing_initializers = 0;
16965 for (auto &variable : block.loop_variables)
16966 {
16967 uint32_t expr = get<SPIRVariable>(id: variable).static_expression;
16968
16969 // Sometimes loop variables are initialized with OpUndef, but we can just declare
16970 // a plain variable without initializer in this case.
16971 if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
16972 missing_initializers++;
16973 }
16974
16975 if (block.loop_variables.size() == 1 && missing_initializers == 0)
16976 {
16977 return variable_decl(variable: get<SPIRVariable>(id: block.loop_variables.front()));
16978 }
16979 else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
16980 {
16981 for (auto &loop_var : block.loop_variables)
16982 statement(ts: variable_decl(variable: get<SPIRVariable>(id: loop_var)), ts: ";");
16983 return "";
16984 }
16985 else
16986 {
16987 // We have a mix of loop variables, either ones with a clear initializer, or ones without.
16988 // Separate the two streams.
16989 string expr;
16990
16991 for (auto &loop_var : block.loop_variables)
16992 {
16993 uint32_t static_expr = get<SPIRVariable>(id: loop_var).static_expression;
16994 if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
16995 {
16996 statement(ts: variable_decl(variable: get<SPIRVariable>(id: loop_var)), ts: ";");
16997 }
16998 else
16999 {
17000 auto &var = get<SPIRVariable>(id: loop_var);
17001 auto &type = get_variable_data_type(var);
17002 if (expr.empty())
17003 {
17004 // For loop initializers are of the form <type id = value, id = value, id = value, etc ...
17005 expr = join(ts: to_qualifiers_glsl(id: var.self), ts: type_to_glsl(type), ts: " ");
17006 }
17007 else
17008 {
17009 expr += ", ";
17010 // In MSL, being based on C++, the asterisk marking a pointer
17011 // binds to the identifier, not the type.
17012 if (type.pointer)
17013 expr += "* ";
17014 }
17015
17016 expr += join(ts: to_name(id: loop_var), ts: " = ", ts: to_pointer_expression(id: var.static_expression));
17017 }
17018 }
17019 return expr;
17020 }
17021}
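// For illustration (names are made up): if all loop variables share a type and have clear initializers,
// the returned initializer reads like "int a = 0, b = 10"; variables of other types or without a usable
// initializer are instead declared on their own lines just before the loop header.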
17022
17023bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
17024{
17025 if (block.loop_variables.size() <= 1)
17026 return true;
17027
17028 uint32_t expected = 0;
17029 Bitset expected_flags;
17030 for (auto &var : block.loop_variables)
17031 {
17032 // Don't care about uninitialized variables as they will not be part of the initializers.
17033 uint32_t expr = get<SPIRVariable>(id: var).static_expression;
17034 if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
17035 continue;
17036
17037 if (expected == 0)
17038 {
17039 expected = get<SPIRVariable>(id: var).basetype;
17040 expected_flags = get_decoration_bitset(id: var);
17041 }
17042 else if (expected != get<SPIRVariable>(id: var).basetype)
17043 return false;
17044
17045 // Precision flags and things like that must also match.
17046 if (expected_flags != get_decoration_bitset(id: var))
17047 return false;
17048 }
17049
17050 return true;
17051}
17052
17053void CompilerGLSL::emit_block_instructions_with_masked_debug(SPIRBlock &block)
17054{
	// Have to block debug instructions such as OpLine here, since they would otherwise be treated as statements,
	// which breaks loop optimizations.
	// Any line directive would be emitted outside the loop body, which would just be confusing either way.
17058 bool old_block_debug_directives = block_debug_directives;
17059 block_debug_directives = true;
17060 emit_block_instructions(block);
17061 block_debug_directives = old_block_debug_directives;
17062}
17063
17064bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
17065{
17066 SPIRBlock::ContinueBlockType continue_type = continue_block_type(continue_block: get<SPIRBlock>(id: block.continue_block));
17067
17068 if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
17069 {
17070 uint32_t current_count = statement_count;
17071 // If we're trying to create a true for loop,
17072 // we need to make sure that all opcodes before branch statement do not actually emit any code.
17073 // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
17074 emit_block_instructions_with_masked_debug(block);
17075
17076 bool condition_is_temporary = forced_temporaries.find(x: block.condition) == end(cont&: forced_temporaries);
17077
17078 bool flushes_phi = flush_phi_required(from: block.self, to: block.true_block) ||
17079 flush_phi_required(from: block.self, to: block.false_block);
17080
17081 // This can work! We only did trivial things which could be forwarded in block body!
17082 if (!flushes_phi && current_count == statement_count && condition_is_temporary)
17083 {
17084 switch (continue_type)
17085 {
17086 case SPIRBlock::ForLoop:
17087 {
17088 // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
17089 flush_undeclared_variables(block);
17090
17091 // Important that we do this in this order because
17092 // emitting the continue block can invalidate the condition expression.
17093 auto initializer = emit_for_loop_initializers(block);
17094 auto condition = to_expression(id: block.condition);
17095
17096 // Condition might have to be inverted.
17097 if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block)))
17098 condition = join(ts: "!", ts: enclose_expression(expr: condition));
17099
17100 emit_block_hints(block);
17101 if (method != SPIRBlock::MergeToSelectContinueForLoop)
17102 {
17103 auto continue_block = emit_continue_block(continue_block: block.continue_block, follow_true_block: false, follow_false_block: false);
17104 statement(ts: "for (", ts&: initializer, ts: "; ", ts&: condition, ts: "; ", ts&: continue_block, ts: ")");
17105 }
17106 else
17107 statement(ts: "for (", ts&: initializer, ts: "; ", ts&: condition, ts: "; )");
17108 break;
17109 }
17110
17111 case SPIRBlock::WhileLoop:
17112 {
17113 // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
17114 flush_undeclared_variables(block);
17115 emit_while_loop_initializers(block);
17116 emit_block_hints(block);
17117
17118 auto condition = to_expression(id: block.condition);
17119 // Condition might have to be inverted.
17120 if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block)))
17121 condition = join(ts: "!", ts: enclose_expression(expr: condition));
17122
17123 statement(ts: "while (", ts&: condition, ts: ")");
17124 break;
17125 }
17126
17127 default:
17128 block.disable_block_optimization = true;
17129 force_recompile();
17130 begin_scope(); // We'll see an end_scope() later.
17131 return false;
17132 }
17133
17134 begin_scope();
17135 return true;
17136 }
17137 else
17138 {
17139 block.disable_block_optimization = true;
17140 force_recompile();
17141 begin_scope(); // We'll see an end_scope() later.
17142 return false;
17143 }
17144 }
17145 else if (method == SPIRBlock::MergeToDirectForLoop)
17146 {
17147 auto &child = get<SPIRBlock>(id: block.next_block);
17148
17149 // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
17150 flush_undeclared_variables(block&: child);
17151
17152 uint32_t current_count = statement_count;
17153
17154 // If we're trying to create a true for loop,
17155 // we need to make sure that all opcodes before branch statement do not actually emit any code.
17156 // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
17157 emit_block_instructions_with_masked_debug(block&: child);
17158
17159 bool condition_is_temporary = forced_temporaries.find(x: child.condition) == end(cont&: forced_temporaries);
17160
17161 bool flushes_phi = flush_phi_required(from: child.self, to: child.true_block) ||
17162 flush_phi_required(from: child.self, to: child.false_block);
17163
17164 if (!flushes_phi && current_count == statement_count && condition_is_temporary)
17165 {
17166 uint32_t target_block = child.true_block;
17167
17168 switch (continue_type)
17169 {
17170 case SPIRBlock::ForLoop:
17171 {
17172 // Important that we do this in this order because
17173 // emitting the continue block can invalidate the condition expression.
17174 auto initializer = emit_for_loop_initializers(block);
17175 auto condition = to_expression(id: child.condition);
17176
17177 // Condition might have to be inverted.
17178 if (execution_is_noop(from: get<SPIRBlock>(id: child.true_block), to: get<SPIRBlock>(id: block.merge_block)))
17179 {
17180 condition = join(ts: "!", ts: enclose_expression(expr: condition));
17181 target_block = child.false_block;
17182 }
17183
17184 auto continue_block = emit_continue_block(continue_block: block.continue_block, follow_true_block: false, follow_false_block: false);
17185 emit_block_hints(block);
17186 statement(ts: "for (", ts&: initializer, ts: "; ", ts&: condition, ts: "; ", ts&: continue_block, ts: ")");
17187 break;
17188 }
17189
17190 case SPIRBlock::WhileLoop:
17191 {
17192 emit_while_loop_initializers(block);
17193 emit_block_hints(block);
17194
17195 auto condition = to_expression(id: child.condition);
17196 // Condition might have to be inverted.
17197 if (execution_is_noop(from: get<SPIRBlock>(id: child.true_block), to: get<SPIRBlock>(id: block.merge_block)))
17198 {
17199 condition = join(ts: "!", ts: enclose_expression(expr: condition));
17200 target_block = child.false_block;
17201 }
17202
17203 statement(ts: "while (", ts&: condition, ts: ")");
17204 break;
17205 }
17206
17207 default:
17208 block.disable_block_optimization = true;
17209 force_recompile();
17210 begin_scope(); // We'll see an end_scope() later.
17211 return false;
17212 }
17213
17214 begin_scope();
17215 branch(from: child.self, to: target_block);
17216 return true;
17217 }
17218 else
17219 {
17220 block.disable_block_optimization = true;
17221 force_recompile();
17222 begin_scope(); // We'll see an end_scope() later.
17223 return false;
17224 }
17225 }
17226 else
17227 return false;
17228}
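// For illustration (names are made up): when the header can be turned into a real loop, the emitted
// GLSL looks roughly like
//   for (int i = 0; i < count; i++) { ... }   // MergeToSelectForLoop / MergeToDirectForLoop
//   for (int i = 0; i < count; ) { ... }      // MergeToSelectContinueForLoop, increment stays in the body
//   while (cond) { ... }                      // WhileLoop continue type
// otherwise the block is marked as non-optimizable and re-emitted as a generic loop on recompile.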
17229
17230void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
17231{
17232 for (auto &v : block.dominated_variables)
17233 flush_variable_declaration(id: v);
17234}
17235
17236void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
17237{
17238 // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
17239 // Need to sort these to ensure that reference output is stable.
17240 sort(first: begin(cont&: temporaries), last: end(cont&: temporaries),
17241 comp: [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
17242
17243 for (auto &tmp : temporaries)
17244 {
17245 auto &type = get<SPIRType>(id: tmp.first);
17246
17247 // There are some rare scenarios where we are asked to declare pointer types as hoisted temporaries.
17248 // This should be ignored unless we're doing actual variable pointers and backend supports it.
17249 // Access chains cannot normally be lowered to temporaries in GLSL and HLSL.
17250 if (type.pointer && !backend.native_pointers)
17251 continue;
17252
17253 add_local_variable_name(id: tmp.second);
17254 auto &flags = get_decoration_bitset(id: tmp.second);
17255
17256 // Not all targets support pointer literals, so don't bother with that case.
17257 string initializer;
17258 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
17259 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: tmp.first));
17260
17261 statement(ts: flags_to_qualifiers_glsl(type, flags), ts: variable_decl(type, name: to_name(id: tmp.second)), ts&: initializer, ts: ";");
17262
17263 hoisted_temporaries.insert(x: tmp.second);
17264 forced_temporaries.insert(x: tmp.second);
17265
17266 // The temporary might be read from before it's assigned, set up the expression now.
17267 set<SPIRExpression>(id: tmp.second, args: to_name(id: tmp.second), args&: tmp.first, args: true);
17268
17269 // If we have hoisted temporaries in multi-precision contexts, emit that here too ...
17270 // We will not be able to analyze hoisted-ness for dependent temporaries that we hallucinate here.
17271 auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(x: tmp.second);
17272 if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end())
17273 {
17274 uint32_t mirror_id = mirrored_precision_itr->second;
17275 auto &mirror_flags = get_decoration_bitset(id: mirror_id);
17276 statement(ts: flags_to_qualifiers_glsl(type, flags: mirror_flags),
17277 ts: variable_decl(type, name: to_name(id: mirror_id)),
17278 ts&: initializer, ts: ";");
17279 // The temporary might be read from before it's assigned, set up the expression now.
17280 set<SPIRExpression>(id: mirror_id, args: to_name(id: mirror_id), args&: tmp.first, args: true);
17281 hoisted_temporaries.insert(x: mirror_id);
17282 }
17283 }
17284}
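// For illustration (ID is made up): a hoisted temporary is declared ahead of the loop header roughly as
//   float _35 = 0.0;
// (the zero initializer only when force_zero_initialized_variables is set, and with precision qualifiers
// where they apply), so that reads in the loop body and continue block see a variable in scope.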
17285
17286void CompilerGLSL::emit_block_chain(SPIRBlock &block)
17287{
17288 bool select_branch_to_true_block = false;
17289 bool select_branch_to_false_block = false;
17290 bool skip_direct_branch = false;
17291 bool emitted_loop_header_variables = false;
17292 bool force_complex_continue_block = false;
17293 ValueSaver<uint32_t> loop_level_saver(current_loop_level);
17294
17295 if (block.merge == SPIRBlock::MergeLoop)
17296 add_loop_level();
17297
17298 // If we're emitting PHI variables with precision aliases, we have to emit them as hoisted temporaries.
17299 for (auto var_id : block.dominated_variables)
17300 {
17301 auto &var = get<SPIRVariable>(id: var_id);
17302 if (var.phi_variable)
17303 {
17304 auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(x: var_id);
17305 if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end() &&
17306 find_if(first: block.declare_temporary.begin(), last: block.declare_temporary.end(),
17307 pred: [mirrored_precision_itr](const std::pair<TypeID, VariableID> &p) {
17308 return p.second == mirrored_precision_itr->second;
17309 }) == block.declare_temporary.end())
17310 {
17311 block.declare_temporary.push_back(t: { var.basetype, mirrored_precision_itr->second });
17312 }
17313 }
17314 }
17315
17316 emit_hoisted_temporaries(temporaries&: block.declare_temporary);
17317
17318 SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
17319 if (block.continue_block)
17320 {
17321 continue_type = continue_block_type(continue_block: get<SPIRBlock>(id: block.continue_block));
17322 // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
17323 if (continue_type == SPIRBlock::ComplexLoop)
17324 block.complex_continue = true;
17325 }
17326
17327 // If we have loop variables, stop masking out access to the variable now.
17328 for (auto var_id : block.loop_variables)
17329 {
17330 auto &var = get<SPIRVariable>(id: var_id);
17331 var.loop_variable_enable = true;
17332 // We're not going to declare the variable directly, so emit a copy here.
17333 emit_variable_temporary_copies(var);
17334 }
17335
17336 // Remember deferred declaration state. We will restore it before returning.
17337 SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
17338 for (size_t i = 0; i < block.dominated_variables.size(); i++)
17339 {
17340 uint32_t var_id = block.dominated_variables[i];
17341 auto &var = get<SPIRVariable>(id: var_id);
17342 rearm_dominated_variables[i] = var.deferred_declaration;
17343 }
17344
17345 // This is the method often used by spirv-opt to implement loops.
17346 // The loop header goes straight into the continue block.
17347 // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block,
17348 // it *MUST* be used in the continue block. This loop method will not work.
17349 if (!is_legacy_es() && block_is_loop_candidate(block, method: SPIRBlock::MergeToSelectContinueForLoop))
17350 {
17351 flush_undeclared_variables(block);
17352 if (attempt_emit_loop_header(block, method: SPIRBlock::MergeToSelectContinueForLoop))
17353 {
17354 if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block)))
17355 select_branch_to_false_block = true;
17356 else
17357 select_branch_to_true_block = true;
17358
17359 emitted_loop_header_variables = true;
17360 force_complex_continue_block = true;
17361 }
17362 }
17363 // This is the older loop behavior in glslang which branches to loop body directly from the loop header.
17364 else if (block_is_loop_candidate(block, method: SPIRBlock::MergeToSelectForLoop))
17365 {
17366 flush_undeclared_variables(block);
17367 if (attempt_emit_loop_header(block, method: SPIRBlock::MergeToSelectForLoop))
17368 {
			// The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
17370 if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block)))
17371 select_branch_to_false_block = true;
17372 else
17373 select_branch_to_true_block = true;
17374
17375 emitted_loop_header_variables = true;
17376 }
17377 }
17378 // This is the newer loop behavior in glslang which branches from Loop header directly to
17379 // a new block, which in turn has a OpBranchSelection without a selection merge.
17380 else if (block_is_loop_candidate(block, method: SPIRBlock::MergeToDirectForLoop))
17381 {
17382 flush_undeclared_variables(block);
17383 if (attempt_emit_loop_header(block, method: SPIRBlock::MergeToDirectForLoop))
17384 {
17385 skip_direct_branch = true;
17386 emitted_loop_header_variables = true;
17387 }
17388 }
17389 else if (continue_type == SPIRBlock::DoWhileLoop)
17390 {
17391 flush_undeclared_variables(block);
17392 emit_while_loop_initializers(block);
17393 emitted_loop_header_variables = true;
17394 // We have some temporaries where the loop header is the dominator.
17395 // We risk a case where we have code like:
17396 // for (;;) { create-temporary; break; } consume-temporary;
17397 // so force-declare temporaries here.
17398 emit_hoisted_temporaries(temporaries&: block.potential_declare_temporary);
17399 statement(ts: "do");
17400 begin_scope();
17401
17402 emit_block_instructions(block);
17403 }
17404 else if (block.merge == SPIRBlock::MergeLoop)
17405 {
17406 flush_undeclared_variables(block);
17407 emit_while_loop_initializers(block);
17408 emitted_loop_header_variables = true;
17409
17410 // We have a generic loop without any distinguishable pattern like for, while or do while.
17411 get<SPIRBlock>(id: block.continue_block).complex_continue = true;
17412 continue_type = SPIRBlock::ComplexLoop;
17413
17414 // We have some temporaries where the loop header is the dominator.
17415 // We risk a case where we have code like:
17416 // for (;;) { create-temporary; break; } consume-temporary;
17417 // so force-declare temporaries here.
17418 emit_hoisted_temporaries(temporaries&: block.potential_declare_temporary);
17419 emit_block_hints(block);
17420 statement(ts: "for (;;)");
17421 begin_scope();
17422
17423 emit_block_instructions(block);
17424 }
17425 else
17426 {
17427 emit_block_instructions(block);
17428 }
17429
	// If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem:
	// writes to said loop variables might have been masked out, so we need a recompile.
17432 if (!emitted_loop_header_variables && !block.loop_variables.empty())
17433 {
17434 force_recompile_guarantee_forward_progress();
17435 for (auto var : block.loop_variables)
17436 get<SPIRVariable>(id: var).loop_variable = false;
17437 block.loop_variables.clear();
17438 }
17439
17440 flush_undeclared_variables(block);
17441 bool emit_next_block = true;
17442
17443 // Handle end of block.
17444 switch (block.terminator)
17445 {
17446 case SPIRBlock::Direct:
17447 // True when emitting complex continue block.
17448 if (block.loop_dominator == block.next_block)
17449 {
17450 branch(from: block.self, to: block.next_block);
17451 emit_next_block = false;
17452 }
17453 // True if MergeToDirectForLoop succeeded.
17454 else if (skip_direct_branch)
17455 emit_next_block = false;
17456 else if (is_continue(next: block.next_block) || is_break(next: block.next_block) || is_conditional(next: block.next_block))
17457 {
17458 branch(from: block.self, to: block.next_block);
17459 emit_next_block = false;
17460 }
17461 break;
17462
17463 case SPIRBlock::Select:
17464 // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
17465 if (select_branch_to_true_block)
17466 {
17467 if (force_complex_continue_block)
17468 {
17469 assert(block.true_block == block.continue_block);
17470
17471 // We're going to emit a continue block directly here, so make sure it's marked as complex.
17472 auto &complex_continue = get<SPIRBlock>(id: block.continue_block).complex_continue;
17473 bool old_complex = complex_continue;
17474 complex_continue = true;
17475 branch(from: block.self, to: block.true_block);
17476 complex_continue = old_complex;
17477 }
17478 else
17479 branch(from: block.self, to: block.true_block);
17480 }
17481 else if (select_branch_to_false_block)
17482 {
17483 if (force_complex_continue_block)
17484 {
17485 assert(block.false_block == block.continue_block);
17486
17487 // We're going to emit a continue block directly here, so make sure it's marked as complex.
17488 auto &complex_continue = get<SPIRBlock>(id: block.continue_block).complex_continue;
17489 bool old_complex = complex_continue;
17490 complex_continue = true;
17491 branch(from: block.self, to: block.false_block);
17492 complex_continue = old_complex;
17493 }
17494 else
17495 branch(from: block.self, to: block.false_block);
17496 }
17497 else
17498 branch(from: block.self, cond: block.condition, true_block: block.true_block, false_block: block.false_block);
17499 break;
17500
17501 case SPIRBlock::MultiSelect:
17502 {
17503 auto &type = expression_type(id: block.condition);
17504 bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort ||
17505 type.basetype == SPIRType::UByte || type.basetype == SPIRType::UInt64;
17506
17507 if (block.merge == SPIRBlock::MergeNone)
17508 SPIRV_CROSS_THROW("Switch statement is not structured");
17509
17510 if (!backend.support_64bit_switch && (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64))
17511 {
17512 // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
17513 SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
17514 }
17515
17516 const char *label_suffix = "";
17517 if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
17518 label_suffix = "u";
17519 else if (type.basetype == SPIRType::Int64 && backend.support_64bit_switch)
17520 label_suffix = "l";
17521 else if (type.basetype == SPIRType::UInt64 && backend.support_64bit_switch)
17522 label_suffix = "ul";
17523 else if (type.basetype == SPIRType::UShort)
17524 label_suffix = backend.uint16_t_literal_suffix;
17525 else if (type.basetype == SPIRType::Short)
17526 label_suffix = backend.int16_t_literal_suffix;
17527
17528 current_emitting_switch_stack.push_back(t: &block);
17529
17530 if (block.need_ladder_break)
17531 statement(ts: "bool _", ts&: block.self, ts: "_ladder_break = false;");
17532
17533 // Find all unique case constructs.
17534 unordered_map<uint32_t, SmallVector<uint64_t>> case_constructs;
17535 SmallVector<uint32_t> block_declaration_order;
17536 SmallVector<uint64_t> literals_to_merge;
17537
17538 // If a switch case branches to the default block for some reason, we can just remove that literal from consideration
17539 // and let the default: block handle it.
17540 // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here.
17541 // We only need to consider possible fallthrough if order[i] branches to order[i + 1].
17542 auto &cases = get_case_list(block);
17543 for (auto &c : cases)
17544 {
17545 if (c.block != block.next_block && c.block != block.default_block)
17546 {
17547 if (!case_constructs.count(x: c.block))
17548 block_declaration_order.push_back(t: c.block);
17549 case_constructs[c.block].push_back(t: c.value);
17550 }
17551 else if (c.block == block.next_block && block.default_block != block.next_block)
17552 {
17553 // We might have to flush phi inside specific case labels.
17554 // If we can piggyback on default:, do so instead.
17555 literals_to_merge.push_back(t: c.value);
17556 }
17557 }
17558
17559 // Empty literal array -> default.
17560 if (block.default_block != block.next_block)
17561 {
17562 auto &default_block = get<SPIRBlock>(id: block.default_block);
17563
17564 // We need to slide in the default block somewhere in this chain
17565 // if there are fall-through scenarios since the default is declared separately in OpSwitch.
17566 // Only consider trivial fall-through cases here.
17567 size_t num_blocks = block_declaration_order.size();
17568 bool injected_block = false;
17569
17570 for (size_t i = 0; i < num_blocks; i++)
17571 {
17572 auto &case_block = get<SPIRBlock>(id: block_declaration_order[i]);
17573 if (execution_is_direct_branch(from: case_block, to: default_block))
17574 {
17575 // Fallthrough to default block, we must inject the default block here.
17576 block_declaration_order.insert(itr: begin(cont&: block_declaration_order) + i + 1, value: block.default_block);
17577 injected_block = true;
17578 break;
17579 }
17580 else if (execution_is_direct_branch(from: default_block, to: case_block))
17581 {
17582 // Default case is falling through to another case label, we must inject the default block here.
17583 block_declaration_order.insert(itr: begin(cont&: block_declaration_order) + i, value: block.default_block);
17584 injected_block = true;
17585 break;
17586 }
17587 }
17588
17589 // Order does not matter.
17590 if (!injected_block)
17591 block_declaration_order.push_back(t: block.default_block);
17592 else if (is_legacy_es())
17593 SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");
17594
17595 case_constructs[block.default_block] = {};
17596 }
17597
17598 size_t num_blocks = block_declaration_order.size();
17599
17600 const auto to_case_label = [](uint64_t literal, uint32_t width, bool is_unsigned_case) -> string
17601 {
17602 if (is_unsigned_case)
17603 return convert_to_string(t: literal);
17604
			// For smaller widths, the literals are compiled as 32-bit wide
			// literals, so we don't need to handle every size specifically.
17607 if (width <= 32)
17608 {
17609 return convert_to_string(t: int64_t(int32_t(literal)));
17610 }
17611
17612 return convert_to_string(t: int64_t(literal));
17613 };
17614
17615 const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint64_t> &labels,
17616 const char *suffix) -> string {
17617 string ret;
17618 size_t count = labels.size();
17619 for (size_t i = 0; i < count; i++)
17620 {
17621 if (i)
17622 ret += " || ";
17623 ret += join(ts: count > 1 ? "(" : "", ts: to_enclosed_expression(id: condition), ts: " == ", ts: labels[i], ts&: suffix,
17624 ts: count > 1 ? ")" : "");
17625 }
17626 return ret;
17627 };
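		// For illustration (condition name is made up): on legacy targets a case construct with labels {1, 2}
		// is emitted via to_legacy_case_label as "if ((sel == 1) || (sel == 2))" instead of real case labels.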
17628
17629 // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
17630 // we need to flush phi nodes outside the switch block in a branch,
17631 // and skip any Phi handling inside the case label to make fall-through work as expected.
17632 // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
17633 // inside the case label if at all possible.
17634 for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
17635 {
17636 if (flush_phi_required(from: block.self, to: block_declaration_order[i]) &&
17637 flush_phi_required(from: block_declaration_order[i - 1], to: block_declaration_order[i]))
17638 {
17639 uint32_t target_block = block_declaration_order[i];
17640
17641 // Make sure we flush Phi, it might have been marked to be ignored earlier.
17642 get<SPIRBlock>(id: target_block).ignore_phi_from_block = 0;
17643
17644 auto &literals = case_constructs[target_block];
17645
17646 if (literals.empty())
17647 {
17648 // Oh boy, gotta make a complete negative test instead! o.o
17649 // Find all possible literals that would *not* make us enter the default block.
17650 // If none of those literals match, we flush Phi ...
17651 SmallVector<string> conditions;
17652 for (size_t j = 0; j < num_blocks; j++)
17653 {
17654 auto &negative_literals = case_constructs[block_declaration_order[j]];
17655 for (auto &case_label : negative_literals)
17656 conditions.push_back(t: join(ts: to_enclosed_expression(id: block.condition),
17657 ts: " != ", ts: to_case_label(case_label, type.width, unsigned_case)));
17658 }
17659
17660 statement(ts: "if (", ts: merge(list: conditions, between: " && "), ts: ")");
17661 begin_scope();
17662 flush_phi(from: block.self, to: target_block);
17663 end_scope();
17664 }
17665 else
17666 {
17667 SmallVector<string> conditions;
17668 conditions.reserve(count: literals.size());
17669 for (auto &case_label : literals)
17670 conditions.push_back(t: join(ts: to_enclosed_expression(id: block.condition),
17671 ts: " == ", ts: to_case_label(case_label, type.width, unsigned_case)));
17672 statement(ts: "if (", ts: merge(list: conditions, between: " || "), ts: ")");
17673 begin_scope();
17674 flush_phi(from: block.self, to: target_block);
17675 end_scope();
17676 }
17677
17678 // Mark the block so that we don't flush Phi from header to case label.
17679 get<SPIRBlock>(id: target_block).ignore_phi_from_block = block.self;
17680 }
17681 }
17682
17683 // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate
17684 // non-structured exits with the help of a switch block.
17685 // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
17686 bool block_like_switch = cases.empty();
17687
17688 // If this is true, the switch is completely meaningless, and we should just avoid it.
17689 bool collapsed_switch = block_like_switch && block.default_block == block.next_block;
17690
17691 if (!collapsed_switch)
17692 {
17693 if (block_like_switch || is_legacy())
17694 {
17695 // ESSL 1.0 is not guaranteed to support do/while.
17696 if (is_legacy_es())
17697 {
17698 uint32_t counter = statement_count;
17699 statement(ts: "for (int spvDummy", ts&: counter, ts: " = 0; spvDummy", ts&: counter, ts: " < 1; spvDummy", ts&: counter,
17700 ts: "++)");
17701 }
17702 else
17703 statement(ts: "do");
17704 }
17705 else
17706 {
17707 emit_block_hints(block);
17708 statement(ts: "switch (", ts: to_unpacked_expression(id: block.condition), ts: ")");
17709 }
17710 begin_scope();
17711 }
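		// For illustration: a "block-like" switch (no case labels) is emitted as
		//   do { ... } while(false);
		// or, on ESSL 1.0 where do/while support is not guaranteed, as
		//   for (int spvDummyN = 0; spvDummyN < 1; spvDummyN++) { ... }
		// (N is the current statement count), so that break; still behaves as a structured exit.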
17712
17713 for (size_t i = 0; i < num_blocks; i++)
17714 {
17715 uint32_t target_block = block_declaration_order[i];
17716 auto &literals = case_constructs[target_block];
17717
17718 if (literals.empty())
17719 {
17720 // Default case.
17721 if (!block_like_switch)
17722 {
17723 if (is_legacy())
17724 statement(ts: "else");
17725 else
17726 statement(ts: "default:");
17727 }
17728 }
17729 else
17730 {
17731 if (is_legacy())
17732 {
17733 statement(ts: (i ? "else " : ""), ts: "if (", ts: to_legacy_case_label(block.condition, literals, label_suffix),
17734 ts: ")");
17735 }
17736 else
17737 {
17738 for (auto &case_literal : literals)
17739 {
17740 // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
17741 statement(ts: "case ", ts: to_case_label(case_literal, type.width, unsigned_case), ts&: label_suffix, ts: ":");
17742 }
17743 }
17744 }
17745
17746 auto &case_block = get<SPIRBlock>(id: target_block);
17747 if (backend.support_case_fallthrough && i + 1 < num_blocks &&
17748 execution_is_direct_branch(from: case_block, to: get<SPIRBlock>(id: block_declaration_order[i + 1])))
17749 {
17750 // We will fall through here, so just terminate the block chain early.
17751 // We still need to deal with Phi potentially.
			// No need for a stack-like structure here since we only do fall-through when there is a
			// single trivial branch to the fall-through target.
17754 current_emitting_switch_fallthrough = true;
17755 }
17756 else
17757 current_emitting_switch_fallthrough = false;
17758
17759 if (!block_like_switch)
17760 begin_scope();
17761 branch(from: block.self, to: target_block);
17762 if (!block_like_switch)
17763 end_scope();
17764
17765 current_emitting_switch_fallthrough = false;
17766 }
17767
17768 // Might still have to flush phi variables if we branch from loop header directly to merge target.
17769 // This is supposed to emit all cases where we branch from header to merge block directly.
		// There are two main scenarios where we cannot rely on the default fallthrough.
17771 // - There is an explicit default: label already.
17772 // In this case, literals_to_merge need to form their own "default" case, so that we avoid executing that block.
17773 // - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there.
17774 bool header_merge_requires_phi = flush_phi_required(from: block.self, to: block.next_block);
17775 bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty();
17776 if (!collapsed_switch && ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty()))
17777 {
17778 for (auto &case_literal : literals_to_merge)
17779 statement(ts: "case ", ts: to_case_label(case_literal, type.width, unsigned_case), ts&: label_suffix, ts: ":");
17780
17781 if (block.default_block == block.next_block)
17782 {
17783 if (is_legacy())
17784 statement(ts: "else");
17785 else
17786 statement(ts: "default:");
17787 }
17788
17789 begin_scope();
17790 flush_phi(from: block.self, to: block.next_block);
17791 statement(ts: "break;");
17792 end_scope();
17793 }
17794
17795 if (!collapsed_switch)
17796 {
17797 if ((block_like_switch || is_legacy()) && !is_legacy_es())
17798 end_scope_decl(decl: "while(false)");
17799 else
17800 end_scope();
17801 }
17802 else
17803 flush_phi(from: block.self, to: block.next_block);
17804
17805 if (block.need_ladder_break)
17806 {
17807 statement(ts: "if (_", ts&: block.self, ts: "_ladder_break)");
17808 begin_scope();
17809 statement(ts: "break;");
17810 end_scope();
17811 }
17812
17813 current_emitting_switch_stack.pop_back();
17814 break;
17815 }
17816
17817 case SPIRBlock::Return:
17818 {
17819 for (auto &line : current_function->fixup_hooks_out)
17820 line();
17821
17822 if (processing_entry_point)
17823 emit_fixup();
17824
17825 auto &cfg = get_cfg_for_current_function();
17826
17827 if (block.return_value)
17828 {
17829 auto &type = expression_type(id: block.return_value);
17830 if (!type.array.empty() && !backend.can_return_array)
17831 {
17832 // If we cannot return arrays, we will have a special out argument we can write to instead.
				// The backend is responsible for setting this up, and for redirecting the return values as appropriate.
17834 if (ir.ids[block.return_value].get_type() != TypeUndef)
17835 {
17836 emit_array_copy(expr: "spvReturnValue", lhs_id: 0, rhs_id: block.return_value, lhs_storage: StorageClassFunction,
17837 rhs_storage: get_expression_effective_storage_class(ptr: block.return_value));
17838 }
17839
17840 if (!cfg.node_terminates_control_flow_in_sub_graph(from: current_function->entry_block, to: block.self) ||
17841 block.loop_dominator != BlockID(SPIRBlock::NoDominator))
17842 {
17843 statement(ts: "return;");
17844 }
17845 }
17846 else
17847 {
17848 // OpReturnValue can return Undef, so don't emit anything for this case.
17849 if (ir.ids[block.return_value].get_type() != TypeUndef)
17850 statement(ts: "return ", ts: to_unpacked_expression(id: block.return_value), ts: ";");
17851 }
17852 }
17853 else if (!cfg.node_terminates_control_flow_in_sub_graph(from: current_function->entry_block, to: block.self) ||
17854 block.loop_dominator != BlockID(SPIRBlock::NoDominator))
17855 {
17856 // If this block is the very final block and not called from control flow,
17857 // we do not need an explicit return which looks out of place. Just end the function here.
			// In the very weird case of for(;;) { return; }, the return executes unconditionally,
			// but we still need an explicit return here ...
17860 statement(ts: "return;");
17861 }
17862 break;
17863 }
17864
17865 // If the Kill is terminating a block with a (probably synthetic) return value, emit a return value statement.
17866 case SPIRBlock::Kill:
17867 statement(ts&: backend.discard_literal, ts: ";");
17868 if (block.return_value)
17869 statement(ts: "return ", ts: to_unpacked_expression(id: block.return_value), ts: ";");
17870 break;
17871
17872 case SPIRBlock::Unreachable:
17873 {
17874 // Avoid emitting false fallthrough, which can happen for
17875 // if (cond) break; else discard; inside a case label.
17876 // Discard is not always implementable as a terminator.
17877
17878 auto &cfg = get_cfg_for_current_function();
17879 bool inner_dominator_is_switch = false;
17880 ID id = block.self;
17881
17882 while (id)
17883 {
17884 auto &iter_block = get<SPIRBlock>(id);
17885 if (iter_block.terminator == SPIRBlock::MultiSelect ||
17886 iter_block.merge == SPIRBlock::MergeLoop)
17887 {
17888 ID next_block = iter_block.merge == SPIRBlock::MergeLoop ?
17889 iter_block.merge_block : iter_block.next_block;
17890 bool outside_construct = next_block && cfg.find_common_dominator(a: next_block, b: block.self) == next_block;
17891 if (!outside_construct)
17892 {
17893 inner_dominator_is_switch = iter_block.terminator == SPIRBlock::MultiSelect;
17894 break;
17895 }
17896 }
17897
17898 if (cfg.get_preceding_edges(block: id).empty())
17899 break;
17900
17901 id = cfg.get_immediate_dominator(block: id);
17902 }
17903
17904 if (inner_dominator_is_switch)
17905 statement(ts: "break; // unreachable workaround");
17906
17907 emit_next_block = false;
17908 break;
17909 }
17910
17911 case SPIRBlock::IgnoreIntersection:
17912 statement(ts: "ignoreIntersectionEXT;");
17913 break;
17914
17915 case SPIRBlock::TerminateRay:
17916 statement(ts: "terminateRayEXT;");
17917 break;
17918
17919 case SPIRBlock::EmitMeshTasks:
17920 emit_mesh_tasks(block);
17921 break;
17922
17923 default:
17924 SPIRV_CROSS_THROW("Unimplemented block terminator.");
17925 }
17926
17927 if (block.next_block && emit_next_block)
17928 {
17929 // If we hit this case, we're dealing with an unconditional branch, which means we will output
		// that block after this. If we had a selection merge, we already flushed phi variables.
17931 if (block.merge != SPIRBlock::MergeSelection)
17932 {
17933 flush_phi(from: block.self, to: block.next_block);
17934 // For a direct branch, need to remember to invalidate expressions in the next linear block instead.
17935 get<SPIRBlock>(id: block.next_block).invalidate_expressions = block.invalidate_expressions;
17936 }
17937
17938 // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
17939 if (!current_emitting_switch_fallthrough)
17940 {
17941 // For merge selects we might have ignored the fact that a merge target
17942 // could have been a break; or continue;
17943 // We will need to deal with it here.
17944 if (is_loop_break(next: block.next_block))
17945 {
17946 // Cannot check for just break, because switch statements will also use break.
17947 assert(block.merge == SPIRBlock::MergeSelection);
17948 statement(ts: "break;");
17949 }
17950 else if (is_continue(next: block.next_block))
17951 {
17952 assert(block.merge == SPIRBlock::MergeSelection);
17953 branch_to_continue(from: block.self, to: block.next_block);
17954 }
17955 else if (BlockID(block.self) != block.next_block)
17956 emit_block_chain(block&: get<SPIRBlock>(id: block.next_block));
17957 }
17958 }
17959
17960 if (block.merge == SPIRBlock::MergeLoop)
17961 {
17962 if (continue_type == SPIRBlock::DoWhileLoop)
17963 {
17964 // Make sure that we run the continue block to get the expressions set, but this
17965 // should become an empty string.
17966 // We have no fallbacks if we cannot forward everything to temporaries ...
17967 const auto &continue_block = get<SPIRBlock>(id: block.continue_block);
17968 bool positive_test = execution_is_noop(from: get<SPIRBlock>(id: continue_block.true_block),
17969 to: get<SPIRBlock>(id: continue_block.loop_dominator));
17970
17971 uint32_t current_count = statement_count;
17972 auto statements = emit_continue_block(continue_block: block.continue_block, follow_true_block: positive_test, follow_false_block: !positive_test);
17973 if (statement_count != current_count)
17974 {
17975 // The DoWhile block has side effects, force ComplexLoop pattern next pass.
17976 get<SPIRBlock>(id: block.continue_block).complex_continue = true;
17977 force_recompile();
17978 }
17979
17980 // Might have to invert the do-while test here.
17981 auto condition = to_expression(id: continue_block.condition);
17982 if (!positive_test)
17983 condition = join(ts: "!", ts: enclose_expression(expr: condition));
17984
17985 end_scope_decl(decl: join(ts: "while (", ts&: condition, ts: ")"));
17986 }
17987 else
17988 end_scope();
17989
17990 loop_level_saver.release();
17991
17992 // We cannot break out of two loops at once, so don't check for break; here.
17993 // Using block.self as the "from" block isn't quite right, but it has the same scope
17994 // and dominance structure, so it's fine.
17995 if (is_continue(next: block.merge_block))
17996 branch_to_continue(from: block.self, to: block.merge_block);
17997 else
17998 emit_block_chain(block&: get<SPIRBlock>(id: block.merge_block));
17999 }
18000
18001 // Forget about control dependent expressions now.
18002 block.invalidate_expressions.clear();
18003
18004 // After we return, we must be out of scope, so if we somehow have to re-emit this function,
18005 // re-declare variables if necessary.
18006 assert(rearm_dominated_variables.size() == block.dominated_variables.size());
18007 for (size_t i = 0; i < block.dominated_variables.size(); i++)
18008 {
18009 uint32_t var = block.dominated_variables[i];
18010 get<SPIRVariable>(id: var).deferred_declaration = rearm_dominated_variables[i];
18011 }
18012
18013 // Just like for deferred declaration, we need to forget about loop variable enable
18014 // if our block chain is reinstantiated later.
18015 for (auto &var_id : block.loop_variables)
18016 get<SPIRVariable>(id: var_id).loop_variable_enable = false;
18017}
18018
18019void CompilerGLSL::begin_scope()
18020{
18021 statement(ts: "{");
18022 indent++;
18023}
18024
18025void CompilerGLSL::end_scope()
18026{
18027 if (!indent)
18028 SPIRV_CROSS_THROW("Popping empty indent stack.");
18029 indent--;
18030 statement(ts: "}");
18031}
18032
18033void CompilerGLSL::end_scope(const string &trailer)
18034{
18035 if (!indent)
18036 SPIRV_CROSS_THROW("Popping empty indent stack.");
18037 indent--;
18038 statement(ts: "}", ts: trailer);
18039}
18040
18041void CompilerGLSL::end_scope_decl()
18042{
18043 if (!indent)
18044 SPIRV_CROSS_THROW("Popping empty indent stack.");
18045 indent--;
18046 statement(ts: "};");
18047}
18048
18049void CompilerGLSL::end_scope_decl(const string &decl)
18050{
18051 if (!indent)
18052 SPIRV_CROSS_THROW("Popping empty indent stack.");
18053 indent--;
18054 statement(ts: "} ", ts: decl, ts: ";");
18055}
18056
18057void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
18058{
18059 // If our variable is remapped, and we rely on type-remapping information as
18060 // well, then we cannot pass the variable as a function parameter.
18061 // Fixing this is non-trivial without stamping out variants of the same function,
18062 	// so for now we throw an error and suggest workarounds instead.
18063 for (uint32_t i = 0; i < length; i++)
18064 {
18065 auto *var = maybe_get<SPIRVariable>(id: args[i]);
18066 if (!var || !var->remapped_variable)
18067 continue;
18068
18069 auto &type = get<SPIRType>(id: var->basetype);
18070 if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
18071 {
18072 SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
18073 "This will not work correctly because type-remapping information is lost. "
18074 			                  "To work around this, please consider not passing the subpass input as a function parameter, "
18075 "or use in/out variables instead which do not need type remapping information.");
18076 }
18077 }
18078}
18079
18080const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
18081{
18082 // FIXME: This is kind of hacky. There should be a cleaner way.
18083 auto offset = uint32_t(&instr - current_emitting_block->ops.data());
18084 if ((offset + 1) < current_emitting_block->ops.size())
18085 return &current_emitting_block->ops[offset + 1];
18086 else
18087 return nullptr;
18088}
18089
18090uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
18091{
18092 return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
18093 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
18094 MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
18095}
18096
18097bool CompilerGLSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, StorageClass, StorageClass)
18098{
18099 string lhs;
18100 if (expr)
18101 lhs = expr;
18102 else
18103 lhs = to_expression(id: lhs_id);
18104
18105 statement(ts&: lhs, ts: " = ", ts: to_expression(id: rhs_id), ts: ";");
18106 return true;
18107}
18108
18109bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id)
18110{
18111 if (!backend.force_gl_in_out_block)
18112 return false;
18113 // This path is only relevant for GL backends.
18114
18115 auto *var = maybe_get<SPIRVariable>(id: target_id);
18116 if (!var || var->storage != StorageClassOutput)
18117 return false;
18118
18119 if (!is_builtin_variable(var: *var) || BuiltIn(get_decoration(id: var->self, decoration: DecorationBuiltIn)) != BuiltInSampleMask)
18120 return false;
18121
18122 auto &type = expression_type(id: source_id);
18123 string array_expr;
18124 if (type.array_size_literal.back())
18125 {
18126 array_expr = convert_to_string(t: type.array.back());
18127 if (type.array.back() == 0)
18128 SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
18129 }
18130 else
18131 array_expr = to_expression(id: type.array.back());
18132
18133 SPIRType target_type { OpTypeInt };
18134 target_type.basetype = SPIRType::Int;
18135
18136 statement(ts: "for (int i = 0; i < int(", ts&: array_expr, ts: "); i++)");
18137 begin_scope();
18138 statement(ts: to_expression(id: target_id), ts: "[i] = ",
18139 ts: bitcast_expression(target_type, expr_type: type.basetype, expr: join(ts: to_expression(id: source_id), ts: "[i]")),
18140 ts: ";");
18141 end_scope();
18142
18143 return true;
18144}
18145
18146void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
18147{
18148 if (!backend.force_gl_in_out_block)
18149 return;
18150 // This path is only relevant for GL backends.
18151
18152 auto *var = maybe_get<SPIRVariable>(id: source_id);
18153 if (!var)
18154 return;
18155
18156 if (var->storage != StorageClassInput && var->storage != StorageClassOutput)
18157 return;
18158
18159 auto &type = get_variable_data_type(var: *var);
18160 if (type.array.empty())
18161 return;
18162
18163 auto builtin = BuiltIn(get_decoration(id: var->self, decoration: DecorationBuiltIn));
18164 bool is_builtin = is_builtin_variable(var: *var) &&
18165 (builtin == BuiltInPointSize ||
18166 builtin == BuiltInPosition ||
18167 builtin == BuiltInSampleMask);
18168 bool is_tess = is_tessellation_shader();
18169 bool is_patch = has_decoration(id: var->self, decoration: DecorationPatch);
18170 bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask;
18171
18172 	// Tessellation input arrays are special in that they are unsized, so we cannot directly copy from them.
18173 // We must unroll the array load.
18174 // For builtins, we couldn't catch this case normally,
18175 // because this is resolved in the OpAccessChain in most cases.
18176 // If we load the entire array, we have no choice but to unroll here.
18177 if (!is_patch && (is_builtin || is_tess))
18178 {
18179 auto new_expr = join(ts: "_", ts&: target_id, ts: "_unrolled");
18180 statement(ts: variable_decl(type, name: new_expr, id: target_id), ts: ";");
18181 string array_expr;
18182 if (type.array_size_literal.back())
18183 {
18184 array_expr = convert_to_string(t: type.array.back());
18185 if (type.array.back() == 0)
18186 SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
18187 }
18188 else
18189 array_expr = to_expression(id: type.array.back());
18190
18191 // The array size might be a specialization constant, so use a for-loop instead.
18192 statement(ts: "for (int i = 0; i < int(", ts&: array_expr, ts: "); i++)");
18193 begin_scope();
18194 if (is_builtin && !is_sample_mask)
18195 statement(ts&: new_expr, ts: "[i] = gl_in[i].", ts&: expr, ts: ";");
18196 else if (is_sample_mask)
18197 {
18198 SPIRType target_type { OpTypeInt };
18199 target_type.basetype = SPIRType::Int;
18200 statement(ts&: new_expr, ts: "[i] = ", ts: bitcast_expression(target_type, expr_type: type.basetype, expr: join(ts&: expr, ts: "[i]")), ts: ";");
18201 }
18202 else
18203 statement(ts&: new_expr, ts: "[i] = ", ts&: expr, ts: "[i];");
18204 end_scope();
18205
18206 expr = std::move(new_expr);
18207 }
18208}
18209
18210void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
18211{
18212 // We will handle array cases elsewhere.
18213 if (!expr_type.array.empty())
18214 return;
18215
18216 auto *var = maybe_get_backing_variable(chain: source_id);
18217 if (var)
18218 source_id = var->self;
18219
18220 // Only interested in standalone builtin variables.
18221 if (!has_decoration(id: source_id, decoration: DecorationBuiltIn))
18222 {
18223 // Except for int attributes in legacy GLSL, which are cast from float.
18224 if (is_legacy() && expr_type.basetype == SPIRType::Int && var && var->storage == StorageClassInput)
18225 expr = join(ts: type_to_glsl(type: expr_type), ts: "(", ts&: expr, ts: ")");
18226 return;
18227 }
18228
18229 auto builtin = static_cast<BuiltIn>(get_decoration(id: source_id, decoration: DecorationBuiltIn));
18230 auto expected_type = expr_type.basetype;
18231
18232 // TODO: Fill in for more builtins.
18233 switch (builtin)
18234 {
18235 case BuiltInLayer:
18236 case BuiltInPrimitiveId:
18237 case BuiltInViewportIndex:
18238 case BuiltInInstanceId:
18239 case BuiltInInstanceIndex:
18240 case BuiltInVertexId:
18241 case BuiltInVertexIndex:
18242 case BuiltInSampleId:
18243 case BuiltInBaseVertex:
18244 case BuiltInBaseInstance:
18245 case BuiltInDrawIndex:
18246 case BuiltInFragStencilRefEXT:
18247 case BuiltInInstanceCustomIndexNV:
18248 case BuiltInSampleMask:
18249 case BuiltInPrimitiveShadingRateKHR:
18250 case BuiltInShadingRateKHR:
18251 expected_type = SPIRType::Int;
18252 break;
18253
18254 case BuiltInGlobalInvocationId:
18255 case BuiltInLocalInvocationId:
18256 case BuiltInWorkgroupId:
18257 case BuiltInLocalInvocationIndex:
18258 case BuiltInWorkgroupSize:
18259 case BuiltInNumWorkgroups:
18260 case BuiltInIncomingRayFlagsNV:
18261 case BuiltInLaunchIdNV:
18262 case BuiltInLaunchSizeNV:
18263 case BuiltInPrimitiveTriangleIndicesEXT:
18264 case BuiltInPrimitiveLineIndicesEXT:
18265 case BuiltInPrimitivePointIndicesEXT:
18266 expected_type = SPIRType::UInt;
18267 break;
18268
18269 default:
18270 break;
18271 }
18272
18273 if (expected_type != expr_type.basetype)
18274 expr = bitcast_expression(target_type: expr_type, expr_type: expected_type, expr);
18275}
18276
18277SPIRType::BaseType CompilerGLSL::get_builtin_basetype(BuiltIn builtin, SPIRType::BaseType default_type)
18278{
18279 // TODO: Fill in for more builtins.
18280 switch (builtin)
18281 {
18282 case BuiltInLayer:
18283 case BuiltInPrimitiveId:
18284 case BuiltInViewportIndex:
18285 case BuiltInFragStencilRefEXT:
18286 case BuiltInSampleMask:
18287 case BuiltInPrimitiveShadingRateKHR:
18288 case BuiltInShadingRateKHR:
18289 return SPIRType::Int;
18290
18291 default:
18292 return default_type;
18293 }
18294}
18295
18296void CompilerGLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
18297{
18298 auto *var = maybe_get_backing_variable(chain: target_id);
18299 if (var)
18300 target_id = var->self;
18301
18302 // Only interested in standalone builtin variables.
18303 if (!has_decoration(id: target_id, decoration: DecorationBuiltIn))
18304 return;
18305
18306 auto builtin = static_cast<BuiltIn>(get_decoration(id: target_id, decoration: DecorationBuiltIn));
18307 auto expected_type = get_builtin_basetype(builtin, default_type: expr_type.basetype);
18308
18309 if (expected_type != expr_type.basetype)
18310 {
18311 auto type = expr_type;
18312 type.basetype = expected_type;
18313 expr = bitcast_expression(target_type: type, expr_type: expr_type.basetype, expr);
18314 }
18315}
18316
18317void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id)
18318{
18319 if (*backend.nonuniform_qualifier == '\0')
18320 return;
18321
18322 auto *var = maybe_get_backing_variable(chain: ptr_id);
18323 if (!var)
18324 return;
18325
18326 if (var->storage != StorageClassUniformConstant &&
18327 var->storage != StorageClassStorageBuffer &&
18328 var->storage != StorageClassUniform)
18329 return;
18330
18331 auto &backing_type = get<SPIRType>(id: var->basetype);
18332 if (backing_type.array.empty())
18333 return;
18334
18335 // If we get here, we know we're accessing an arrayed resource which
18336 // might require nonuniform qualifier.
18337
18338 auto start_array_index = expr.find_first_of(c: '[');
18339
18340 if (start_array_index == string::npos)
18341 return;
18342
18343 // We've opened a bracket, track expressions until we can close the bracket.
18344 // This must be our resource index.
18345 size_t end_array_index = string::npos;
18346 unsigned bracket_count = 1;
18347 for (size_t index = start_array_index + 1; index < expr.size(); index++)
18348 {
18349 if (expr[index] == ']')
18350 {
18351 if (--bracket_count == 0)
18352 {
18353 end_array_index = index;
18354 break;
18355 }
18356 }
18357 else if (expr[index] == '[')
18358 bracket_count++;
18359 }
18360
18361 assert(bracket_count == 0);
18362
18363 // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
18364 // nothing we can do here to express that.
18365 if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
18366 return;
18367
18368 start_array_index++;
18369
18370 expr = join(ts: expr.substr(pos: 0, n: start_array_index), ts&: backend.nonuniform_qualifier, ts: "(",
18371 ts: expr.substr(pos: start_array_index, n: end_array_index - start_array_index), ts: ")",
18372 ts: expr.substr(pos: end_array_index, n: string::npos));
18373}
18374
18375void CompilerGLSL::emit_block_hints(const SPIRBlock &block)
18376{
18377 if ((options.es && options.version < 310) || (!options.es && options.version < 140))
18378 return;
18379
18380 switch (block.hint)
18381 {
18382 case SPIRBlock::HintFlatten:
18383 require_extension_internal(ext: "GL_EXT_control_flow_attributes");
18384 statement(ts: "SPIRV_CROSS_FLATTEN");
18385 break;
18386 case SPIRBlock::HintDontFlatten:
18387 require_extension_internal(ext: "GL_EXT_control_flow_attributes");
18388 statement(ts: "SPIRV_CROSS_BRANCH");
18389 break;
18390 case SPIRBlock::HintUnroll:
18391 require_extension_internal(ext: "GL_EXT_control_flow_attributes");
18392 statement(ts: "SPIRV_CROSS_UNROLL");
18393 break;
18394 case SPIRBlock::HintDontUnroll:
18395 require_extension_internal(ext: "GL_EXT_control_flow_attributes");
18396 statement(ts: "SPIRV_CROSS_LOOP");
18397 break;
18398 default:
18399 break;
18400 }
18401}
18402
18403void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
18404{
18405 preserved_aliases[id] = get_name(id);
18406}
18407
18408void CompilerGLSL::reset_name_caches()
18409{
18410 for (auto &preserved : preserved_aliases)
18411 set_name(id: preserved.first, name: preserved.second);
18412
18413 preserved_aliases.clear();
18414 resource_names.clear();
18415 block_input_names.clear();
18416 block_output_names.clear();
18417 block_ubo_names.clear();
18418 block_ssbo_names.clear();
18419 block_names.clear();
18420 function_overloads.clear();
18421}
18422
18423void CompilerGLSL::fixup_anonymous_struct_names(std::unordered_set<uint32_t> &visited, const SPIRType &type)
18424{
18425 if (visited.count(x: type.self))
18426 return;
18427 visited.insert(x: type.self);
18428
18429 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
18430 {
18431 auto &mbr_type = get<SPIRType>(id: type.member_types[i]);
18432
18433 if (mbr_type.basetype == SPIRType::Struct)
18434 {
18435 // If there are multiple aliases, the output might be somewhat unpredictable,
18436 // but the only real alternative in that case is to do nothing, which isn't any better.
18437 // This check should be fine in practice.
18438 if (get_name(id: mbr_type.self).empty() && !get_member_name(id: type.self, index: i).empty())
18439 {
18440 auto anon_name = join(ts: "anon_", ts: get_member_name(id: type.self, index: i));
18441 ParsedIR::sanitize_underscores(str&: anon_name);
18442 set_name(id: mbr_type.self, name: anon_name);
18443 }
18444
18445 fixup_anonymous_struct_names(visited, type: mbr_type);
18446 }
18447 }
18448}
18449
18450void CompilerGLSL::fixup_anonymous_struct_names()
18451{
18452 // HLSL codegen can often end up emitting anonymous structs inside blocks, which
18453 // breaks GL linking since all names must match ...
18454 // Try to emit sensible code, so attempt to find such structs and emit anon_$member.
18455
18456 // Breaks exponential explosion with weird type trees.
18457 std::unordered_set<uint32_t> visited;
18458
18459 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t, SPIRType &type) {
18460 if (type.basetype == SPIRType::Struct &&
18461 (has_decoration(id: type.self, decoration: DecorationBlock) ||
18462 has_decoration(id: type.self, decoration: DecorationBufferBlock)))
18463 {
18464 fixup_anonymous_struct_names(visited, type);
18465 }
18466 });
18467}
18468
18469void CompilerGLSL::fixup_type_alias()
18470{
18471 // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
18472 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t self, SPIRType &type) {
18473 if (!type.type_alias)
18474 return;
18475
18476 if (has_decoration(id: type.self, decoration: DecorationBlock) || has_decoration(id: type.self, decoration: DecorationBufferBlock))
18477 {
18478 // Top-level block types should never alias anything else.
18479 type.type_alias = 0;
18480 }
18481 else if (type_is_block_like(type) && type.self == ID(self))
18482 {
18483 // A block-like type is any type which contains Offset decoration, but not top-level blocks,
18484 // i.e. blocks which are placed inside buffers.
18485 // Become the master.
18486 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t other_id, SPIRType &other_type) {
18487 if (other_id == self)
18488 return;
18489
18490 if (other_type.type_alias == type.type_alias)
18491 other_type.type_alias = self;
18492 });
18493
18494 this->get<SPIRType>(id: type.type_alias).type_alias = self;
18495 type.type_alias = 0;
18496 }
18497 });
18498}
18499
18500void CompilerGLSL::reorder_type_alias()
18501{
18502 // Reorder declaration of types so that the master of the type alias is always emitted first.
18503 	// We need this in case a type B depends on type A (A must come before B in the vector), but A is an alias of a type ABuffer, which
18504 	// means the declaration of A doesn't happen (yet), and the order would be B, ABuffer and not ABuffer, B. Fix this up here.
18505 auto loop_lock = ir.create_loop_hard_lock();
18506
18507 auto &type_ids = ir.ids_for_type[TypeType];
18508 for (auto alias_itr = begin(cont&: type_ids); alias_itr != end(cont&: type_ids); ++alias_itr)
18509 {
18510 auto &type = get<SPIRType>(id: *alias_itr);
18511 if (type.type_alias != TypeID(0) &&
18512 !has_extended_decoration(id: type.type_alias, decoration: SPIRVCrossDecorationBufferBlockRepacked))
18513 {
18514 // We will skip declaring this type, so make sure the type_alias type comes before.
18515 auto master_itr = find(first: begin(cont&: type_ids), last: end(cont&: type_ids), val: ID(type.type_alias));
18516 assert(master_itr != end(type_ids));
18517
18518 if (alias_itr < master_itr)
18519 {
18520 // Must also swap the type order for the constant-type joined array.
18521 auto &joined_types = ir.ids_for_constant_undef_or_type;
18522 auto alt_alias_itr = find(first: begin(cont&: joined_types), last: end(cont&: joined_types), val: *alias_itr);
18523 auto alt_master_itr = find(first: begin(cont&: joined_types), last: end(cont&: joined_types), val: *master_itr);
18524 assert(alt_alias_itr != end(joined_types));
18525 assert(alt_master_itr != end(joined_types));
18526
18527 swap(a&: *alias_itr, b&: *master_itr);
18528 swap(a&: *alt_alias_itr, b&: *alt_master_itr);
18529 }
18530 }
18531 }
18532}
18533
18534void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
18535{
18536 // If we are redirecting statements, ignore the line directive.
18537 // Common case here is continue blocks.
18538 if (redirect_statement)
18539 return;
18540
18541 // If we're emitting code in a sensitive context such as condition blocks in for loops, don't emit
18542 // any line directives, because it's not possible.
18543 if (block_debug_directives)
18544 return;
18545
18546 if (options.emit_line_directives)
18547 {
18548 require_extension_internal(ext: "GL_GOOGLE_cpp_style_line_directive");
18549 statement_no_indent(ts: "#line ", ts&: line_literal, ts: " \"", ts&: get<SPIRString>(id: file_id).str, ts: "\"");
18550 }
18551}
18552
18553void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
18554 SmallVector<uint32_t> chain)
18555{
18556 // Fully unroll all member/array indices one by one.
18557
18558 auto &lhs_type = get<SPIRType>(id: lhs_type_id);
18559 auto &rhs_type = get<SPIRType>(id: rhs_type_id);
18560
18561 if (!lhs_type.array.empty())
18562 {
18563 // Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
18564 		// and this is a rather obscure opcode anyway, so keep it simple unless we are forced to.
18565 uint32_t array_size = to_array_size_literal(type: lhs_type);
18566 chain.push_back(t: 0);
18567
18568 for (uint32_t i = 0; i < array_size; i++)
18569 {
18570 chain.back() = i;
18571 emit_copy_logical_type(lhs_id, lhs_type_id: lhs_type.parent_type, rhs_id, rhs_type_id: rhs_type.parent_type, chain);
18572 }
18573 }
18574 else if (lhs_type.basetype == SPIRType::Struct)
18575 {
18576 chain.push_back(t: 0);
18577 uint32_t member_count = uint32_t(lhs_type.member_types.size());
18578 for (uint32_t i = 0; i < member_count; i++)
18579 {
18580 chain.back() = i;
18581 emit_copy_logical_type(lhs_id, lhs_type_id: lhs_type.member_types[i], rhs_id, rhs_type_id: rhs_type.member_types[i], chain);
18582 }
18583 }
18584 else
18585 {
18586 // Need to handle unpack/packing fixups since this can differ wildly between the logical types,
18587 // particularly in MSL.
18588 // To deal with this, we emit access chains and go through emit_store_statement
18589 // to deal with all the special cases we can encounter.
18590
18591 AccessChainMeta lhs_meta, rhs_meta;
18592 auto lhs = access_chain_internal(base: lhs_id, indices: chain.data(), count: uint32_t(chain.size()),
18593 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &lhs_meta);
18594 auto rhs = access_chain_internal(base: rhs_id, indices: chain.data(), count: uint32_t(chain.size()),
18595 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &rhs_meta);
18596
18597 uint32_t id = ir.increase_bound_by(count: 2);
18598 lhs_id = id;
18599 rhs_id = id + 1;
18600
18601 {
18602 auto &lhs_expr = set<SPIRExpression>(id: lhs_id, args: std::move(lhs), args&: lhs_type_id, args: true);
18603 lhs_expr.need_transpose = lhs_meta.need_transpose;
18604
18605 if (lhs_meta.storage_is_packed)
18606 set_extended_decoration(id: lhs_id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
18607 if (lhs_meta.storage_physical_type != 0)
18608 set_extended_decoration(id: lhs_id, decoration: SPIRVCrossDecorationPhysicalTypeID, value: lhs_meta.storage_physical_type);
18609
18610 forwarded_temporaries.insert(x: lhs_id);
18611 suppressed_usage_tracking.insert(x: lhs_id);
18612 }
18613
18614 {
18615 auto &rhs_expr = set<SPIRExpression>(id: rhs_id, args: std::move(rhs), args&: rhs_type_id, args: true);
18616 rhs_expr.need_transpose = rhs_meta.need_transpose;
18617
18618 if (rhs_meta.storage_is_packed)
18619 set_extended_decoration(id: rhs_id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
18620 if (rhs_meta.storage_physical_type != 0)
18621 set_extended_decoration(id: rhs_id, decoration: SPIRVCrossDecorationPhysicalTypeID, value: rhs_meta.storage_physical_type);
18622
18623 forwarded_temporaries.insert(x: rhs_id);
18624 suppressed_usage_tracking.insert(x: rhs_id);
18625 }
18626
18627 emit_store_statement(lhs_expression: lhs_id, rhs_expression: rhs_id);
18628 }
18629}
18630
18631bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
18632{
18633 if (!has_decoration(id, decoration: DecorationInputAttachmentIndex))
18634 return false;
18635
18636 uint32_t input_attachment_index = get_decoration(id, decoration: DecorationInputAttachmentIndex);
18637 for (auto &remap : subpass_to_framebuffer_fetch_attachment)
18638 if (remap.first == input_attachment_index)
18639 return true;
18640
18641 return false;
18642}
18643
18644const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
18645{
18646 const SPIRVariable *ret = nullptr;
18647 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
18648 if (has_decoration(id: var.self, decoration: DecorationInputAttachmentIndex) &&
18649 get_decoration(id: var.self, decoration: DecorationInputAttachmentIndex) == index)
18650 {
18651 ret = &var;
18652 }
18653 });
18654 return ret;
18655}
18656
18657const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
18658{
18659 const SPIRVariable *ret = nullptr;
18660 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
18661 if (var.storage == StorageClassOutput && get_decoration(id: var.self, decoration: DecorationLocation) == location)
18662 ret = &var;
18663 });
18664 return ret;
18665}
18666
18667void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
18668{
18669 for (auto &remap : subpass_to_framebuffer_fetch_attachment)
18670 {
18671 auto *subpass_var = find_subpass_input_by_attachment_index(index: remap.first);
18672 auto *output_var = find_color_output_by_location(location: remap.second);
18673 if (!subpass_var)
18674 continue;
18675 if (!output_var)
18676 SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
18677 "to read from it.");
18678 if (is_array(type: get<SPIRType>(id: output_var->basetype)))
18679 SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");
18680
18681 auto &func = get<SPIRFunction>(id: get_entry_point().self);
18682 func.fixup_hooks_in.push_back(t: [=]() {
18683 if (is_legacy())
18684 {
18685 statement(ts: to_expression(id: subpass_var->self), ts: " = ", ts: "gl_LastFragData[",
18686 ts: get_decoration(id: output_var->self, decoration: DecorationLocation), ts: "];");
18687 }
18688 else
18689 {
18690 uint32_t num_rt_components = this->get<SPIRType>(id: output_var->basetype).vecsize;
18691 statement(ts: to_expression(id: subpass_var->self), ts: vector_swizzle(vecsize: num_rt_components, index: 0), ts: " = ",
18692 ts: to_expression(id: output_var->self), ts: ";");
18693 }
18694 });
18695 }
18696}
18697
18698bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
18699{
18700 return is_depth_image(type: get<SPIRType>(id: get<SPIRVariable>(id).basetype), id);
18701}
18702
18703const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
18704{
18705 static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
18706 "GL_KHR_shader_subgroup_basic",
18707 "GL_KHR_shader_subgroup_vote",
18708 "GL_KHR_shader_subgroup_arithmetic",
18709 		                                                    "GL_NV_gpu_shader5",
18710 "GL_NV_shader_thread_group",
18711 "GL_NV_shader_thread_shuffle",
18712 "GL_ARB_shader_ballot",
18713 "GL_ARB_shader_group_vote",
18714 "GL_AMD_gcn_shader" };
18715 return retval[c];
18716}
18717
18718SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
18719{
18720 switch (c)
18721 {
18722 case ARB_shader_ballot:
18723 return { "GL_ARB_shader_int64" };
18724 case AMD_gcn_shader:
18725 return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
18726 default:
18727 return {};
18728 }
18729}
18730
18731const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
18732{
18733 switch (c)
18734 {
18735 case ARB_shader_ballot:
18736 return "defined(GL_ARB_shader_int64)";
18737 case AMD_gcn_shader:
18738 return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
18739 default:
18740 return "";
18741 }
18742}
18743
18744CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
18745 get_feature_dependencies(Feature feature)
18746{
18747 switch (feature)
18748 {
18749 case SubgroupAllEqualT:
18750 return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
18751 case SubgroupElect:
18752 return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
18753 case SubgroupInverseBallot_InclBitCount_ExclBitCout:
18754 return { SubgroupMask };
18755 case SubgroupBallotBitCount:
18756 return { SubgroupBallot };
18757 case SubgroupArithmeticIAddReduce:
18758 case SubgroupArithmeticIAddInclusiveScan:
18759 case SubgroupArithmeticFAddReduce:
18760 case SubgroupArithmeticFAddInclusiveScan:
18761 case SubgroupArithmeticIMulReduce:
18762 case SubgroupArithmeticIMulInclusiveScan:
18763 case SubgroupArithmeticFMulReduce:
18764 case SubgroupArithmeticFMulInclusiveScan:
18765 return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask, SubgroupBallotBitExtract };
18766 case SubgroupArithmeticIAddExclusiveScan:
18767 case SubgroupArithmeticFAddExclusiveScan:
18768 case SubgroupArithmeticIMulExclusiveScan:
18769 case SubgroupArithmeticFMulExclusiveScan:
18770 return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount,
18771 SubgroupMask, SubgroupElect, SubgroupBallotBitExtract };
18772 default:
18773 return {};
18774 }
18775}
18776
18777CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
18778 get_feature_dependency_mask(Feature feature)
18779{
18780 return build_mask(features: get_feature_dependencies(feature));
18781}
18782
18783bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
18784{
18785 static const bool retval[FeatureCount] = {
18786 false, false, false, false, false, false,
18787 		true, // SubgroupBallotFindLSB_MSB
18788 false, false, false, false,
18789 true, // SubgroupMemBarrier - replaced with workgroup memory barriers
18790 false, false, true, false,
18791 false, false, false, false, false, false, // iadd, fadd
18792 		false, false, false, false, false, false, // imul, fmul
18793 };
18794
18795 return retval[feature];
18796}
18797
18798CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
18799 get_KHR_extension_for_feature(Feature feature)
18800{
18801 static const Candidate extensions[FeatureCount] = {
18802 KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
18803 KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
18804 KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
18805 KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot,
18806 KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
18807 KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
18808 KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
18809 KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
18810 };
18811
18812 return extensions[feature];
18813}
18814
18815void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
18816{
18817 feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
18818}
18819
18820bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
18821{
18822 return (feature_mask & (1u << feature)) != 0;
18823}
18824
18825CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
18826{
18827 Result res;
18828
18829 for (uint32_t i = 0u; i < FeatureCount; ++i)
18830 {
18831 if (feature_mask & (1u << i))
18832 {
18833 auto feature = static_cast<Feature>(i);
18834 std::unordered_set<uint32_t> unique_candidates;
18835
18836 auto candidates = get_candidates_for_feature(ft: feature);
18837 unique_candidates.insert(first: candidates.begin(), last: candidates.end());
18838
18839 auto deps = get_feature_dependencies(feature);
18840 for (Feature d : deps)
18841 {
18842 candidates = get_candidates_for_feature(ft: d);
18843 if (!candidates.empty())
18844 unique_candidates.insert(first: candidates.begin(), last: candidates.end());
18845 }
18846
18847 for (uint32_t c : unique_candidates)
18848 ++res.weights[static_cast<Candidate>(c)];
18849 }
18850 }
18851
18852 return res;
18853}
18854
18855CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
18856 get_candidates_for_feature(Feature ft, const Result &r)
18857{
18858 auto c = get_candidates_for_feature(ft);
18859 auto cmp = [&r](Candidate a, Candidate b) {
18860 if (r.weights[a] == r.weights[b])
18861 return a < b; // Prefer candidates with lower enum value
18862 return r.weights[a] > r.weights[b];
18863 };
18864 std::sort(first: c.begin(), last: c.end(), comp: cmp);
18865 return c;
18866}
18867
18868CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
18869 get_candidates_for_feature(Feature feature)
18870{
18871 switch (feature)
18872 {
18873 case SubgroupMask:
18874 return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
18875 case SubgroupSize:
18876 return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
18877 case SubgroupInvocationID:
18878 return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
18879 case SubgroupID:
18880 return { KHR_shader_subgroup_basic, NV_shader_thread_group };
18881 case NumSubgroups:
18882 return { KHR_shader_subgroup_basic, NV_shader_thread_group };
18883 case SubgroupBroadcast_First:
18884 return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
18885 case SubgroupBallotFindLSB_MSB:
18886 return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
18887 case SubgroupAll_Any_AllEqualBool:
18888 return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
18889 case SubgroupAllEqualT:
18890 return {}; // depends on other features only
18891 case SubgroupElect:
18892 return {}; // depends on other features only
18893 case SubgroupBallot:
18894 return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
18895 case SubgroupBarrier:
18896 return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
18897 case SubgroupMemBarrier:
18898 return { KHR_shader_subgroup_basic };
18899 case SubgroupInverseBallot_InclBitCount_ExclBitCout:
18900 return {};
18901 case SubgroupBallotBitExtract:
18902 return { NV_shader_thread_group };
18903 case SubgroupBallotBitCount:
18904 return {};
18905 case SubgroupArithmeticIAddReduce:
18906 case SubgroupArithmeticIAddExclusiveScan:
18907 case SubgroupArithmeticIAddInclusiveScan:
18908 case SubgroupArithmeticFAddReduce:
18909 case SubgroupArithmeticFAddExclusiveScan:
18910 case SubgroupArithmeticFAddInclusiveScan:
18911 case SubgroupArithmeticIMulReduce:
18912 case SubgroupArithmeticIMulExclusiveScan:
18913 case SubgroupArithmeticIMulInclusiveScan:
18914 case SubgroupArithmeticFMulReduce:
18915 case SubgroupArithmeticFMulExclusiveScan:
18916 case SubgroupArithmeticFMulInclusiveScan:
18917 return { KHR_shader_subgroup_arithmetic, NV_shader_thread_shuffle };
18918 default:
18919 return {};
18920 }
18921}
18922
18923CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
18924 const SmallVector<Feature> &features)
18925{
18926 FeatureMask mask = 0;
18927 for (Feature f : features)
18928 mask |= FeatureMask(1) << f;
18929 return mask;
18930}
18931
18932CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
18933{
18934 for (auto &weight : weights)
18935 weight = 0;
18936
18937 	// Make sure the KHR_shader_subgroup extensions are always preferred.
18938 const uint32_t big_num = FeatureCount;
18939 weights[KHR_shader_subgroup_ballot] = big_num;
18940 weights[KHR_shader_subgroup_basic] = big_num;
18941 weights[KHR_shader_subgroup_vote] = big_num;
18942 weights[KHR_shader_subgroup_arithmetic] = big_num;
18943}
18944
18945void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
18946{
18947 // Must be ordered to maintain deterministic output, so vector is appropriate.
18948 if (find(first: begin(cont&: workaround_ubo_load_overload_types), last: end(cont&: workaround_ubo_load_overload_types), val: id) ==
18949 end(cont&: workaround_ubo_load_overload_types))
18950 {
18951 force_recompile();
18952 workaround_ubo_load_overload_types.push_back(t: id);
18953 }
18954}
18955
18956void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
18957{
18958 // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
18959 // To load these types correctly, we must first wrap them in a dummy function which only purpose is to
18960 	// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
18961 auto *var = maybe_get_backing_variable(chain: ptr);
18962 if (!var)
18963 return;
18964
18965 auto &backing_type = get<SPIRType>(id: var->basetype);
18966 bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform &&
18967 has_decoration(id: backing_type.self, decoration: DecorationBlock);
18968 if (!is_ubo)
18969 return;
18970
18971 auto *type = &get<SPIRType>(id: loaded_type);
18972 bool rewrite = false;
18973 bool relaxed = options.es;
18974
18975 if (is_matrix(type: *type))
18976 {
18977 // To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
18978 // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
18979 // If there is any row-major action going on, we apply the workaround.
18980 // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
18981 		// If an access chain occurred, the workaround is not required, so loads of vectors or scalars do not need the workaround.
18982 type = &backing_type;
18983 }
18984 else
18985 {
18986 // If we're loading a composite, we don't have overloads like these.
18987 relaxed = false;
18988 }
18989
18990 if (type->basetype == SPIRType::Struct)
18991 {
18992 // If we're loading a struct where any member is a row-major matrix, apply the workaround.
18993 for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
18994 {
18995 auto decorations = combined_decoration_for_member(type: *type, index: i);
18996 if (decorations.get(bit: DecorationRowMajor))
18997 rewrite = true;
18998
18999 // Since we decide on a per-struct basis, only use mediump wrapper if all candidates are mediump.
19000 if (!decorations.get(bit: DecorationRelaxedPrecision))
19001 relaxed = false;
19002 }
19003 }
19004
19005 if (rewrite)
19006 {
19007 request_workaround_wrapper_overload(id: loaded_type);
19008 expr = join(ts: "spvWorkaroundRowMajor", ts: (relaxed ? "MP" : ""), ts: "(", ts&: expr, ts: ")");
19009 }
19010}
19011
19012void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component)
19013{
19014 masked_output_locations.insert(x: { .location: location, .component: component });
19015}
19016
19017void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin)
19018{
19019 masked_output_builtins.insert(x: builtin);
19020}
19021
19022bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const
19023{
19024 auto &type = get<SPIRType>(id: var.basetype);
19025 bool is_block = has_decoration(id: type.self, decoration: DecorationBlock);
19026 // Blocks by themselves are never masked. Must be masked per-member.
19027 if (is_block)
19028 return false;
19029
19030 bool is_builtin = has_decoration(id: var.self, decoration: DecorationBuiltIn);
19031
19032 if (is_builtin)
19033 {
19034 return is_stage_output_builtin_masked(builtin: BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn)));
19035 }
19036 else
19037 {
19038 if (!has_decoration(id: var.self, decoration: DecorationLocation))
19039 return false;
19040
19041 return is_stage_output_location_masked(
19042 location: get_decoration(id: var.self, decoration: DecorationLocation),
19043 component: get_decoration(id: var.self, decoration: DecorationComponent));
19044 }
19045}
19046
19047bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const
19048{
19049 auto &type = get<SPIRType>(id: var.basetype);
19050 bool is_block = has_decoration(id: type.self, decoration: DecorationBlock);
19051 if (!is_block)
19052 return false;
19053
19054 BuiltIn builtin = BuiltInMax;
19055 if (is_member_builtin(type, index, builtin: &builtin))
19056 {
19057 return is_stage_output_builtin_masked(builtin);
19058 }
19059 else
19060 {
19061 uint32_t location = get_declared_member_location(var, mbr_idx: index, strip_array);
19062 uint32_t component = get_member_decoration(id: type.self, index, decoration: DecorationComponent);
19063 return is_stage_output_location_masked(location, component);
19064 }
19065}
19066
19067bool CompilerGLSL::is_per_primitive_variable(const SPIRVariable &var) const
19068{
19069 if (has_decoration(id: var.self, decoration: DecorationPerPrimitiveEXT))
19070 return true;
19071
19072 auto &type = get<SPIRType>(id: var.basetype);
19073 if (!has_decoration(id: type.self, decoration: DecorationBlock))
19074 return false;
19075
19076 for (uint32_t i = 0, n = uint32_t(type.member_types.size()); i < n; i++)
19077 if (!has_member_decoration(id: type.self, index: i, decoration: DecorationPerPrimitiveEXT))
19078 return false;
19079
19080 return true;
19081}
19082
19083bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const
19084{
19085 return masked_output_locations.count(x: { .location: location, .component: component }) != 0;
19086}
19087
19088bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const
19089{
19090 return masked_output_builtins.count(x: builtin) != 0;
19091}
19092
19093uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
19094{
19095 auto &block_type = get<SPIRType>(id: var.basetype);
19096 if (has_member_decoration(id: block_type.self, index: mbr_idx, decoration: DecorationLocation))
19097 return get_member_decoration(id: block_type.self, index: mbr_idx, decoration: DecorationLocation);
19098 else
19099 return get_accumulated_member_location(var, mbr_idx, strip_array);
19100}
19101
19102uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
19103{
19104 auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
19105 uint32_t location = get_decoration(id: var.self, decoration: DecorationLocation);
19106
19107 for (uint32_t i = 0; i < mbr_idx; i++)
19108 {
19109 auto &mbr_type = get<SPIRType>(id: type.member_types[i]);
19110
19111 // Start counting from any place we have a new location decoration.
19112 		if (has_member_decoration(id: type.self, index: i, decoration: DecorationLocation))
19113 			location = get_member_decoration(id: type.self, index: i, decoration: DecorationLocation);
19114
19115 uint32_t location_count = type_to_location_count(type: mbr_type);
19116 location += location_count;
19117 }
19118
19119 return location;
19120}
19121
19122StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
19123{
19124 auto *var = maybe_get_backing_variable(chain: ptr);
19125
19126 // If the expression has been lowered to a temporary, we need to use the Generic storage class.
19127 // We're looking for the effective storage class of a given expression.
19128 // An access chain or forwarded OpLoads from such access chains
19129 // will generally have the storage class of the underlying variable, but if the load was not forwarded
19130 // we have lost any address space qualifiers.
19131 bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(id: ptr).access_chain &&
19132 (forced_temporaries.count(x: ptr) != 0 || forwarded_temporaries.count(x: ptr) == 0);
19133
19134 if (var && !forced_temporary)
19135 {
19136 if (variable_decl_is_remapped_storage(var: *var, storage: StorageClassWorkgroup))
19137 return StorageClassWorkgroup;
19138 if (variable_decl_is_remapped_storage(var: *var, storage: StorageClassStorageBuffer))
19139 return StorageClassStorageBuffer;
19140
19141 // Normalize SSBOs to StorageBuffer here.
19142 if (var->storage == StorageClassUniform &&
19143 has_decoration(id: get<SPIRType>(id: var->basetype).self, decoration: DecorationBufferBlock))
19144 return StorageClassStorageBuffer;
19145 else
19146 return var->storage;
19147 }
19148 else
19149 return expression_type(id: ptr).storage;
19150}
19151
19152uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const
19153{
19154 uint32_t count;
19155 if (type.basetype == SPIRType::Struct)
19156 {
19157 uint32_t mbr_count = uint32_t(type.member_types.size());
19158 count = 0;
19159 for (uint32_t i = 0; i < mbr_count; i++)
19160 count += type_to_location_count(type: get<SPIRType>(id: type.member_types[i]));
19161 }
19162 else
19163 {
19164 count = type.columns > 1 ? type.columns : 1;
19165 }
19166
19167 uint32_t dim_count = uint32_t(type.array.size());
19168 for (uint32_t i = 0; i < dim_count; i++)
19169 count *= to_array_size_literal(type, index: i);
19170
19171 return count;
19172}
19173
19174std::string CompilerGLSL::format_float(float value) const
19175{
19176 if (float_formatter)
19177 return float_formatter->format_float(value);
19178
19179 // default behavior
19180 return convert_to_string(t: value, locale_radix_point: current_locale_radix_character);
19181}
19182
19183std::string CompilerGLSL::format_double(double value) const
19184{
19185 if (float_formatter)
19186 return float_formatter->format_double(value);
19187
19188 // default behavior
19189 return convert_to_string(t: value, locale_radix_point: current_locale_radix_character);
19190}
19191
19192