/*
 * Copyright 2015-2021 Arm Limited
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <limits>
#include <locale.h>
#include <utility>
#include <array>

#ifndef _WIN32
#ifndef __ghs__
#include <langinfo.h>
#endif
#endif
#include <locale.h>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

enum ExtraSubExpressionType
{
	// Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map.
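	// (The intent, presumably, is that a sub-expression's ID is OR'd with one of these masks to form its
	// key in extra_sub_expressions, e.g. an ID of 5 used as a stream offset would be keyed as 0x10000005.)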
	EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000,
	EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000
};

static bool is_unsigned_opcode(Op op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case OpShiftRightLogical:
	case OpUGreaterThan:
	case OpUGreaterThanEqual:
	case OpULessThan:
	case OpULessThanEqual:
	case OpUConvert:
	case OpUDiv:
	case OpUMod:
	case OpUMulExtended:
	case OpConvertUToF:
	case OpConvertFToU:
		return true;

	default:
		return false;
	}
}

static bool is_unsigned_glsl_opcode(GLSLstd450 op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case GLSLstd450UClamp:
	case GLSLstd450UMin:
	case GLSLstd450UMax:
	case GLSLstd450FindUMsb:
		return true;

	default:
		return false;
	}
}

static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
	case BufferPackingStd140:
	case BufferPackingStd140EnhancedLayout:
		return true;

	default:
		return false;
	}
}

static bool packing_is_hlsl(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
		return true;

	default:
		return false;
	}
}

static bool packing_has_flexible_offset(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140:
	case BufferPackingStd430:
	case BufferPackingScalar:
	case BufferPackingHLSLCbuffer:
		return false;

	default:
		return true;
	}
}

static bool packing_is_scalar(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingScalar:
	case BufferPackingScalarEnhancedLayout:
		return true;

	default:
		return false;
	}
}

static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140EnhancedLayout:
		return BufferPackingStd140;
	case BufferPackingStd430EnhancedLayout:
		return BufferPackingStd430;
	case BufferPackingHLSLCbufferPackOffset:
		return BufferPackingHLSLCbuffer;
	case BufferPackingScalarEnhancedLayout:
		return BufferPackingScalar;
	default:
		return packing;
	}
}

void CompilerGLSL::init()
{
	if (ir.source.known)
	{
		options.es = ir.source.es;
		options.version = ir.source.version;
	}

	// Query the locale to see what the decimal point is.
	// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
	// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
	// tricky.
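	// For example, with a comma-as-decimal locale such as de_DE, naive number formatting would emit
	// float literals like "1,5" instead of "1.5", which is not valid GLSL, so the radix character
	// recorded here is used to fix up emitted literals later.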
#ifdef _WIN32
	// On Windows, localeconv uses thread-local storage, so it should be fine.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#elif defined(__ANDROID__) && __ANDROID_API__ < 26 || defined(__ghs__) || defined(__QNXNTO__) || defined(__VXWORKS__)
	// nl_langinfo is not supported on this platform; fall back to the worse alternative.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#else
	// localeconv, the portable function, is not MT safe ...
	const char *decimal_point = nl_langinfo(RADIXCHAR);
	if (decimal_point && *decimal_point != '\0')
		current_locale_radix_character = *decimal_point;
#endif
}

static const char *to_pls_layout(PlsFormat format)
{
	switch (format)
	{
	case PlsR11FG11FB10F:
		return "layout(r11f_g11f_b10f) ";
	case PlsR32F:
		return "layout(r32f) ";
	case PlsRG16F:
		return "layout(rg16f) ";
	case PlsRGB10A2:
		return "layout(rgb10_a2) ";
	case PlsRGBA8:
		return "layout(rgba8) ";
	case PlsRG16:
		return "layout(rg16) ";
	case PlsRGBA8I:
		return "layout(rgba8i) ";
	case PlsRG16I:
		return "layout(rg16i) ";
	case PlsRGB10A2UI:
		return "layout(rgb10_a2ui) ";
	case PlsRGBA8UI:
		return "layout(rgba8ui) ";
	case PlsRG16UI:
		return "layout(rg16ui) ";
	case PlsR32UI:
		return "layout(r32ui) ";
	default:
		return "";
	}
}

static std::pair<spv::Op, SPIRType::BaseType> pls_format_to_basetype(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR11FG11FB10F:
	case PlsR32F:
	case PlsRG16F:
	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRG16:
		return std::make_pair(spv::OpTypeFloat, SPIRType::Float);

	case PlsRGBA8I:
	case PlsRG16I:
		return std::make_pair(spv::OpTypeInt, SPIRType::Int);

	case PlsRGB10A2UI:
	case PlsRGBA8UI:
	case PlsRG16UI:
	case PlsR32UI:
		return std::make_pair(spv::OpTypeInt, SPIRType::UInt);
	}
}

static uint32_t pls_format_to_components(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR32F:
	case PlsR32UI:
		return 1;

	case PlsRG16F:
	case PlsRG16:
	case PlsRG16UI:
	case PlsRG16I:
		return 2;

	case PlsR11FG11FB10F:
		return 3;

	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRGBA8I:
	case PlsRGB10A2UI:
	case PlsRGBA8UI:
		return 4;
	}
}

const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
{
	static const char *const swizzle[4][4] = {
		{ ".x", ".y", ".z", ".w" },
		{ ".xy", ".yz", ".zw", nullptr },
		{ ".xyz", ".yzw", nullptr, nullptr },
#if defined(__GNUC__) && (__GNUC__ == 9)
		// This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
		// This array ends up being compiled as all nullptrs, tripping the assertions below.
		{ "", nullptr, nullptr, "$" },
#else
		{ "", nullptr, nullptr, nullptr },
#endif
	};

	assert(vecsize >= 1 && vecsize <= 4);
	assert(index >= 0 && index < 4);
	assert(swizzle[vecsize - 1][index]);
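	// For illustration: vector_swizzle(1, 1) yields ".y", vector_swizzle(3, 1) yields ".yzw",
	// and vector_swizzle(4, 0) yields "" (a full vector needs no swizzle).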

	return swizzle[vecsize - 1][index];
}

void CompilerGLSL::reset(uint32_t iteration_count)
{
	// Sanity check the iteration count to be robust against a certain class of bugs where
	// we keep forcing recompilations without making clear forward progress.
	// In buggy situations we will loop forever, or loop for an unbounded number of iterations.
	// Certain types of recompilations are considered to make forward progress,
	// but in almost all situations, we'll never see more than 3 iterations.
	// It is highly context-sensitive when we need to force recompilation,
	// and it is not practical with the current architecture
	// to resolve everything up front.
	if (iteration_count >= options.force_recompile_max_debug_iterations && !is_force_recompile_forward_progress)
		SPIRV_CROSS_THROW("Maximum compilation loops detected and no forward progress was made. Must be a SPIRV-Cross bug!");

	// We do some speculative optimizations which should pretty much always work out,
	// but just in case the SPIR-V is rather weird, recompile until it's happy.
	// This typically only means one extra pass.
	clear_force_recompile();

	// Clear invalid expression tracking.
	invalid_expressions.clear();
	composite_insert_overwritten.clear();
	current_function = nullptr;

	// Clear temporary usage tracking.
	expression_usage_counts.clear();
	forwarded_temporaries.clear();
	suppressed_usage_tracking.clear();

	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
	flushed_phi_variables.clear();

	current_emitting_switch_stack.clear();

	reset_name_caches();

	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
		func.active = false;
		func.flush_undeclared = true;
	});

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });

	ir.reset_all_of_type<SPIRExpression>();
	ir.reset_all_of_type<SPIRAccessChain>();

	statement_count = 0;
	indent = 0;
	current_loop_level = 0;
}

void CompilerGLSL::remap_pls_variables()
{
	for (auto &input : pls_inputs)
	{
		auto &var = get<SPIRVariable>(input.id);

		bool input_is_target = false;
		if (var.storage == StorageClassUniformConstant)
		{
			auto &type = get<SPIRType>(var.basetype);
			input_is_target = type.image.dim == DimSubpassData;
		}

		if (var.storage != StorageClassInput && !input_is_target)
			SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
		var.remapped_variable = true;
	}

	for (auto &output : pls_outputs)
	{
		auto &var = get<SPIRVariable>(output.id);
		if (var.storage != StorageClassOutput)
			SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
		var.remapped_variable = true;
	}
}

void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent)
{
	subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
	inout_color_attachments.push_back({ color_location, coherent });
}

bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const
{
	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
	                    [&](const std::pair<uint32_t, bool> &elem) {
		                    return elem.first == location;
	                    }) != end(inout_color_attachments);
}

bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const
{
	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
	                    [&](const std::pair<uint32_t, bool> &elem) {
		                    return elem.first == location && !elem.second;
	                    }) != end(inout_color_attachments);
}

void CompilerGLSL::find_static_extensions()
{
	ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
		if (type.basetype == SPIRType::Double)
		{
			if (options.es)
				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
			if (!options.es && options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader_fp64");
		}
		else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
		{
			if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310.
				SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310.");
			require_extension_internal("GL_ARB_gpu_shader_int64");
		}
		else if (type.basetype == SPIRType::Half)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_16bit_storage");
		}
		else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_8bit_storage");
		}
		else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_16bit_storage");
		}
	});

	auto &execution = get_entry_point();
	switch (execution.model)
	{
	case ExecutionModelGLCompute:
		if (!options.es && options.version < 430)
			require_extension_internal("GL_ARB_compute_shader");
		if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
		break;

	case ExecutionModelGeometry:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_geometry_shader");
		if (!options.es && options.version < 150)
			require_extension_internal("GL_ARB_geometry_shader4");

		if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
		{
			// Instanced GS is part of 400 core or this extension.
			if (!options.es && options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader5");
		}
		break;

	case ExecutionModelTessellationEvaluation:
	case ExecutionModelTessellationControl:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_tessellation_shader");
		if (!options.es && options.version < 400)
			require_extension_internal("GL_ARB_tessellation_shader");
		break;

	case ExecutionModelRayGenerationKHR:
	case ExecutionModelIntersectionKHR:
	case ExecutionModelAnyHitKHR:
	case ExecutionModelClosestHitKHR:
	case ExecutionModelMissKHR:
	case ExecutionModelCallableKHR:
		// NV enums are aliases.
		if (options.es || options.version < 460)
			SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");

		// Need to figure out if we should target KHR or NV extension based on capabilities.
		for (auto &cap : ir.declared_capabilities)
		{
			if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR ||
			    cap == CapabilityRayTraversalPrimitiveCullingKHR)
			{
				ray_tracing_is_khr = true;
				break;
			}
		}

		if (ray_tracing_is_khr)
		{
			// In KHR ray tracing we pass payloads by pointer instead of location,
			// so make sure we assign locations properly.
			ray_tracing_khr_fixup_locations();
			require_extension_internal("GL_EXT_ray_tracing");
		}
		else
			require_extension_internal("GL_NV_ray_tracing");
		break;

	case ExecutionModelMeshEXT:
	case ExecutionModelTaskEXT:
		if (options.es || options.version < 450)
			SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above.");
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("Mesh shaders require Vulkan semantics.");
		require_extension_internal("GL_EXT_mesh_shader");
		break;

	default:
		break;
	}

	if (!pls_inputs.empty() || !pls_outputs.empty())
	{
		if (execution.model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
		require_extension_internal("GL_EXT_shader_pixel_local_storage");
	}

	if (!inout_color_attachments.empty())
	{
		if (execution.model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
		if (options.vulkan_semantics)
			SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");

		bool has_coherent = false;
		bool has_incoherent = false;

		for (auto &att : inout_color_attachments)
		{
			if (att.second)
				has_coherent = true;
			else
				has_incoherent = true;
		}

		if (has_coherent)
			require_extension_internal("GL_EXT_shader_framebuffer_fetch");
		if (has_incoherent)
			require_extension_internal("GL_EXT_shader_framebuffer_fetch_non_coherent");
	}

	if (options.separate_shader_objects && !options.es && options.version < 410)
		require_extension_internal("GL_ARB_separate_shader_objects");

	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
	{
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
		if (options.es && options.version < 320)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
		else if (!options.es && options.version < 450)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
		require_extension_internal("GL_EXT_buffer_reference2");
	}
	else if (ir.addressing_model != AddressingModelLogical)
	{
		SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
	}

	// Check for nonuniform qualifier and passthrough.
	// Instead of looping over all decorations to find this, just look at capabilities.
	for (auto &cap : ir.declared_capabilities)
	{
		switch (cap)
		{
		case CapabilityShaderNonUniformEXT:
			if (!options.vulkan_semantics)
				require_extension_internal("GL_NV_gpu_shader5");
			else
				require_extension_internal("GL_EXT_nonuniform_qualifier");
			break;
		case CapabilityRuntimeDescriptorArrayEXT:
			if (!options.vulkan_semantics)
				SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
			require_extension_internal("GL_EXT_nonuniform_qualifier");
			break;

		case CapabilityGeometryShaderPassthroughNV:
			if (execution.model == ExecutionModelGeometry)
			{
				require_extension_internal("GL_NV_geometry_shader_passthrough");
				execution.geometry_passthrough = true;
			}
			break;

		case CapabilityVariablePointers:
		case CapabilityVariablePointersStorageBuffer:
			SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");

		case CapabilityMultiView:
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_multiview");
			else
			{
				require_extension_internal("GL_OVR_multiview2");
				if (options.ovr_multiview_view_count == 0)
					SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2.");
				if (get_execution_model() != ExecutionModelVertex)
					SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
			}
			break;

		case CapabilityRayQueryKHR:
			if (options.es || options.version < 460 || !options.vulkan_semantics)
				SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
			require_extension_internal("GL_EXT_ray_query");
			ray_tracing_is_khr = true;
			break;

		case CapabilityRayTraversalPrimitiveCullingKHR:
			if (options.es || options.version < 460 || !options.vulkan_semantics)
				SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
			require_extension_internal("GL_EXT_ray_flags_primitive_culling");
			ray_tracing_is_khr = true;
			break;

		default:
			break;
		}
	}

	if (options.ovr_multiview_view_count)
	{
		if (options.vulkan_semantics)
			SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics.");
		if (get_execution_model() != ExecutionModelVertex)
			SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
		require_extension_internal("GL_OVR_multiview2");
	}

	// KHR one is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR.
	for (auto &ext : ir.declared_extensions)
		if (ext == "SPV_NV_fragment_shader_barycentric")
			barycentric_is_nv = true;
}

void CompilerGLSL::require_polyfill(Polyfill polyfill, bool relaxed)
{
	uint32_t &polyfills = (relaxed && (options.es || options.vulkan_semantics)) ?
	                      required_polyfills_relaxed : required_polyfills;

	if ((polyfills & polyfill) == 0)
	{
		polyfills |= polyfill;
		force_recompile();
	}
}

void CompilerGLSL::ray_tracing_khr_fixup_locations()
{
	uint32_t location = 0;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		// Incoming payload storage can also be used for tracing.
		if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR &&
		    var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR)
			return;
		if (is_hidden_variable(var))
			return;
		set_decoration(var.self, DecorationLocation, location++);
	});
}

string CompilerGLSL::compile()
{
	ir.fixup_reserved_names();

	if (!options.vulkan_semantics)
	{
		// only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
		backend.nonuniform_qualifier = "";
		backend.needs_row_major_load_workaround = options.enable_row_major_load_workaround;
	}
	backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
	backend.force_gl_in_out_block = true;
	backend.supports_extensions = true;
	backend.use_array_constructor = true;
	backend.workgroup_size_is_hidden = true;
	backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics;
	backend.support_precise_qualifier =
	    (!options.es && options.version >= 400) || (options.es && options.version >= 320);

	if (is_legacy_es())
		backend.support_case_fallthrough = false;

	// Scan the SPIR-V to find trivial uses of extensions.
	fixup_anonymous_struct_names();
	fixup_type_alias();
	reorder_type_alias();
	build_function_control_flow_graphs_and_analyze();
	find_static_extensions();
	fixup_image_load_store_access();
	update_active_builtins();
	analyze_image_and_sampler_usage();
	analyze_interlocked_resource_usage();
	if (!inout_color_attachments.empty())
		emit_inout_fragment_outputs_copy_to_subpass_inputs();

	// Shaders might cast unrelated data to pointers of non-block types.
	// Find all such instances and make sure we can cast the pointers to a synthesized block type.
	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
		analyze_non_block_pointer_types();

	uint32_t pass_count = 0;
	do
	{
		reset(pass_count);

		buffer.reset();

		emit_header();
		emit_resources();
		emit_extension_workarounds(get_execution_model());

		if (required_polyfills != 0)
			emit_polyfills(required_polyfills, false);
		if ((options.es || options.vulkan_semantics) && required_polyfills_relaxed != 0)
			emit_polyfills(required_polyfills_relaxed, true);

		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());

		pass_count++;
	} while (is_forcing_recompilation());

	// Implement the interlocked wrapper function at the end.
	// The body was implemented in lieu of main().
	if (interlocked_is_complex)
	{
		statement("void main()");
		begin_scope();
		statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
		statement("SPIRV_Cross_beginInvocationInterlock();");
		statement("spvMainInterlockedBody();");
		statement("SPIRV_Cross_endInvocationInterlock();");
		end_scope();
	}

	// Entry point in GLSL is always main().
	get_entry_point().name = "main";

	return buffer.str();
}

std::string CompilerGLSL::get_partial_source()
{
	return buffer.str();
}

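// The arguments built below are merged into a single layout declaration by emit_header(),
// e.g. "layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;" for a fixed 8x8x1
// workgroup (the exact form differs when specialization constants are involved).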
void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
                                        const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
{
	auto &execution = get_entry_point();
	bool builtin_workgroup = execution.workgroup_size.constant != 0;
	bool use_local_size_id = !builtin_workgroup && execution.flags.get(ExecutionModeLocalSizeId);

	if (wg_x.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
		else
			arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
	}
	else if (use_local_size_id && execution.workgroup_size.id_x)
		arguments.push_back(join("local_size_x = ", get<SPIRConstant>(execution.workgroup_size.id_x).scalar()));
	else
		arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));

	if (wg_y.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
		else
			arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
	}
	else if (use_local_size_id && execution.workgroup_size.id_y)
		arguments.push_back(join("local_size_y = ", get<SPIRConstant>(execution.workgroup_size.id_y).scalar()));
	else
		arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));

	if (wg_z.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
		else
			arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
	}
	else if (use_local_size_id && execution.workgroup_size.id_z)
		arguments.push_back(join("local_size_z = ", get<SPIRConstant>(execution.workgroup_size.id_z).scalar()));
	else
		arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
}

void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
{
	if (options.vulkan_semantics)
	{
		auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
		require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
	}
	else
	{
		if (!shader_subgroup_supporter.is_feature_requested(feature))
			force_recompile();
		shader_subgroup_supporter.request_feature(feature);
	}
}

void CompilerGLSL::emit_header()
{
	auto &execution = get_entry_point();
	statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");

	if (!options.es && options.version < 420)
	{
		// Needed for binding = # on UBOs, etc.
		if (options.enable_420pack_extension)
		{
			statement("#ifdef GL_ARB_shading_language_420pack");
			statement("#extension GL_ARB_shading_language_420pack : require");
			statement("#endif");
		}
		// Needed for: layout(early_fragment_tests) in;
		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
			require_extension_internal("GL_ARB_shader_image_load_store");
	}

	// Needed for: layout(post_depth_coverage) in;
	if (execution.flags.get(ExecutionModePostDepthCoverage))
		require_extension_internal("GL_ARB_post_depth_coverage");

	// Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
	bool interlock_used = execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
	                      execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
	                      execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
	                      execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT);

	if (interlock_used)
	{
		if (options.es)
		{
			if (options.version < 310)
				SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
			require_extension_internal("GL_NV_fragment_shader_interlock");
		}
		else
		{
			if (options.version < 420)
				require_extension_internal("GL_ARB_shader_image_load_store");
			require_extension_internal("GL_ARB_fragment_shader_interlock");
		}
	}

	for (auto &ext : forced_extensions)
	{
		if (ext == "GL_ARB_gpu_shader_int64")
		{
			statement("#if defined(GL_ARB_gpu_shader_int64)");
			statement("#extension GL_ARB_gpu_shader_int64 : require");
			if (!options.vulkan_semantics || options.es)
			{
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
			}
			statement("#else");
			statement("#error No extension available for 64-bit integers.");
			statement("#endif");
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
		{
			// Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
			// GL_AMD_gpu_shader_half_float is a superset, so try that first.
			statement("#if defined(GL_AMD_gpu_shader_half_float)");
			statement("#extension GL_AMD_gpu_shader_half_float : require");
			if (!options.vulkan_semantics)
			{
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
			}
			else
			{
				statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
			}
			statement("#else");
			statement("#error No extension available for FP16.");
			statement("#endif");
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int8")
		{
			if (options.vulkan_semantics)
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require");
			else
			{
				statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int8)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require");
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
				statement("#else");
				statement("#error No extension available for Int8.");
				statement("#endif");
			}
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
		{
			if (options.vulkan_semantics)
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
			else
			{
				statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int16)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
				statement("#elif defined(GL_AMD_gpu_shader_int16)");
				statement("#extension GL_AMD_gpu_shader_int16 : require");
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
				statement("#else");
				statement("#error No extension available for Int16.");
				statement("#endif");
			}
		}
		else if (ext == "GL_ARB_post_depth_coverage")
		{
			if (options.es)
				statement("#extension GL_EXT_post_depth_coverage : require");
			else
			{
				statement("#if defined(GL_ARB_post_depth_coverage)");
				statement("#extension GL_ARB_post_depth_coverage : require");
				statement("#else");
				statement("#extension GL_EXT_post_depth_coverage : require");
				statement("#endif");
			}
		}
		else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
		{
			// Soft-enable this extension on plain GLSL.
			statement("#ifdef ", ext);
			statement("#extension ", ext, " : enable");
			statement("#endif");
		}
		else if (ext == "GL_EXT_control_flow_attributes")
		{
			// These are just hints so we can conditionally enable and fallback in the shader.
			statement("#if defined(GL_EXT_control_flow_attributes)");
			statement("#extension GL_EXT_control_flow_attributes : require");
			statement("#define SPIRV_CROSS_FLATTEN [[flatten]]");
			statement("#define SPIRV_CROSS_BRANCH [[dont_flatten]]");
			statement("#define SPIRV_CROSS_UNROLL [[unroll]]");
			statement("#define SPIRV_CROSS_LOOP [[dont_unroll]]");
			statement("#else");
			statement("#define SPIRV_CROSS_FLATTEN");
			statement("#define SPIRV_CROSS_BRANCH");
			statement("#define SPIRV_CROSS_UNROLL");
			statement("#define SPIRV_CROSS_LOOP");
			statement("#endif");
		}
		else if (ext == "GL_NV_fragment_shader_interlock")
		{
			statement("#extension GL_NV_fragment_shader_interlock : require");
			statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockNV()");
			statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockNV()");
		}
		else if (ext == "GL_ARB_fragment_shader_interlock")
		{
			statement("#ifdef GL_ARB_fragment_shader_interlock");
			statement("#extension GL_ARB_fragment_shader_interlock : enable");
			statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB()");
			statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB()");
			statement("#elif defined(GL_INTEL_fragment_shader_ordering)");
			statement("#extension GL_INTEL_fragment_shader_ordering : enable");
			statement("#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL()");
			statement("#define SPIRV_Cross_endInvocationInterlock()");
			statement("#endif");
		}
		else
			statement("#extension ", ext, " : require");
	}

	if (!options.vulkan_semantics)
	{
		using Supp = ShaderSubgroupSupportHelper;
		auto result = shader_subgroup_supporter.resolve();

		for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
		{
			auto feature = static_cast<Supp::Feature>(feature_index);
			if (!shader_subgroup_supporter.is_feature_requested(feature))
				continue;

			auto exts = Supp::get_candidates_for_feature(feature, result);
			if (exts.empty())
				continue;

			statement("");

			for (auto &ext : exts)
			{
				const char *name = Supp::get_extension_name(ext);
				const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
				auto extra_names = Supp::get_extra_required_extension_names(ext);
				statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
				          (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
				for (const auto &e : extra_names)
					statement("#extension ", e, " : enable");
				statement("#extension ", name, " : require");
			}

			if (!Supp::can_feature_be_implemented_without_extensions(feature))
			{
				statement("#else");
				statement("#error No extensions available to emulate requested subgroup feature.");
			}

			statement("#endif");
		}
	}

	for (auto &header : header_lines)
		statement(header);

	SmallVector<string> inputs;
	SmallVector<string> outputs;

	switch (execution.model)
	{
	case ExecutionModelVertex:
		if (options.ovr_multiview_view_count)
			inputs.push_back(join("num_views = ", options.ovr_multiview_view_count));
		break;
	case ExecutionModelGeometry:
		if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
			inputs.push_back(join("invocations = ", execution.invocations));
		if (execution.flags.get(ExecutionModeInputPoints))
			inputs.push_back("points");
		if (execution.flags.get(ExecutionModeInputLines))
			inputs.push_back("lines");
		if (execution.flags.get(ExecutionModeInputLinesAdjacency))
			inputs.push_back("lines_adjacency");
		if (execution.flags.get(ExecutionModeTriangles))
			inputs.push_back("triangles");
		if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
			inputs.push_back("triangles_adjacency");

		if (!execution.geometry_passthrough)
		{
			// For passthrough, these are implied and cannot be declared in the shader.
1050 outputs.push_back(t: join(ts: "max_vertices = ", ts&: execution.output_vertices));
1051 if (execution.flags.get(bit: ExecutionModeOutputTriangleStrip))
1052 outputs.push_back(t: "triangle_strip");
1053 if (execution.flags.get(bit: ExecutionModeOutputPoints))
1054 outputs.push_back(t: "points");
1055 if (execution.flags.get(bit: ExecutionModeOutputLineStrip))
1056 outputs.push_back(t: "line_strip");
1057 }
1058 break;
1059
1060 case ExecutionModelTessellationControl:
1061 if (execution.flags.get(bit: ExecutionModeOutputVertices))
1062 outputs.push_back(t: join(ts: "vertices = ", ts&: execution.output_vertices));
1063 break;
1064
1065 case ExecutionModelTessellationEvaluation:
1066 if (execution.flags.get(bit: ExecutionModeQuads))
1067 inputs.push_back(t: "quads");
1068 if (execution.flags.get(bit: ExecutionModeTriangles))
1069 inputs.push_back(t: "triangles");
1070 if (execution.flags.get(bit: ExecutionModeIsolines))
1071 inputs.push_back(t: "isolines");
1072 if (execution.flags.get(bit: ExecutionModePointMode))
1073 inputs.push_back(t: "point_mode");
1074
1075 if (!execution.flags.get(bit: ExecutionModeIsolines))
1076 {
1077 if (execution.flags.get(bit: ExecutionModeVertexOrderCw))
1078 inputs.push_back(t: "cw");
1079 if (execution.flags.get(bit: ExecutionModeVertexOrderCcw))
1080 inputs.push_back(t: "ccw");
1081 }
1082
1083 if (execution.flags.get(bit: ExecutionModeSpacingFractionalEven))
1084 inputs.push_back(t: "fractional_even_spacing");
1085 if (execution.flags.get(bit: ExecutionModeSpacingFractionalOdd))
1086 inputs.push_back(t: "fractional_odd_spacing");
1087 if (execution.flags.get(bit: ExecutionModeSpacingEqual))
1088 inputs.push_back(t: "equal_spacing");
1089 break;
1090
1091 case ExecutionModelGLCompute:
1092 case ExecutionModelTaskEXT:
1093 case ExecutionModelMeshEXT:
1094 {
1095 if (execution.workgroup_size.constant != 0 || execution.flags.get(bit: ExecutionModeLocalSizeId))
1096 {
1097 SpecializationConstant wg_x, wg_y, wg_z;
1098 get_work_group_size_specialization_constants(x&: wg_x, y&: wg_y, z&: wg_z);
1099
			// If there are any spec constants on legacy GLSL, defer the declaration; we need to set up macro
			// declarations before we can emit the work group size.
			if (options.vulkan_semantics ||
			    ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
				build_workgroup_size(inputs, wg_x, wg_y, wg_z);
		}
		else
		{
			inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
			inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
			inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
		}

		if (execution.model == ExecutionModelMeshEXT)
		{
			outputs.push_back(join("max_vertices = ", execution.output_vertices));
			outputs.push_back(join("max_primitives = ", execution.output_primitives));
			if (execution.flags.get(ExecutionModeOutputTrianglesEXT))
				outputs.push_back("triangles");
			else if (execution.flags.get(ExecutionModeOutputLinesEXT))
				outputs.push_back("lines");
			else if (execution.flags.get(ExecutionModeOutputPoints))
				outputs.push_back("points");
		}
		break;
	}

	case ExecutionModelFragment:
		if (options.es)
		{
			switch (options.fragment.default_float_precision)
			{
			case Options::Lowp:
				statement("precision lowp float;");
				break;

			case Options::Mediump:
				statement("precision mediump float;");
				break;

			case Options::Highp:
				statement("precision highp float;");
				break;

			default:
				break;
			}

			switch (options.fragment.default_int_precision)
			{
			case Options::Lowp:
				statement("precision lowp int;");
				break;

			case Options::Mediump:
				statement("precision mediump int;");
				break;

			case Options::Highp:
				statement("precision highp int;");
				break;

			default:
				break;
			}
		}

		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
			inputs.push_back("early_fragment_tests");
		if (execution.flags.get(ExecutionModePostDepthCoverage))
			inputs.push_back("post_depth_coverage");

		if (interlock_used)
			statement("#if defined(GL_ARB_fragment_shader_interlock)");

		if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
			statement("layout(pixel_interlock_ordered) in;");
		else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
			statement("layout(pixel_interlock_unordered) in;");
		else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
			statement("layout(sample_interlock_ordered) in;");
		else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
			statement("layout(sample_interlock_unordered) in;");

		if (interlock_used)
		{
			statement("#elif !defined(GL_INTEL_fragment_shader_ordering)");
			statement("#error Fragment Shader Interlock/Ordering extension missing!");
			statement("#endif");
		}

		if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
			statement("layout(depth_greater) out float gl_FragDepth;");
		else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
			statement("layout(depth_less) out float gl_FragDepth;");

		break;

	default:
		break;
	}

	for (auto &cap : ir.declared_capabilities)
		if (cap == CapabilityRayTraversalPrimitiveCullingKHR)
			statement("layout(primitive_culling);");

	if (!inputs.empty())
		statement("layout(", merge(inputs), ") in;");
	if (!outputs.empty())
		statement("layout(", merge(outputs), ") out;");

	statement("");
}

bool CompilerGLSL::type_is_empty(const SPIRType &type)
{
	return type.basetype == SPIRType::Struct && type.member_types.empty();
}

void CompilerGLSL::emit_struct(SPIRType &type)
{
	// Struct types can be stamped out multiple times
	// with just different offsets, matrix layouts, etc ...
	// Type-punning with these types is legal, which complicates things
	// when we are storing struct and array types in an SSBO for example.
	// If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
	if (type.type_alias != TypeID(0) &&
	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
		return;

	add_resource_name(type.self);
	auto name = type_to_glsl(type);

	statement(!backend.explicit_struct_type ? "struct " : "", name);
	begin_scope();

	type.member_name_cache.clear();

	uint32_t i = 0;
	bool emitted = false;
	for (auto &member : type.member_types)
	{
		add_member_name(type, i);
		emit_struct_member(type, member, i);
		i++;
		emitted = true;
	}

	// Don't declare empty structs in GLSL, this is not allowed.
	if (type_is_empty(type) && !backend.supports_empty_struct)
	{
		statement("int empty_struct_member;");
		emitted = true;
	}

	if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
		emit_struct_padding_target(type);

	end_scope_decl();

	if (emitted)
		statement("");
}

string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
{
	string res;
	//if (flags & (1ull << DecorationSmooth))
	//	res += "smooth ";
	if (flags.get(DecorationFlat))
		res += "flat ";
	if (flags.get(DecorationNoPerspective))
	{
		if (options.es)
		{
			if (options.version < 300)
				SPIRV_CROSS_THROW("noperspective requires ESSL 300.");
			require_extension_internal("GL_NV_shader_noperspective_interpolation");
		}
		else if (is_legacy_desktop())
			require_extension_internal("GL_EXT_gpu_shader4");
		res += "noperspective ";
	}
	if (flags.get(DecorationCentroid))
		res += "centroid ";
	if (flags.get(DecorationPatch))
		res += "patch ";
	if (flags.get(DecorationSample))
	{
		if (options.es)
		{
			if (options.version < 300)
				SPIRV_CROSS_THROW("sample requires ESSL 300.");
			else if (options.version < 320)
				require_extension_internal("GL_OES_shader_multisample_interpolation");
		}
		res += "sample ";
	}
	if (flags.get(DecorationInvariant) && (options.es || options.version >= 120))
		res += "invariant ";
	if (flags.get(DecorationPerPrimitiveEXT))
	{
		res += "perprimitiveEXT ";
		require_extension_internal("GL_EXT_mesh_shader");
	}

	if (flags.get(DecorationExplicitInterpAMD))
	{
		require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
		res += "__explicitInterpAMD ";
	}

	if (flags.get(DecorationPerVertexKHR))
	{
		if (options.es && options.version < 320)
			SPIRV_CROSS_THROW("pervertexEXT requires ESSL 320.");
		else if (!options.es && options.version < 450)
			SPIRV_CROSS_THROW("pervertexEXT requires GLSL 450.");

		if (barycentric_is_nv)
		{
			require_extension_internal("GL_NV_fragment_shader_barycentric");
			res += "pervertexNV ";
		}
		else
		{
			require_extension_internal("GL_EXT_fragment_shader_barycentric");
			res += "pervertexEXT ";
		}
	}

	return res;
}

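// Produces the per-member layout() prefix for block members, e.g. "layout(row_major) " for a
// row-major matrix member, or "layout(location = 1, component = 2) " for an I/O block member.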
string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
	if (is_legacy())
		return "";

	bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
	if (!is_block)
		return "";

	auto &memb = ir.meta[type.self].members;
	if (index >= memb.size())
		return "";
	auto &dec = memb[index];

	SmallVector<string> attr;

	if (has_member_decoration(type.self, index, DecorationPassthroughNV))
		attr.push_back("passthrough");

	// We can only apply layouts on members in block interfaces.
	// This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
	// This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
	// has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
	//
	// We would like to go from (SPIR-V style):
	//
	// struct Foo { layout(row_major) mat4 matrix; };
	// buffer UBO { Foo foo; };
	//
	// to
	//
	// struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
	// buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
	auto flags = combined_decoration_for_member(type, index);

	if (flags.get(DecorationRowMajor))
		attr.push_back("row_major");
	// We don't emit any global layouts, so column_major is default.
	//if (flags & (1ull << DecorationColMajor))
	//	attr.push_back("column_major");

	if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
		attr.push_back(join("location = ", dec.location));

	// Can only declare component if we can declare location.
	if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
	{
		if (!options.es)
		{
			if (options.version < 440 && options.version >= 140)
				require_extension_internal("GL_ARB_enhanced_layouts");
			else if (options.version < 140)
				SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
			attr.push_back(join("component = ", dec.component));
		}
		else
			SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
	}

	// SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
	// This is only done selectively in GLSL as needed.
	if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
	    dec.decoration_flags.get(DecorationOffset))
		attr.push_back(join("offset = ", dec.offset));
	else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset))
		attr.push_back(join("xfb_offset = ", dec.offset));

	if (attr.empty())
		return "";

	string res = "layout(";
	res += merge(attr);
	res += ") ";
	return res;
}

const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
{
	if (options.es && is_desktop_only_format(format))
		SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");

	switch (format)
	{
	case ImageFormatRgba32f:
		return "rgba32f";
	case ImageFormatRgba16f:
		return "rgba16f";
	case ImageFormatR32f:
		return "r32f";
	case ImageFormatRgba8:
		return "rgba8";
	case ImageFormatRgba8Snorm:
		return "rgba8_snorm";
	case ImageFormatRg32f:
		return "rg32f";
	case ImageFormatRg16f:
		return "rg16f";
	case ImageFormatRgba32i:
		return "rgba32i";
	case ImageFormatRgba16i:
		return "rgba16i";
	case ImageFormatR32i:
		return "r32i";
	case ImageFormatRgba8i:
		return "rgba8i";
	case ImageFormatRg32i:
		return "rg32i";
	case ImageFormatRg16i:
		return "rg16i";
	case ImageFormatRgba32ui:
		return "rgba32ui";
	case ImageFormatRgba16ui:
		return "rgba16ui";
	case ImageFormatR32ui:
		return "r32ui";
	case ImageFormatRgba8ui:
		return "rgba8ui";
	case ImageFormatRg32ui:
		return "rg32ui";
	case ImageFormatRg16ui:
		return "rg16ui";
	case ImageFormatR11fG11fB10f:
		return "r11f_g11f_b10f";
	case ImageFormatR16f:
		return "r16f";
	case ImageFormatRgb10A2:
		return "rgb10_a2";
	case ImageFormatR8:
		return "r8";
	case ImageFormatRg8:
		return "rg8";
	case ImageFormatR16:
		return "r16";
	case ImageFormatRg16:
		return "rg16";
	case ImageFormatRgba16:
		return "rgba16";
	case ImageFormatR16Snorm:
		return "r16_snorm";
	case ImageFormatRg16Snorm:
		return "rg16_snorm";
	case ImageFormatRgba16Snorm:
		return "rgba16_snorm";
	case ImageFormatR8Snorm:
		return "r8_snorm";
	case ImageFormatRg8Snorm:
		return "rg8_snorm";
	case ImageFormatR8ui:
		return "r8ui";
	case ImageFormatRg8ui:
		return "rg8ui";
	case ImageFormatR16ui:
		return "r16ui";
	case ImageFormatRgb10a2ui:
		return "rgb10_a2ui";
	case ImageFormatR8i:
		return "r8i";
	case ImageFormatRg8i:
		return "rg8i";
	case ImageFormatR16i:
		return "r16i";
	case ImageFormatR64i:
		return "r64i";
	case ImageFormatR64ui:
		return "r64ui";
	default:
	case ImageFormatUnknown:
		return nullptr;
	}
}

uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
{
	switch (type.basetype)
	{
	case SPIRType::Double:
	case SPIRType::Int64:
	case SPIRType::UInt64:
		return 8;
	case SPIRType::Float:
	case SPIRType::Int:
	case SPIRType::UInt:
		return 4;
	case SPIRType::Half:
	case SPIRType::Short:
	case SPIRType::UShort:
		return 2;
	case SPIRType::SByte:
	case SPIRType::UByte:
		return 1;

	default:
		SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
	}
}

uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
                                                BufferPackingStandard packing)
{
	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
	// and is 64-bit.
	if (is_physical_pointer(type))
	{
		if (!type.pointer)
			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");

		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
		{
			if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
				return 16;
			else
				return 8;
		}
		else
			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
	}
	else if (is_array(type))
	{
		uint32_t minimum_alignment = 1;
		if (packing_is_vec4_padded(packing))
			minimum_alignment = 16;

		auto *tmp = &get<SPIRType>(type.parent_type);
		while (!tmp->array.empty())
			tmp = &get<SPIRType>(tmp->parent_type);

		// Get the alignment of the base type, then maybe round up.
		return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
	}

	if (type.basetype == SPIRType::Struct)
	{
		// Rule 9. A struct's alignment is the maximum alignment of its members.
		uint32_t alignment = 1;
		for (uint32_t i = 0; i < type.member_types.size(); i++)
		{
			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
			alignment =
			    max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
		}

		// In std140, struct alignment is rounded up to 16.
		if (packing_is_vec4_padded(packing))
			alignment = max<uint32_t>(alignment, 16u);

		return alignment;
	}
	else
	{
		const uint32_t base_alignment = type_to_packed_base_size(type, packing);

		// Alignment requirement for scalar block layout is always the alignment for the most basic component.
		if (packing_is_scalar(packing))
			return base_alignment;

		// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
		// a vec4 boundary; that part is handled outside this function, since it needs to know the current offset.
		if (type.columns == 1 && packing_is_hlsl(packing))
			return base_alignment;

		// From 7.6.2.2 in GL 4.5 core spec.
		// Rule 1
		if (type.vecsize == 1 && type.columns == 1)
			return base_alignment;

		// Rule 2
		if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
			return type.vecsize * base_alignment;

		// Rule 3
		if (type.vecsize == 3 && type.columns == 1)
			return 4 * base_alignment;
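		// For example, under std140/std430 a vec3 of 32-bit floats is aligned as if it were a vec4: 4 * 4 = 16 bytes.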

		// Rule 4 implied. Alignment does not change in std430.

		// Rule 5. Column-major matrices are stored as arrays of
		// vectors.
		if (flags.get(DecorationColMajor) && type.columns > 1)
		{
			if (packing_is_vec4_padded(packing))
				return 4 * base_alignment;
			else if (type.vecsize == 3)
				return 4 * base_alignment;
			else
				return type.vecsize * base_alignment;
		}

		// Rule 6 implied.

		// Rule 7.
		if (flags.get(DecorationRowMajor) && type.vecsize > 1)
		{
			if (packing_is_vec4_padded(packing))
				return 4 * base_alignment;
			else if (type.columns == 3)
				return 4 * base_alignment;
			else
				return type.columns * base_alignment;
		}

		// Rule 8 implied.
	}

	SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
}

uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
                                                   BufferPackingStandard packing)
{
	// Array stride is equal to aligned size of the underlying type.
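	// For example, a float array member under std140 has a 16-byte stride per element: the 4-byte element
	// size is rounded up to the vec4-padded minimum alignment of 16, while under std430 the stride stays 4.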
1644 uint32_t parent = type.parent_type;
1645 assert(parent);
1646
1647 auto &tmp = get<SPIRType>(id: parent);
1648
1649 uint32_t size = type_to_packed_size(type: tmp, flags, packing);
1650 uint32_t alignment = type_to_packed_alignment(type, flags, packing);
1651 return (size + alignment - 1) & ~(alignment - 1);
1652}
1653
1654uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
1655{
1656 // If using PhysicalStorageBufferEXT storage class, this is a pointer,
1657 // and is 64-bit.
1658 if (is_physical_pointer(type))
1659 {
1660 if (!type.pointer)
1661 SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1662
1663 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1664 return 8;
1665 else
1666 SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1667 }
1668 else if (is_array(type))
1669 {
1670 uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
1671
1672 // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
1673 // so that it is possible to pack other vectors into the last element.
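		// Illustrative example (not from the original source): float3 foo[4] in a cbuffer has an
		// array stride of 16, so the stride-based size is 64, but the last element only occupies
		// 12 bytes, giving a packed size of 64 - (4 - 3) * 4 = 60.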
1674 if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
1675 packed_size -= (4 - type.vecsize) * (type.width / 8);
1676
1677 return packed_size;
1678 }
1679
1680 uint32_t size = 0;
1681
1682 if (type.basetype == SPIRType::Struct)
1683 {
1684 uint32_t pad_alignment = 1;
1685
1686 for (uint32_t i = 0; i < type.member_types.size(); i++)
1687 {
1688 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1689 auto &member_type = get<SPIRType>(id: type.member_types[i]);
1690
1691 uint32_t packed_alignment = type_to_packed_alignment(type: member_type, flags: member_flags, packing);
1692 uint32_t alignment = max(a: packed_alignment, b: pad_alignment);
1693
1694 // The next member following a struct member is aligned to the base alignment of the struct that came before.
1695 // GL 4.5 spec, 7.6.2.2.
1696 if (member_type.basetype == SPIRType::Struct)
1697 pad_alignment = packed_alignment;
1698 else
1699 pad_alignment = 1;
1700
1701 size = (size + alignment - 1) & ~(alignment - 1);
1702 size += type_to_packed_size(type: member_type, flags: member_flags, packing);
1703 }
1704 }
1705 else
1706 {
1707 const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1708
1709 if (packing_is_scalar(packing))
1710 {
1711 size = type.vecsize * type.columns * base_alignment;
1712 }
1713 else
1714 {
1715 if (type.columns == 1)
1716 size = type.vecsize * base_alignment;
1717
1718 if (flags.get(bit: DecorationColMajor) && type.columns > 1)
1719 {
1720 if (packing_is_vec4_padded(packing))
1721 size = type.columns * 4 * base_alignment;
1722 else if (type.vecsize == 3)
1723 size = type.columns * 4 * base_alignment;
1724 else
1725 size = type.columns * type.vecsize * base_alignment;
1726 }
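			// Illustrative example (not from the original source): a column-major mat2 of floats
			// occupies 2 * 4 * 4 = 32 bytes when vec4-padded (std140), but only 2 * 2 * 4 = 16 bytes
			// under std430.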
1727
1728 if (flags.get(bit: DecorationRowMajor) && type.vecsize > 1)
1729 {
1730 if (packing_is_vec4_padded(packing))
1731 size = type.vecsize * 4 * base_alignment;
1732 else if (type.columns == 3)
1733 size = type.vecsize * 4 * base_alignment;
1734 else
1735 size = type.vecsize * type.columns * base_alignment;
1736 }
1737
1738 // For matrices in HLSL, the last element has a size which depends on its vector size,
1739 // so that it is possible to pack other vectors into the last element.
1740 if (packing_is_hlsl(packing) && type.columns > 1)
1741 size -= (4 - type.vecsize) * (type.width / 8);
1742 }
1743 }
1744
1745 return size;
1746}
1747
1748bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
1749 uint32_t *failed_validation_index, uint32_t start_offset,
1750 uint32_t end_offset)
1751{
	// This is very tricky and error-prone, but try to be exhaustive and correct here.
1753 // SPIR-V doesn't directly say if we're using std430 or std140.
1754 // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
1755 // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
	// We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyway, and custom offsets would do the same thing).
1757 //
1758 // It is almost certain that we're using std430, but it gets tricky with arrays in particular.
1759 // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
1760 //
1761 // The only two differences between std140 and std430 are related to padding alignment/array stride
1762 // in arrays and structs. In std140 they take minimum vec4 alignment.
1763 // std430 only removes the vec4 requirement.
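	//
	// Illustrative example (not from the original source): for
	//     buffer SSBO { float a; float b[2]; };
	// std430 places b at offset 4 with an array stride of 4, while std140 places it at offset 16
	// with a stride of 16. If the SPIR-V Offset/ArrayStride decorations say 16/16, the std430
	// check fails and we fall back to std140.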
1764
1765 uint32_t offset = 0;
1766 uint32_t pad_alignment = 1;
1767
1768 bool is_top_level_block =
1769 has_decoration(id: type.self, decoration: DecorationBlock) || has_decoration(id: type.self, decoration: DecorationBufferBlock);
1770
1771 for (uint32_t i = 0; i < type.member_types.size(); i++)
1772 {
1773 auto &memb_type = get<SPIRType>(id: type.member_types[i]);
1774
1775 auto *type_meta = ir.find_meta(id: type.self);
1776 auto member_flags = type_meta ? type_meta->members[i].decoration_flags : Bitset{};
1777
1778 // Verify alignment rules.
1779 uint32_t packed_alignment = type_to_packed_alignment(type: memb_type, flags: member_flags, packing);
1780
1781 // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
1782 // layout(constant_id = 0) const int s = 10;
1783 // const int S = s + 5; // SpecConstantOp
1784 // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
1785 // we would need full implementation of compile-time constant folding. :(
1786 // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
1787 // for our analysis (e.g. unsized arrays).
1788 // This lets us simply ignore that there are spec constant op sized arrays in our buffers.
1789 // Querying size of this member will fail, so just don't call it unless we have to.
1790 //
	// This is likely the best effort we can support without resorting to unacceptably complicated workarounds.
1792 bool member_can_be_unsized =
1793 is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
1794
1795 uint32_t packed_size = 0;
1796 if (!member_can_be_unsized || packing_is_hlsl(packing))
1797 packed_size = type_to_packed_size(type: memb_type, flags: member_flags, packing);
1798
1799 // We only need to care about this if we have non-array types which can straddle the vec4 boundary.
1800 uint32_t actual_offset = type_struct_member_offset(type, index: i);
1801
1802 if (packing_is_hlsl(packing))
1803 {
1804 // If a member straddles across a vec4 boundary, alignment is actually vec4.
1805 uint32_t target_offset;
1806
1807 // If we intend to use explicit packing, we must check for improper straddle with that offset.
1808 // In implicit packing, we must check with implicit offset, since the explicit offset
1809 // might have already accounted for the straddle, and we'd miss the alignment promotion to vec4.
1810 // This is important when packing sub-structs that don't support packoffset().
1811 if (packing_has_flexible_offset(packing))
1812 target_offset = actual_offset;
1813 else
1814 target_offset = offset;
1815
1816 uint32_t begin_word = target_offset / 16;
1817 uint32_t end_word = (target_offset + packed_size - 1) / 16;
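			// Illustrative example (not from the original source): a vec3 (12 bytes) at offset 8
			// spans bytes 8..19, so begin_word = 0 and end_word = 1; since they differ, the member
			// straddles a vec4 boundary and its alignment is promoted to 16.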
1818
1819 if (begin_word != end_word)
1820 packed_alignment = max<uint32_t>(a: packed_alignment, b: 16u);
1821 }
1822
1823 // Field is not in the specified range anymore and we can ignore any further fields.
1824 if (actual_offset >= end_offset)
1825 break;
1826
1827 uint32_t alignment = max(a: packed_alignment, b: pad_alignment);
1828 offset = (offset + alignment - 1) & ~(alignment - 1);
1829
1830 // The next member following a struct member is aligned to the base alignment of the struct that came before.
1831 // GL 4.5 spec, 7.6.2.2.
1832 if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
1833 pad_alignment = packed_alignment;
1834 else
1835 pad_alignment = 1;
1836
1837 // Only care about packing if we are in the given range
1838 if (actual_offset >= start_offset)
1839 {
1840 // We only care about offsets in std140, std430, etc ...
1841 // For EnhancedLayout variants, we have the flexibility to choose our own offsets.
1842 if (!packing_has_flexible_offset(packing))
1843 {
1844 if (actual_offset != offset) // This cannot be the packing we're looking for.
1845 {
1846 if (failed_validation_index)
1847 *failed_validation_index = i;
1848 return false;
1849 }
1850 }
1851 else if ((actual_offset & (alignment - 1)) != 0)
1852 {
1853 // We still need to verify that alignment rules are observed, even if we have explicit offset.
1854 if (failed_validation_index)
1855 *failed_validation_index = i;
1856 return false;
1857 }
1858
1859 // Verify array stride rules.
1860 if (is_array(type: memb_type) &&
1861 type_to_packed_array_stride(type: memb_type, flags: member_flags, packing) !=
1862 type_struct_member_array_stride(type, index: i))
1863 {
1864 if (failed_validation_index)
1865 *failed_validation_index = i;
1866 return false;
1867 }
1868
1869 // Verify that sub-structs also follow packing rules.
1870 // We cannot use enhanced layouts on substructs, so they better be up to spec.
1871 auto substruct_packing = packing_to_substruct_packing(packing);
1872
1873 if (!memb_type.pointer && !memb_type.member_types.empty() &&
1874 !buffer_is_packing_standard(type: memb_type, packing: substruct_packing))
1875 {
1876 if (failed_validation_index)
1877 *failed_validation_index = i;
1878 return false;
1879 }
1880 }
1881
1882 // Bump size.
1883 offset = actual_offset + packed_size;
1884 }
1885
1886 return true;
1887}
1888
1889bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
1890{
	// Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
1892 // Be very explicit here about how to solve the issue.
1893 if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
1894 (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
1895 {
1896 uint32_t minimum_desktop_version = block ? 440 : 410;
1897 // ARB_enhanced_layouts vs ARB_separate_shader_objects ...
1898
1899 if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
1900 return false;
1901 else if (options.es && options.version < 310)
1902 return false;
1903 }
1904
1905 if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
1906 (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
1907 {
1908 if (options.es && options.version < 300)
1909 return false;
1910 else if (!options.es && options.version < 330)
1911 return false;
1912 }
1913
1914 if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
1915 {
1916 if (options.es && options.version < 310)
1917 return false;
1918 else if (!options.es && options.version < 430)
1919 return false;
1920 }
1921
1922 return true;
1923}
1924
1925string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
1926{
	// FIXME: Come up with a better solution for when to disable layouts.
	// Whether layouts can be used depends on the available extensions as well as
	// which kinds of layouts are used. For now, the simple solution is to just
	// disable layouts for legacy versions.
1931 if (is_legacy())
1932 return "";
1933
1934 if (subpass_input_is_framebuffer_fetch(id: var.self))
1935 return "";
1936
1937 SmallVector<string> attr;
1938
1939 auto &type = get<SPIRType>(id: var.basetype);
1940 auto &flags = get_decoration_bitset(id: var.self);
1941 auto &typeflags = get_decoration_bitset(id: type.self);
1942
1943 if (flags.get(bit: DecorationPassthroughNV))
1944 attr.push_back(t: "passthrough");
1945
1946 if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
1947 attr.push_back(t: "push_constant");
1948 else if (var.storage == StorageClassShaderRecordBufferKHR)
1949 attr.push_back(t: ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");
1950
1951 if (flags.get(bit: DecorationRowMajor))
1952 attr.push_back(t: "row_major");
1953 if (flags.get(bit: DecorationColMajor))
1954 attr.push_back(t: "column_major");
1955
1956 if (options.vulkan_semantics)
1957 {
1958 if (flags.get(bit: DecorationInputAttachmentIndex))
1959 attr.push_back(t: join(ts: "input_attachment_index = ", ts: get_decoration(id: var.self, decoration: DecorationInputAttachmentIndex)));
1960 }
1961
1962 bool is_block = has_decoration(id: type.self, decoration: DecorationBlock);
1963 if (flags.get(bit: DecorationLocation) && can_use_io_location(storage: var.storage, block: is_block))
1964 {
1965 Bitset combined_decoration;
1966 for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
1967 combined_decoration.merge_or(other: combined_decoration_for_member(type, index: i));
1968
1969 // If our members have location decorations, we don't need to
1970 // emit location decorations at the top as well (looks weird).
1971 if (!combined_decoration.get(bit: DecorationLocation))
1972 attr.push_back(t: join(ts: "location = ", ts: get_decoration(id: var.self, decoration: DecorationLocation)));
1973 }
1974
1975 if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput &&
1976 location_is_non_coherent_framebuffer_fetch(location: get_decoration(id: var.self, decoration: DecorationLocation)))
1977 {
1978 attr.push_back(t: "noncoherent");
1979 }
1980
1981 // Transform feedback
1982 bool uses_enhanced_layouts = false;
1983 if (is_block && var.storage == StorageClassOutput)
1984 {
1985 // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself,
1986 // since all members must match the same xfb_buffer. The only thing we will declare for members of the block
1987 // is the xfb_offset.
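		// Illustrative example of the emitted convention (an assumption, not from the source):
		//     layout(xfb_buffer = 0, xfb_stride = 16) out VertexOut
		//     {
		//         layout(xfb_offset = 0) vec4 value;
		//     };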
1988 uint32_t member_count = uint32_t(type.member_types.size());
1989 bool have_xfb_buffer_stride = false;
1990 bool have_any_xfb_offset = false;
1991 bool have_geom_stream = false;
1992 uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
1993
1994 if (flags.get(bit: DecorationXfbBuffer) && flags.get(bit: DecorationXfbStride))
1995 {
1996 have_xfb_buffer_stride = true;
1997 xfb_buffer = get_decoration(id: var.self, decoration: DecorationXfbBuffer);
1998 xfb_stride = get_decoration(id: var.self, decoration: DecorationXfbStride);
1999 }
2000
2001 if (flags.get(bit: DecorationStream))
2002 {
2003 have_geom_stream = true;
2004 geom_stream = get_decoration(id: var.self, decoration: DecorationStream);
2005 }
2006
2007 // Verify that none of the members violate our assumption.
2008 for (uint32_t i = 0; i < member_count; i++)
2009 {
2010 if (has_member_decoration(id: type.self, index: i, decoration: DecorationStream))
2011 {
2012 uint32_t member_geom_stream = get_member_decoration(id: type.self, index: i, decoration: DecorationStream);
2013 if (have_geom_stream && member_geom_stream != geom_stream)
2014 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
2015 have_geom_stream = true;
2016 geom_stream = member_geom_stream;
2017 }
2018
2019 // Only members with an Offset decoration participate in XFB.
2020 if (!has_member_decoration(id: type.self, index: i, decoration: DecorationOffset))
2021 continue;
2022 have_any_xfb_offset = true;
2023
2024 if (has_member_decoration(id: type.self, index: i, decoration: DecorationXfbBuffer))
2025 {
2026 uint32_t buffer_index = get_member_decoration(id: type.self, index: i, decoration: DecorationXfbBuffer);
2027 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
2028 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
2029 have_xfb_buffer_stride = true;
2030 xfb_buffer = buffer_index;
2031 }
2032
2033 if (has_member_decoration(id: type.self, index: i, decoration: DecorationXfbStride))
2034 {
2035 uint32_t stride = get_member_decoration(id: type.self, index: i, decoration: DecorationXfbStride);
2036 if (have_xfb_buffer_stride && stride != xfb_stride)
2037 SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
2038 have_xfb_buffer_stride = true;
2039 xfb_stride = stride;
2040 }
2041 }
2042
2043 if (have_xfb_buffer_stride && have_any_xfb_offset)
2044 {
2045 attr.push_back(t: join(ts: "xfb_buffer = ", ts&: xfb_buffer));
2046 attr.push_back(t: join(ts: "xfb_stride = ", ts&: xfb_stride));
2047 uses_enhanced_layouts = true;
2048 }
2049
2050 if (have_geom_stream)
2051 {
2052 if (get_execution_model() != ExecutionModelGeometry)
2053 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
2054 if (options.es)
2055 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
2056 if (options.version < 400)
2057 require_extension_internal(ext: "GL_ARB_transform_feedback3");
2058 attr.push_back(t: join(ts: "stream = ", ts: get_decoration(id: var.self, decoration: DecorationStream)));
2059 }
2060 }
2061 else if (var.storage == StorageClassOutput)
2062 {
2063 if (flags.get(bit: DecorationXfbBuffer) && flags.get(bit: DecorationXfbStride) && flags.get(bit: DecorationOffset))
2064 {
2065 // XFB for standalone variables, we can emit all decorations.
2066 attr.push_back(t: join(ts: "xfb_buffer = ", ts: get_decoration(id: var.self, decoration: DecorationXfbBuffer)));
2067 attr.push_back(t: join(ts: "xfb_stride = ", ts: get_decoration(id: var.self, decoration: DecorationXfbStride)));
2068 attr.push_back(t: join(ts: "xfb_offset = ", ts: get_decoration(id: var.self, decoration: DecorationOffset)));
2069 uses_enhanced_layouts = true;
2070 }
2071
2072 if (flags.get(bit: DecorationStream))
2073 {
2074 if (get_execution_model() != ExecutionModelGeometry)
2075 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
2076 if (options.es)
2077 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
2078 if (options.version < 400)
2079 require_extension_internal(ext: "GL_ARB_transform_feedback3");
2080 attr.push_back(t: join(ts: "stream = ", ts: get_decoration(id: var.self, decoration: DecorationStream)));
2081 }
2082 }
2083
2084 // Can only declare Component if we can declare location.
2085 if (flags.get(bit: DecorationComponent) && can_use_io_location(storage: var.storage, block: is_block))
2086 {
2087 uses_enhanced_layouts = true;
2088 attr.push_back(t: join(ts: "component = ", ts: get_decoration(id: var.self, decoration: DecorationComponent)));
2089 }
2090
2091 if (uses_enhanced_layouts)
2092 {
2093 if (!options.es)
2094 {
			if (options.version < 140)
				SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
			else if (options.version < 440)
				require_extension_internal("GL_ARB_enhanced_layouts");
2101 }
2102 else if (options.es)
2103 SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
2104 }
2105
2106 if (flags.get(bit: DecorationIndex))
2107 attr.push_back(t: join(ts: "index = ", ts: get_decoration(id: var.self, decoration: DecorationIndex)));
2108
2109 // Do not emit set = decoration in regular GLSL output, but
2110 // we need to preserve it in Vulkan GLSL mode.
2111 if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR)
2112 {
2113 if (flags.get(bit: DecorationDescriptorSet) && options.vulkan_semantics)
2114 attr.push_back(t: join(ts: "set = ", ts: get_decoration(id: var.self, decoration: DecorationDescriptorSet)));
2115 }
2116
2117 bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
2118 bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
2119 (var.storage == StorageClassUniform && typeflags.get(bit: DecorationBufferBlock));
2120 bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
2121 bool ubo_block = var.storage == StorageClassUniform && typeflags.get(bit: DecorationBlock);
2122
2123 // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
2124 bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
2125
	// Pretend there are no UBOs when the options say so.
2127 if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
2128 can_use_buffer_blocks = false;
2129
2130 bool can_use_binding;
2131 if (options.es)
2132 can_use_binding = options.version >= 310;
2133 else
2134 can_use_binding = options.enable_420pack_extension || (options.version >= 420);
2135
2136 // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
2137 if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
2138 can_use_binding = false;
2139
2140 if (var.storage == StorageClassShaderRecordBufferKHR)
2141 can_use_binding = false;
2142
2143 if (can_use_binding && flags.get(bit: DecorationBinding))
2144 attr.push_back(t: join(ts: "binding = ", ts: get_decoration(id: var.self, decoration: DecorationBinding)));
2145
2146 if (var.storage != StorageClassOutput && flags.get(bit: DecorationOffset))
2147 attr.push_back(t: join(ts: "offset = ", ts: get_decoration(id: var.self, decoration: DecorationOffset)));
2148
2149 // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
2150 // If SPIR-V does not comply with either layout, we cannot really work around it.
2151 if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
2152 {
2153 attr.push_back(t: buffer_to_packing_standard(type, support_std430_without_scalar_layout: false, support_enhanced_layouts: true));
2154 }
2155 else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
2156 {
2157 attr.push_back(t: buffer_to_packing_standard(type, support_std430_without_scalar_layout: true, support_enhanced_layouts: true));
2158 }
2159
	// For images, the type itself adds a layout qualifier.
2161 // Only emit the format for storage images.
2162 if (type.basetype == SPIRType::Image && type.image.sampled == 2)
2163 {
2164 const char *fmt = format_to_glsl(format: type.image.format);
2165 if (fmt)
2166 attr.push_back(t: fmt);
2167 }
2168
2169 if (attr.empty())
2170 return "";
2171
2172 string res = "layout(";
2173 res += merge(list: attr);
2174 res += ") ";
2175 return res;
2176}
2177
2178string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type,
2179 bool support_std430_without_scalar_layout,
2180 bool support_enhanced_layouts)
2181{
2182 if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, packing: BufferPackingStd430))
2183 return "std430";
2184 else if (buffer_is_packing_standard(type, packing: BufferPackingStd140))
2185 return "std140";
2186 else if (options.vulkan_semantics && buffer_is_packing_standard(type, packing: BufferPackingScalar))
2187 {
2188 require_extension_internal(ext: "GL_EXT_scalar_block_layout");
2189 return "scalar";
2190 }
2191 else if (support_std430_without_scalar_layout &&
2192 support_enhanced_layouts &&
2193 buffer_is_packing_standard(type, packing: BufferPackingStd430EnhancedLayout))
2194 {
2195 if (options.es && !options.vulkan_semantics)
			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
			                  "not support GL_ARB_enhanced_layouts.");
2198 if (!options.es && !options.vulkan_semantics && options.version < 440)
2199 require_extension_internal(ext: "GL_ARB_enhanced_layouts");
2200
2201 set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset);
2202 return "std430";
2203 }
2204 else if (support_enhanced_layouts &&
2205 buffer_is_packing_standard(type, packing: BufferPackingStd140EnhancedLayout))
2206 {
2207 // Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference,
2208 // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout.
2209 // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
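		// Illustrative example (an assumption, not from the source): a block whose offsets do not
		// match plain std140 can still be emitted as
		//     layout(std140) uniform UBO { layout(offset = 16) vec4 v; };
		// once explicit member offsets are available via GL_ARB_enhanced_layouts or Vulkan GLSL.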
2210 if (options.es && !options.vulkan_semantics)
			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
			                  "not support GL_ARB_enhanced_layouts.");
2213 if (!options.es && !options.vulkan_semantics && options.version < 440)
2214 require_extension_internal(ext: "GL_ARB_enhanced_layouts");
2215
2216 set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset);
2217 return "std140";
2218 }
2219 else if (options.vulkan_semantics &&
2220 support_enhanced_layouts &&
2221 buffer_is_packing_standard(type, packing: BufferPackingScalarEnhancedLayout))
2222 {
2223 set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset);
2224 require_extension_internal(ext: "GL_EXT_scalar_block_layout");
2225 return "scalar";
2226 }
2227 else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
2228 buffer_is_packing_standard(type, packing: BufferPackingStd430))
2229 {
2230 // UBOs can support std430 with GL_EXT_scalar_block_layout.
2231 require_extension_internal(ext: "GL_EXT_scalar_block_layout");
2232 return "std430";
2233 }
2234 else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
2235 support_enhanced_layouts &&
2236 buffer_is_packing_standard(type, packing: BufferPackingStd430EnhancedLayout))
2237 {
2238 // UBOs can support std430 with GL_EXT_scalar_block_layout.
2239 set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset);
2240 require_extension_internal(ext: "GL_EXT_scalar_block_layout");
2241 return "std430";
2242 }
2243 else
2244 {
		SPIRV_CROSS_THROW("Buffer block cannot be expressed as std430, std140, or scalar, even with enhanced "
		                  "layouts. You can try flattening this block to support a more flexible layout.");
2247 }
2248}
2249
2250void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
2251{
2252 if (flattened_buffer_blocks.count(x: var.self))
2253 emit_buffer_block_flattened(type: var);
2254 else if (options.vulkan_semantics)
2255 emit_push_constant_block_vulkan(var);
2256 else if (options.emit_push_constant_as_uniform_buffer)
2257 emit_buffer_block_native(var);
2258 else
2259 emit_push_constant_block_glsl(var);
2260}
2261
2262void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
2263{
2264 emit_buffer_block(type: var);
2265}
2266
2267void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
2268{
	// OpenGL has no concept of push constant blocks; implement them as a uniform struct.
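	// Illustrative example (an assumption, not from the source): a Vulkan-style
	//     layout(push_constant) uniform Push { vec4 v; } p;
	// is emitted for plain GLSL roughly as
	//     struct Push { vec4 v; };
	//     uniform Push p;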
2270 auto &type = get<SPIRType>(id: var.basetype);
2271
2272 unset_decoration(id: var.self, decoration: DecorationBinding);
2273 unset_decoration(id: var.self, decoration: DecorationDescriptorSet);
2274
2275#if 0
2276 if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
2277 SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
2278 "Remap to location with reflection API first or disable these decorations.");
2279#endif
2280
2281 // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
2282 // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
2283 bool block_flag = has_decoration(id: type.self, decoration: DecorationBlock);
2284 unset_decoration(id: type.self, decoration: DecorationBlock);
2285
2286 emit_struct(type);
2287
2288 if (block_flag)
2289 set_decoration(id: type.self, decoration: DecorationBlock);
2290
2291 emit_uniform(var);
2292 statement(ts: "");
2293}
2294
2295void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
2296{
2297 auto &type = get<SPIRType>(id: var.basetype);
2298 bool ubo_block = var.storage == StorageClassUniform && has_decoration(id: type.self, decoration: DecorationBlock);
2299
2300 if (flattened_buffer_blocks.count(x: var.self))
2301 emit_buffer_block_flattened(type: var);
2302 else if (is_legacy() || (!options.es && options.version == 130) ||
2303 (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
2304 emit_buffer_block_legacy(var);
2305 else
2306 emit_buffer_block_native(var);
2307}
2308
2309void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
2310{
2311 auto &type = get<SPIRType>(id: var.basetype);
2312 bool ssbo = var.storage == StorageClassStorageBuffer ||
2313 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
2314 if (ssbo)
2315 SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
2316
	// We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
2319 auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
2320 bool block_flag = block_flags.get(bit: DecorationBlock);
2321 block_flags.clear(bit: DecorationBlock);
2322 emit_struct(type);
2323 if (block_flag)
2324 block_flags.set(DecorationBlock);
2325 emit_uniform(var);
2326 statement(ts: "");
2327}
2328
2329void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_declaration)
2330{
2331 auto &type = get<SPIRType>(id: type_id);
2332 string buffer_name;
2333
2334 if (forward_declaration && is_physical_pointer_to_buffer_block(type))
2335 {
2336 // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
		// Allow an aliased name since we might be declaring the block twice: once as a forward-declared buffer reference and once as the proper declaration.
2338 // The names must match up.
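		// Illustrative example of the emitted pattern (an assumption, not from the source):
		//     layout(buffer_reference) buffer Foo;                    // forward declaration
		//     layout(buffer_reference, std430) buffer Foo { int v; }; // proper declaration
		// Both declarations must use the same block name.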
2339 buffer_name = to_name(id: type.self, allow_alias: false);
2340
2341 // Shaders never use the block by interface name, so we don't
2342 // have to track this other than updating name caches.
		// If we have a collision for any reason, just fall back immediately.
2344 if (ir.meta[type.self].decoration.alias.empty() ||
2345 block_ssbo_names.find(x: buffer_name) != end(cont&: block_ssbo_names) ||
2346 resource_names.find(x: buffer_name) != end(cont&: resource_names))
2347 {
2348 buffer_name = join(ts: "_", ts&: type.self);
2349 }
2350
2351 // Make sure we get something unique for both global name scope and block name scope.
2352 // See GLSL 4.5 spec: section 4.3.9 for details.
2353 add_variable(variables_primary&: block_ssbo_names, variables_secondary: resource_names, name&: buffer_name);
2354
2355 // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2356 // This cannot conflict with anything else, so we're safe now.
		// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
2358 if (buffer_name.empty())
2359 buffer_name = join(ts: "_", ts&: type.self);
2360
2361 block_names.insert(x: buffer_name);
2362 block_ssbo_names.insert(x: buffer_name);
2363
2364 // Ensure we emit the correct name when emitting non-forward pointer type.
2365 ir.meta[type.self].decoration.alias = buffer_name;
2366 }
2367 else
2368 {
2369 buffer_name = type_to_glsl(type);
2370 }
2371
2372 if (!forward_declaration)
2373 {
2374 auto itr = physical_storage_type_to_alignment.find(x: type_id);
2375 uint32_t alignment = 0;
2376 if (itr != physical_storage_type_to_alignment.end())
2377 alignment = itr->second.alignment;
2378
2379 if (is_physical_pointer_to_buffer_block(type))
2380 {
2381 SmallVector<std::string> attributes;
2382 attributes.push_back(t: "buffer_reference");
2383 if (alignment)
2384 attributes.push_back(t: join(ts: "buffer_reference_align = ", ts&: alignment));
2385 attributes.push_back(t: buffer_to_packing_standard(type, support_std430_without_scalar_layout: true, support_enhanced_layouts: true));
2386
2387 auto flags = ir.get_buffer_block_type_flags(type);
2388 string decorations;
2389 if (flags.get(bit: DecorationRestrict))
2390 decorations += " restrict";
2391 if (flags.get(bit: DecorationCoherent))
2392 decorations += " coherent";
2393 if (flags.get(bit: DecorationNonReadable))
2394 decorations += " writeonly";
2395 if (flags.get(bit: DecorationNonWritable))
2396 decorations += " readonly";
2397
2398 statement(ts: "layout(", ts: merge(list: attributes), ts: ")", ts&: decorations, ts: " buffer ", ts&: buffer_name);
2399 }
2400 else
2401 {
2402 string packing_standard;
2403 if (type.basetype == SPIRType::Struct)
2404 {
2405 // The non-block type is embedded in a block, so we cannot use enhanced layouts :(
2406 packing_standard = buffer_to_packing_standard(type, support_std430_without_scalar_layout: true, support_enhanced_layouts: false) + ", ";
2407 }
2408 else if (is_array(type: get_pointee_type(type)))
2409 {
2410 SPIRType wrap_type{OpTypeStruct};
2411 wrap_type.self = ir.increase_bound_by(count: 1);
2412 wrap_type.member_types.push_back(t: get_pointee_type_id(type_id));
2413 ir.set_member_decoration(id: wrap_type.self, index: 0, decoration: DecorationOffset, argument: 0);
2414 packing_standard = buffer_to_packing_standard(type: wrap_type, support_std430_without_scalar_layout: true, support_enhanced_layouts: false) + ", ";
2415 }
2416
2417 if (alignment)
2418 statement(ts: "layout(", ts&: packing_standard, ts: "buffer_reference, buffer_reference_align = ", ts&: alignment, ts: ") buffer ", ts&: buffer_name);
2419 else
2420 statement(ts: "layout(", ts&: packing_standard, ts: "buffer_reference) buffer ", ts&: buffer_name);
2421 }
2422
2423 begin_scope();
2424
2425 if (is_physical_pointer_to_buffer_block(type))
2426 {
2427 type.member_name_cache.clear();
2428
2429 uint32_t i = 0;
2430 for (auto &member : type.member_types)
2431 {
2432 add_member_name(type, name: i);
2433 emit_struct_member(type, member_type_id: member, index: i);
2434 i++;
2435 }
2436 }
2437 else
2438 {
2439 auto &pointee_type = get_pointee_type(type);
2440 statement(ts: type_to_glsl(type: pointee_type), ts: " value", ts: type_to_array_glsl(type: pointee_type, variable_id: 0), ts: ";");
2441 }
2442
2443 end_scope_decl();
2444 statement(ts: "");
2445 }
2446 else
2447 {
2448 statement(ts: "layout(buffer_reference) buffer ", ts&: buffer_name, ts: ";");
2449 }
2450}
2451
2452void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
2453{
2454 auto &type = get<SPIRType>(id: var.basetype);
2455
2456 Bitset flags = ir.get_buffer_block_flags(var);
2457 bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
2458 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
2459 bool is_restrict = ssbo && flags.get(bit: DecorationRestrict);
2460 bool is_writeonly = ssbo && flags.get(bit: DecorationNonReadable);
2461 bool is_readonly = ssbo && flags.get(bit: DecorationNonWritable);
2462 bool is_coherent = ssbo && flags.get(bit: DecorationCoherent);
2463
2464 // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
2465 auto buffer_name = to_name(id: type.self, allow_alias: false);
2466
2467 auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
2468
2469 // Shaders never use the block by interface name, so we don't
2470 // have to track this other than updating name caches.
	// If we have a collision for any reason, just fall back immediately.
2472 if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(x: buffer_name) != end(cont&: block_namespace) ||
2473 resource_names.find(x: buffer_name) != end(cont&: resource_names))
2474 {
2475 buffer_name = get_block_fallback_name(id: var.self);
2476 }
2477
2478 // Make sure we get something unique for both global name scope and block name scope.
2479 // See GLSL 4.5 spec: section 4.3.9 for details.
2480 add_variable(variables_primary&: block_namespace, variables_secondary: resource_names, name&: buffer_name);
2481
2482 // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2483 // This cannot conflict with anything else, so we're safe now.
	// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
2485 if (buffer_name.empty())
2486 buffer_name = join(ts: "_", ts&: get<SPIRType>(id: var.basetype).self, ts: "_", ts: var.self);
2487
2488 block_names.insert(x: buffer_name);
2489 block_namespace.insert(x: buffer_name);
2490
2491 // Save for post-reflection later.
2492 declared_block_names[var.self] = buffer_name;
2493
2494 statement(ts: layout_for_variable(var), ts: is_coherent ? "coherent " : "", ts: is_restrict ? "restrict " : "",
2495 ts: is_writeonly ? "writeonly " : "", ts: is_readonly ? "readonly " : "", ts: ssbo ? "buffer " : "uniform ",
2496 ts&: buffer_name);
2497
2498 begin_scope();
2499
2500 type.member_name_cache.clear();
2501
2502 uint32_t i = 0;
2503 for (auto &member : type.member_types)
2504 {
2505 add_member_name(type, name: i);
2506 emit_struct_member(type, member_type_id: member, index: i);
2507 i++;
2508 }
2509
	// Don't declare empty blocks in GLSL; this is not allowed.
2511 if (type_is_empty(type) && !backend.supports_empty_struct)
2512 statement(ts: "int empty_struct_member;");
2513
2514 // var.self can be used as a backup name for the block name,
2515 // so we need to make sure we don't disturb the name here on a recompile.
2516 // It will need to be reset if we have to recompile.
2517 preserve_alias_on_reset(id: var.self);
2518 add_resource_name(id: var.self);
2519 end_scope_decl(decl: to_name(id: var.self) + type_to_array_glsl(type, variable_id: var.self));
2520 statement(ts: "");
2521}
2522
2523void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
2524{
2525 auto &type = get<SPIRType>(id: var.basetype);
2526
2527 // Block names should never alias.
2528 auto buffer_name = to_name(id: type.self, allow_alias: false);
2529 size_t buffer_size = (get_declared_struct_size(struct_type: type) + 15) / 16;
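	// Illustrative example (an assumption, not from the source): a block with a declared size of
	// 24 bytes rounds up to (24 + 15) / 16 = 2 vec4s and is emitted roughly as
	//     uniform vec4 UBO[2];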
2530
2531 SPIRType::BaseType basic_type;
2532 if (get_common_basic_type(type, base_type&: basic_type))
2533 {
2534 SPIRType tmp { OpTypeVector };
2535 tmp.basetype = basic_type;
2536 tmp.vecsize = 4;
2537 if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
2538 SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
2539
2540 auto flags = ir.get_buffer_block_flags(var);
2541 statement(ts: "uniform ", ts: flags_to_qualifiers_glsl(type: tmp, flags), ts: type_to_glsl(type: tmp), ts: " ", ts&: buffer_name, ts: "[",
2542 ts&: buffer_size, ts: "];");
2543 }
2544 else
2545 SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
2546}
2547
2548const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
2549{
2550 auto &execution = get_entry_point();
2551
2552 if (subpass_input_is_framebuffer_fetch(id: var.self))
2553 return "";
2554
2555 if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
2556 {
2557 if (is_legacy() && execution.model == ExecutionModelVertex)
2558 return var.storage == StorageClassInput ? "attribute " : "varying ";
2559 else if (is_legacy() && execution.model == ExecutionModelFragment)
2560 return "varying "; // Fragment outputs are renamed so they never hit this case.
2561 else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
2562 {
2563 uint32_t loc = get_decoration(id: var.self, decoration: DecorationLocation);
2564 bool is_inout = location_is_framebuffer_fetch(location: loc);
2565 if (is_inout)
2566 return "inout ";
2567 else
2568 return "out ";
2569 }
2570 else
2571 return var.storage == StorageClassInput ? "in " : "out ";
2572 }
2573 else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
2574 var.storage == StorageClassPushConstant || var.storage == StorageClassAtomicCounter)
2575 {
2576 return "uniform ";
2577 }
2578 else if (var.storage == StorageClassRayPayloadKHR)
2579 {
2580 return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV ";
2581 }
2582 else if (var.storage == StorageClassIncomingRayPayloadKHR)
2583 {
2584 return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV ";
2585 }
2586 else if (var.storage == StorageClassHitAttributeKHR)
2587 {
2588 return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV ";
2589 }
2590 else if (var.storage == StorageClassCallableDataKHR)
2591 {
2592 return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV ";
2593 }
2594 else if (var.storage == StorageClassIncomingCallableDataKHR)
2595 {
2596 return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV ";
2597 }
2598
2599 return "";
2600}
2601
2602void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
2603 const SmallVector<uint32_t> &indices)
2604{
2605 uint32_t member_type_id = type.self;
2606 const SPIRType *member_type = &type;
2607 const SPIRType *parent_type = nullptr;
2608 auto flattened_name = basename;
2609 for (auto &index : indices)
2610 {
2611 flattened_name += "_";
2612 flattened_name += to_member_name(type: *member_type, index);
2613 parent_type = member_type;
2614 member_type_id = member_type->member_types[index];
2615 member_type = &get<SPIRType>(id: member_type_id);
2616 }
2617
2618 assert(member_type->basetype != SPIRType::Struct);
2619
2620 // We're overriding struct member names, so ensure we do so on the primary type.
2621 if (parent_type->type_alias)
2622 parent_type = &get<SPIRType>(id: parent_type->type_alias);
2623
2624 // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
2625 // which is not allowed.
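	// Illustrative example (an assumption, not from the source): joining basename "Block_" with
	// member "_value" would yield "Block__value", which is sanitized to "Block_value".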
2626 ParsedIR::sanitize_underscores(str&: flattened_name);
2627
2628 uint32_t last_index = indices.back();
2629
2630 // Pass in the varying qualifier here so it will appear in the correct declaration order.
2631 // Replace member name while emitting it so it encodes both struct name and member name.
2632 auto backup_name = get_member_name(id: parent_type->self, index: last_index);
2633 auto member_name = to_member_name(type: *parent_type, index: last_index);
2634 set_member_name(id: parent_type->self, index: last_index, name: flattened_name);
2635 emit_struct_member(type: *parent_type, member_type_id, index: last_index, qualifier: qual);
2636 // Restore member name.
2637 set_member_name(id: parent_type->self, index: last_index, name: member_name);
2638}
2639
2640void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual,
2641 const SmallVector<uint32_t> &indices)
2642{
2643 auto sub_indices = indices;
2644 sub_indices.push_back(t: 0);
2645
2646 const SPIRType *member_type = &type;
2647 for (auto &index : indices)
2648 member_type = &get<SPIRType>(id: member_type->member_types[index]);
2649
2650 assert(member_type->basetype == SPIRType::Struct);
2651
2652 if (!member_type->array.empty())
2653 SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks.");
2654
2655 for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
2656 {
2657 sub_indices.back() = i;
2658 if (get<SPIRType>(id: member_type->member_types[i]).basetype == SPIRType::Struct)
2659 emit_flattened_io_block_struct(basename, type, qual, indices: sub_indices);
2660 else
2661 emit_flattened_io_block_member(basename, type, qual, indices: sub_indices);
2662 }
2663}
2664
2665void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
2666{
2667 auto &var_type = get<SPIRType>(id: var.basetype);
2668 if (!var_type.array.empty())
2669 SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
2670
2671 // Emit flattened types based on the type alias. Normally, we are never supposed to emit
2672 // struct declarations for aliased types.
2673 auto &type = var_type.type_alias ? get<SPIRType>(id: var_type.type_alias) : var_type;
2674
2675 auto old_flags = ir.meta[type.self].decoration.decoration_flags;
2676 // Emit the members as if they are part of a block to get all qualifiers.
2677 ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
2678
2679 type.member_name_cache.clear();
2680
2681 SmallVector<uint32_t> member_indices;
2682 member_indices.push_back(t: 0);
2683 auto basename = to_name(id: var.self);
2684
2685 uint32_t i = 0;
2686 for (auto &member : type.member_types)
2687 {
2688 add_member_name(type, name: i);
2689 auto &membertype = get<SPIRType>(id: member);
2690
2691 member_indices.back() = i;
2692 if (membertype.basetype == SPIRType::Struct)
2693 emit_flattened_io_block_struct(basename, type, qual, indices: member_indices);
2694 else
2695 emit_flattened_io_block_member(basename, type, qual, indices: member_indices);
2696 i++;
2697 }
2698
2699 ir.meta[type.self].decoration.decoration_flags = old_flags;
2700
2701 // Treat this variable as fully flattened from now on.
2702 flattened_structs[var.self] = true;
2703}
2704
2705void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
2706{
2707 auto &type = get<SPIRType>(id: var.basetype);
2708
2709 if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
2710 !options.es && options.version < 410)
2711 {
2712 require_extension_internal(ext: "GL_ARB_vertex_attrib_64bit");
2713 }
2714
2715 // Either make it plain in/out or in/out blocks depending on what shader is doing ...
2716 bool block = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock);
2717 const char *qual = to_storage_qualifiers_glsl(var);
2718
2719 if (block)
2720 {
2721 // ESSL earlier than 310 and GLSL earlier than 150 did not support
2722 // I/O variables which are struct types.
2723 // To support this, flatten the struct into separate varyings instead.
2724 if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2725 (!options.es && options.version < 150))
2726 {
2727 // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
2728 // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
2729 emit_flattened_io_block(var, qual);
2730 }
2731 else
2732 {
2733 if (options.es && options.version < 320)
2734 {
2735 // Geometry and tessellation extensions imply this extension.
2736 if (!has_extension(ext: "GL_EXT_geometry_shader") && !has_extension(ext: "GL_EXT_tessellation_shader"))
2737 require_extension_internal(ext: "GL_EXT_shader_io_blocks");
2738 }
2739
2740 // Workaround to make sure we can emit "patch in/out" correctly.
2741 fixup_io_block_patch_primitive_qualifiers(var);
2742
2743 // Block names should never alias.
2744 auto block_name = to_name(id: type.self, allow_alias: false);
2745
2746 // The namespace for I/O blocks is separate from other variables in GLSL.
2747 auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
2748
2749 // Shaders never use the block by interface name, so we don't
2750 // have to track this other than updating name caches.
2751 if (block_name.empty() || block_namespace.find(x: block_name) != end(cont&: block_namespace))
2752 block_name = get_fallback_name(id: type.self);
2753 else
2754 block_namespace.insert(x: block_name);
2755
			// If for some reason block_name is an illegal name, make a final fallback to a workaround name.
			// This cannot conflict with anything else, so we're safe now.
2758 if (block_name.empty())
2759 block_name = join(ts: "_", ts&: get<SPIRType>(id: var.basetype).self, ts: "_", ts: var.self);
2760
2761 // Instance names cannot alias block names.
2762 resource_names.insert(x: block_name);
2763
2764 const char *block_qualifier;
2765 if (has_decoration(id: var.self, decoration: DecorationPatch))
2766 block_qualifier = "patch ";
2767 else if (has_decoration(id: var.self, decoration: DecorationPerPrimitiveEXT))
2768 block_qualifier = "perprimitiveEXT ";
2769 else
2770 block_qualifier = "";
2771
2772 statement(ts: layout_for_variable(var), ts&: block_qualifier, ts&: qual, ts&: block_name);
2773 begin_scope();
2774
2775 type.member_name_cache.clear();
2776
2777 uint32_t i = 0;
2778 for (auto &member : type.member_types)
2779 {
2780 add_member_name(type, name: i);
2781 emit_struct_member(type, member_type_id: member, index: i);
2782 i++;
2783 }
2784
2785 add_resource_name(id: var.self);
2786 end_scope_decl(decl: join(ts: to_name(id: var.self), ts: type_to_array_glsl(type, variable_id: var.self)));
2787 statement(ts: "");
2788 }
2789 }
2790 else
2791 {
2792 // ESSL earlier than 310 and GLSL earlier than 150 did not support
2793 // I/O variables which are struct types.
2794 // To support this, flatten the struct into separate varyings instead.
2795 if (type.basetype == SPIRType::Struct &&
2796 (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2797 (!options.es && options.version < 150)))
2798 {
2799 emit_flattened_io_block(var, qual);
2800 }
2801 else
2802 {
2803 add_resource_name(id: var.self);
2804
			// Legacy GLSL did not support int attributes; we automatically
			// declare them as float and cast them on load/store.
2807 SPIRType newtype = type;
2808 if (is_legacy() && var.storage == StorageClassInput && type.basetype == SPIRType::Int)
2809 newtype.basetype = SPIRType::Float;
2810
2811 // Tessellation control and evaluation shaders must have either
2812 // gl_MaxPatchVertices or unsized arrays for input arrays.
2813 // Opt for unsized as it's the more "correct" variant to use.
2814 if (type.storage == StorageClassInput && !type.array.empty() &&
2815 !has_decoration(id: var.self, decoration: DecorationPatch) &&
2816 (get_entry_point().model == ExecutionModelTessellationControl ||
2817 get_entry_point().model == ExecutionModelTessellationEvaluation))
2818 {
2819 newtype.array.back() = 0;
2820 newtype.array_size_literal.back() = true;
2821 }
2822
2823 statement(ts: layout_for_variable(var), ts: to_qualifiers_glsl(id: var.self),
2824 ts: variable_decl(type: newtype, name: to_name(id: var.self), id: var.self), ts: ";");
2825 }
2826 }
2827}
2828
2829void CompilerGLSL::emit_uniform(const SPIRVariable &var)
2830{
2831 auto &type = get<SPIRType>(id: var.basetype);
2832 if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
2833 {
2834 if (!options.es && options.version < 420)
2835 require_extension_internal(ext: "GL_ARB_shader_image_load_store");
2836 else if (options.es && options.version < 310)
2837 SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
2838 }
2839
2840 add_resource_name(id: var.self);
2841 statement(ts: layout_for_variable(var), ts: variable_decl(variable: var), ts: ";");
2842}
2843
2844string CompilerGLSL::constant_value_macro_name(uint32_t id)
2845{
2846 return join(ts: "SPIRV_CROSS_CONSTANT_ID_", ts&: id);
2847}
2848
2849void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
2850{
2851 auto &type = get<SPIRType>(id: constant.basetype);
2852 // This will break. It is bogus and should not be legal.
2853 if (type_is_top_level_block(type))
2854 return;
2855 add_resource_name(id: constant.self);
2856 auto name = to_name(id: constant.self);
2857 statement(ts: "const ", ts: variable_decl(type, name), ts: " = ", ts: constant_op_expression(cop: constant), ts: ";");
2858}
2859
2860int CompilerGLSL::get_constant_mapping_to_workgroup_component(const SPIRConstant &c) const
2861{
2862 auto &entry_point = get_entry_point();
2863 int index = -1;
2864
2865 // Need to redirect specialization constants which are used as WorkGroupSize to the builtin,
	// since the spec constants themselves are never explicitly declared.
2867 if (entry_point.workgroup_size.constant == 0 && entry_point.flags.get(bit: ExecutionModeLocalSizeId))
2868 {
2869 if (c.self == entry_point.workgroup_size.id_x)
2870 index = 0;
2871 else if (c.self == entry_point.workgroup_size.id_y)
2872 index = 1;
2873 else if (c.self == entry_point.workgroup_size.id_z)
2874 index = 2;
2875 }
2876
2877 return index;
2878}
2879
2880void CompilerGLSL::emit_constant(const SPIRConstant &constant)
2881{
2882 auto &type = get<SPIRType>(id: constant.constant_type);
2883
2884 // This will break. It is bogus and should not be legal.
2885 if (type_is_top_level_block(type))
2886 return;
2887
2888 SpecializationConstant wg_x, wg_y, wg_z;
2889 ID workgroup_size_id = get_work_group_size_specialization_constants(x&: wg_x, y&: wg_y, z&: wg_z);
2890
2891 // This specialization constant is implicitly declared by emitting layout() in;
2892 if (constant.self == workgroup_size_id)
2893 return;
2894
2895 // These specialization constants are implicitly declared by emitting layout() in;
2896 // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
2897 // later can use macro overrides for work group size.
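	// Illustrative example (an assumption, not from the source): a workgroup-size spec constant
	// with constant_id = 0 and default value 64 is emitted as
	//     #ifndef SPIRV_CROSS_CONSTANT_ID_0
	//     #define SPIRV_CROSS_CONSTANT_ID_0 64
	//     #endif
	// and later consumed by a layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_0) in; declaration.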
2898 bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
2899 ConstantID(constant.self) == wg_z.id;
2900
2901 if (options.vulkan_semantics && is_workgroup_size_constant)
2902 {
2903 // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
2904 return;
2905 }
2906 else if (!options.vulkan_semantics && is_workgroup_size_constant &&
2907 !has_decoration(id: constant.self, decoration: DecorationSpecId))
2908 {
2909 // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
2910 return;
2911 }
2912
2913 add_resource_name(id: constant.self);
2914 auto name = to_name(id: constant.self);
2915
2916 // Only scalars have constant IDs.
2917 if (has_decoration(id: constant.self, decoration: DecorationSpecId))
2918 {
2919 if (options.vulkan_semantics)
2920 {
2921 statement(ts: "layout(constant_id = ", ts: get_decoration(id: constant.self, decoration: DecorationSpecId), ts: ") const ",
2922 ts: variable_decl(type, name), ts: " = ", ts: constant_expression(c: constant), ts: ";");
2923 }
2924 else
2925 {
2926 const string &macro_name = constant.specialization_constant_macro_name;
2927 statement(ts: "#ifndef ", ts: macro_name);
2928 statement(ts: "#define ", ts: macro_name, ts: " ", ts: constant_expression(c: constant));
2929 statement(ts: "#endif");
2930
2931 // For workgroup size constants, only emit the macros.
2932 if (!is_workgroup_size_constant)
2933 statement(ts: "const ", ts: variable_decl(type, name), ts: " = ", ts: macro_name, ts: ";");
2934 }
2935 }
2936 else
2937 {
2938 statement(ts: "const ", ts: variable_decl(type, name), ts: " = ", ts: constant_expression(c: constant), ts: ";");
2939 }
2940}
2941
2942void CompilerGLSL::emit_entry_point_declarations()
2943{
2944}
2945
2946void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
2947{
2948 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
2949 if (is_hidden_variable(var))
2950 return;
2951
2952 auto *meta = ir.find_meta(id: var.self);
2953 if (!meta)
2954 return;
2955
2956 auto &m = meta->decoration;
2957 if (keywords.find(x: m.alias) != end(cont: keywords))
2958 m.alias = join(ts: "_", ts&: m.alias);
2959 });
2960
2961 ir.for_each_typed_id<SPIRFunction>(op: [&](uint32_t, const SPIRFunction &func) {
2962 auto *meta = ir.find_meta(id: func.self);
2963 if (!meta)
2964 return;
2965
2966 auto &m = meta->decoration;
2967 if (keywords.find(x: m.alias) != end(cont: keywords))
2968 m.alias = join(ts: "_", ts&: m.alias);
2969 });
2970
2971 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t, const SPIRType &type) {
2972 auto *meta = ir.find_meta(id: type.self);
2973 if (!meta)
2974 return;
2975
2976 auto &m = meta->decoration;
2977 if (keywords.find(x: m.alias) != end(cont: keywords))
2978 m.alias = join(ts: "_", ts&: m.alias);
2979
2980 for (auto &memb : meta->members)
2981 if (keywords.find(x: memb.alias) != end(cont: keywords))
2982 memb.alias = join(ts: "_", ts&: memb.alias);
2983 });
2984}
2985
2986void CompilerGLSL::replace_illegal_names()
2987{
2988 // clang-format off
2989 static const unordered_set<string> keywords = {
2990 "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
2991 "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
2992 "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
2993 "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
2994 "ceil", "cos", "cosh", "cross", "degrees",
2995 "dFdx", "dFdxCoarse", "dFdxFine",
2996 "dFdy", "dFdyCoarse", "dFdyFine",
2997 "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
2998 "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
2999 "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
3000 "greaterThan", "greaterThanEqual", "groupMemoryBarrier",
3001 "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
3002 "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
3003 "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
3004 "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
3005 "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
3006 "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
3007 "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
3008 "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
3009 "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
3010 "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
3011 "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
3012 "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
3013 "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
3014
3015 "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
3016 "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
3017 "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
3018 "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
3019 "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
3020 "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
3021 "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
3022 "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
3023 "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
3024 "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
3025 "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
3026 "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
3027 "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
3028 "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
3029 "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
3030 "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
3031 "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
3032 "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
3033 "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
3034 "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
3035 "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
3036 "while", "writeonly",
3037 };
3038 // clang-format on
3039
3040 replace_illegal_names(keywords);
3041}
3042
3043void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
3044{
3045 auto &m = ir.meta[var.self].decoration;
3046 uint32_t location = 0;
3047 if (m.decoration_flags.get(bit: DecorationLocation))
3048 location = m.location;
3049
3050 // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
3051 // do the access chain part of this for us.
3052 auto &type = get<SPIRType>(id: var.basetype);
3053
3054 if (type.array.empty())
3055 {
3056 // Redirect the write to a specific render target in legacy GLSL.
3057 m.alias = join(ts: "gl_FragData[", ts&: location, ts: "]");
3058
3059 if (is_legacy_es() && location != 0)
3060 require_extension_internal(ext: "GL_EXT_draw_buffers");
3061 }
3062 else if (type.array.size() == 1)
3063 {
3064 // If location is non-zero, we probably have to add an offset.
3065 // This gets really tricky since we'd have to inject an offset in the access chain.
3066 // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
3067 m.alias = "gl_FragData";
3068 if (location != 0)
3069 SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
3070 "This is unimplemented in SPIRV-Cross.");
3071
3072 if (is_legacy_es())
3073 require_extension_internal(ext: "GL_EXT_draw_buffers");
3074 }
3075 else
3076 SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
3077
3078 var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
3079}
3080
3081void CompilerGLSL::replace_fragment_outputs()
3082{
3083 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3084 auto &type = this->get<SPIRType>(id: var.basetype);
3085
3086 if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
3087 replace_fragment_output(var);
3088 });
3089}
3090
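// Adjusts a vector expression with input_components components so that it matches out_type.vecsize,
// either by wrapping it in a constructor or by appending a swizzle that clamps to the last
// available component when the output is wider than the input.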
string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
{
	if (out_type.vecsize == input_components)
		return expr;
	else if (input_components == 1 && !backend.can_swizzle_scalar)
		return join(type_to_glsl(out_type), "(", expr, ")");
	else
	{
		// FIXME: This will not work with packed expressions.
		auto e = enclose_expression(expr) + ".";
		// Just clamp the swizzle index if we have more outputs than inputs.
		for (uint32_t c = 0; c < out_type.vecsize; c++)
			e += index_to_swizzle(min(c, input_components - 1));
		if (backend.swizzle_is_function && out_type.vecsize > 1)
			e += "()";

		remove_duplicate_swizzle(e);
		return e;
	}
}
3111
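// Emits GL_EXT_shader_pixel_local_storage blocks for the PLS inputs/outputs registered by the API user.
// Only valid for ESSL 3.0+ fragment shaders.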
void CompilerGLSL::emit_pls()
{
	auto &execution = get_entry_point();
	if (execution.model != ExecutionModelFragment)
		SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");

	if (!options.es)
		SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");

	if (options.version < 300)
		SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");

	if (!pls_inputs.empty())
	{
		statement("__pixel_local_inEXT _PLSIn");
		begin_scope();
		for (auto &input : pls_inputs)
			statement(pls_decl(input), ";");
		end_scope_decl();
		statement("");
	}

	if (!pls_outputs.empty())
	{
		statement("__pixel_local_outEXT _PLSOut");
		begin_scope();
		for (auto &output : pls_outputs)
			statement(pls_decl(output), ";");
		end_scope_decl();
		statement("");
	}
}
3144
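// Storage images that lack both NonWritable and NonReadable are conservatively marked with both;
// the qualifiers are loosened again later if the shader actually reads or writes the image.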
void CompilerGLSL::fixup_image_load_store_access()
{
	if (!options.enable_storage_image_qualifier_deduction)
		return;

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
		auto &vartype = expression_type(var);
		if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
		{
			// Very old glslangValidator and HLSL compilers do not emit required qualifiers here.
			// Solve this by making the image access as restricted as possible and loosen up if we need to.
			// If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.

			if (!has_decoration(var, DecorationNonWritable) && !has_decoration(var, DecorationNonReadable))
			{
				set_decoration(var, DecorationNonWritable);
				set_decoration(var, DecorationNonReadable);
			}
		}
	});
}
3166
3167static bool is_block_builtin(BuiltIn builtin)
3168{
3169 return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
3170 builtin == BuiltInCullDistance;
3171}
3172
3173bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
3174{
3175 // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.
3176
3177 if (storage != StorageClassOutput)
3178 return false;
3179 bool should_force = false;
3180
3181 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3182 if (should_force)
3183 return;
3184
3185 auto &type = this->get<SPIRType>(id: var.basetype);
3186 bool block = has_decoration(id: type.self, decoration: DecorationBlock);
3187 if (var.storage == storage && block && is_builtin_variable(var))
3188 {
3189 uint32_t member_count = uint32_t(type.member_types.size());
3190 for (uint32_t i = 0; i < member_count; i++)
3191 {
3192 if (has_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn) &&
3193 is_block_builtin(builtin: BuiltIn(get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn))) &&
3194 has_member_decoration(id: type.self, index: i, decoration: DecorationOffset))
3195 {
3196 should_force = true;
3197 }
3198 }
3199 }
3200 else if (var.storage == storage && !block && is_builtin_variable(var))
3201 {
3202 if (is_block_builtin(builtin: BuiltIn(get_decoration(id: type.self, decoration: DecorationBuiltIn))) &&
3203 has_decoration(id: var.self, decoration: DecorationOffset))
3204 {
3205 should_force = true;
3206 }
3207 }
3208 });
3209
3210 // If we're declaring clip/cull planes with control points we need to force block declaration.
3211 if ((get_execution_model() == ExecutionModelTessellationControl ||
3212 get_execution_model() == ExecutionModelMeshEXT) &&
3213 (clip_distance_count || cull_distance_count))
3214 {
3215 should_force = true;
3216 }
3217
3218 // Either glslang bug or oversight, but global invariant position does not work in mesh shaders.
3219 if (get_execution_model() == ExecutionModelMeshEXT && position_invariant)
3220 should_force = true;
3221
3222 return should_force;
3223}
3224
3225void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model)
3226{
3227 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3228 auto &type = this->get<SPIRType>(id: var.basetype);
3229 bool block = has_decoration(id: type.self, decoration: DecorationBlock);
3230 if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
3231 is_builtin_variable(var))
3232 {
3233 if (model != ExecutionModelMeshEXT)
3234 {
3235 // Make sure the array has a supported name in the code.
3236 if (var.storage == StorageClassOutput)
3237 set_name(id: var.self, name: "gl_out");
3238 else if (var.storage == StorageClassInput)
3239 set_name(id: var.self, name: "gl_in");
3240 }
3241 else
3242 {
3243 auto flags = get_buffer_block_flags(id: var.self);
3244 if (flags.get(bit: DecorationPerPrimitiveEXT))
3245 {
3246 set_name(id: var.self, name: "gl_MeshPrimitivesEXT");
3247 set_name(id: type.self, name: "gl_MeshPerPrimitiveEXT");
3248 }
3249 else
3250 {
3251 set_name(id: var.self, name: "gl_MeshVerticesEXT");
3252 set_name(id: type.self, name: "gl_MeshPerVertexEXT");
3253 }
3254 }
3255 }
3256
3257 if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block)
3258 {
3259 auto *m = ir.find_meta(id: var.self);
3260 if (m && m->decoration.builtin)
3261 {
3262 auto builtin_type = m->decoration.builtin_type;
3263 if (builtin_type == BuiltInPrimitivePointIndicesEXT)
3264 set_name(id: var.self, name: "gl_PrimitivePointIndicesEXT");
3265 else if (builtin_type == BuiltInPrimitiveLineIndicesEXT)
3266 set_name(id: var.self, name: "gl_PrimitiveLineIndicesEXT");
3267 else if (builtin_type == BuiltInPrimitiveTriangleIndicesEXT)
3268 set_name(id: var.self, name: "gl_PrimitiveTriangleIndicesEXT");
3269 }
3270 }
3271 });
3272}
3273
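// Walks all builtin I/O for the given storage class, gathering clip/cull distance sizes,
// transform feedback offsets/strides and geometry stream indices, then redeclares the
// gl_PerVertex (or gl_MeshPerVertexEXT) block with matching layout qualifiers.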
3274void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
3275{
3276 Bitset emitted_builtins;
3277 Bitset global_builtins;
3278 const SPIRVariable *block_var = nullptr;
3279 bool emitted_block = false;
3280
3281 // Need to use declared size in the type.
3282 // These variables might have been declared, but not statically used, so we haven't deduced their size yet.
3283 uint32_t cull_distance_size = 0;
3284 uint32_t clip_distance_size = 0;
3285
3286 bool have_xfb_buffer_stride = false;
3287 bool have_geom_stream = false;
3288 bool have_any_xfb_offset = false;
3289 uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
3290 std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
3291
3292 const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool {
3293 return builtin == BuiltInPosition || builtin == BuiltInPointSize ||
3294 builtin == BuiltInClipDistance || builtin == BuiltInCullDistance;
3295 };
3296
3297 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3298 auto &type = this->get<SPIRType>(id: var.basetype);
3299 bool block = has_decoration(id: type.self, decoration: DecorationBlock);
3300 Bitset builtins;
3301
3302 if (var.storage == storage && block && is_builtin_variable(var))
3303 {
3304 uint32_t index = 0;
3305 for (auto &m : ir.meta[type.self].members)
3306 {
3307 if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
3308 {
3309 builtins.set(m.builtin_type);
3310 if (m.builtin_type == BuiltInCullDistance)
3311 cull_distance_size = to_array_size_literal(type: this->get<SPIRType>(id: type.member_types[index]));
3312 else if (m.builtin_type == BuiltInClipDistance)
3313 clip_distance_size = to_array_size_literal(type: this->get<SPIRType>(id: type.member_types[index]));
3314
3315 if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationOffset))
3316 {
3317 have_any_xfb_offset = true;
3318 builtin_xfb_offsets[m.builtin_type] = m.offset;
3319 }
3320
3321 if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationStream))
3322 {
3323 uint32_t stream = m.stream;
3324 if (have_geom_stream && geom_stream != stream)
3325 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3326 have_geom_stream = true;
3327 geom_stream = stream;
3328 }
3329 }
3330 index++;
3331 }
3332
3333 if (storage == StorageClassOutput && has_decoration(id: var.self, decoration: DecorationXfbBuffer) &&
3334 has_decoration(id: var.self, decoration: DecorationXfbStride))
3335 {
3336 uint32_t buffer_index = get_decoration(id: var.self, decoration: DecorationXfbBuffer);
3337 uint32_t stride = get_decoration(id: var.self, decoration: DecorationXfbStride);
				if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
					SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
				if (have_xfb_buffer_stride && stride != xfb_stride)
					SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
3342 have_xfb_buffer_stride = true;
3343 xfb_buffer = buffer_index;
3344 xfb_stride = stride;
3345 }
3346
3347 if (storage == StorageClassOutput && has_decoration(id: var.self, decoration: DecorationStream))
3348 {
3349 uint32_t stream = get_decoration(id: var.self, decoration: DecorationStream);
3350 if (have_geom_stream && geom_stream != stream)
3351 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3352 have_geom_stream = true;
3353 geom_stream = stream;
3354 }
3355 }
3356 else if (var.storage == storage && !block && is_builtin_variable(var))
3357 {
3358 // While we're at it, collect all declared global builtins (HLSL mostly ...).
3359 auto &m = ir.meta[var.self].decoration;
3360 if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
3361 {
3362 // For mesh/tesc output, Clip/Cull is an array-of-array. Look at innermost array type
3363 // for correct result.
3364 global_builtins.set(m.builtin_type);
3365 if (m.builtin_type == BuiltInCullDistance)
3366 cull_distance_size = to_array_size_literal(type, index: 0);
3367 else if (m.builtin_type == BuiltInClipDistance)
3368 clip_distance_size = to_array_size_literal(type, index: 0);
3369
3370 if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationXfbStride) &&
3371 m.decoration_flags.get(bit: DecorationXfbBuffer) && m.decoration_flags.get(bit: DecorationOffset))
3372 {
3373 have_any_xfb_offset = true;
3374 builtin_xfb_offsets[m.builtin_type] = m.offset;
3375 uint32_t buffer_index = m.xfb_buffer;
3376 uint32_t stride = m.xfb_stride;
					if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
						SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
					if (have_xfb_buffer_stride && stride != xfb_stride)
						SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
3381 have_xfb_buffer_stride = true;
3382 xfb_buffer = buffer_index;
3383 xfb_stride = stride;
3384 }
3385
3386 if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationStream))
3387 {
3388 uint32_t stream = get_decoration(id: var.self, decoration: DecorationStream);
3389 if (have_geom_stream && geom_stream != stream)
3390 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3391 have_geom_stream = true;
3392 geom_stream = stream;
3393 }
3394 }
3395 }
3396
3397 if (builtins.empty())
3398 return;
3399
3400 if (emitted_block)
3401 SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
3402
3403 emitted_builtins = builtins;
3404 emitted_block = true;
3405 block_var = &var;
3406 });
3407
3408 global_builtins =
3409 Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
3410 (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
3411
3412 // Try to collect all other declared builtins.
3413 if (!emitted_block)
3414 emitted_builtins = global_builtins;
3415
3416 // Can't declare an empty interface block.
3417 if (emitted_builtins.empty())
3418 return;
3419
3420 if (storage == StorageClassOutput)
3421 {
3422 SmallVector<string> attr;
3423 if (have_xfb_buffer_stride && have_any_xfb_offset)
3424 {
3425 if (!options.es)
3426 {
				if (options.version < 440 && options.version >= 140)
					require_extension_internal("GL_ARB_enhanced_layouts");
				else if (options.version < 140)
					SPIRV_CROSS_THROW("xfb_stride or xfb_buffer is not supported in targets below GLSL 1.40.");
			}
			else
				SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
			attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride));
3437 }
3438
3439 if (have_geom_stream)
3440 {
3441 if (get_execution_model() != ExecutionModelGeometry)
3442 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
3443 if (options.es)
3444 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
3445 if (options.version < 400)
3446 require_extension_internal(ext: "GL_ARB_transform_feedback3");
3447 attr.push_back(t: join(ts: "stream = ", ts&: geom_stream));
3448 }
3449
3450 if (model == ExecutionModelMeshEXT)
3451 statement(ts: "out gl_MeshPerVertexEXT");
3452 else if (!attr.empty())
3453 statement(ts: "layout(", ts: merge(list: attr), ts: ") out gl_PerVertex");
3454 else
3455 statement(ts: "out gl_PerVertex");
3456 }
3457 else
3458 {
3459 // If we have passthrough, there is no way PerVertex cannot be passthrough.
3460 if (get_entry_point().geometry_passthrough)
3461 statement(ts: "layout(passthrough) in gl_PerVertex");
3462 else
3463 statement(ts: "in gl_PerVertex");
3464 }
3465
3466 begin_scope();
3467 if (emitted_builtins.get(bit: BuiltInPosition))
3468 {
3469 auto itr = builtin_xfb_offsets.find(x: BuiltInPosition);
3470 if (itr != end(cont&: builtin_xfb_offsets))
3471 statement(ts: "layout(xfb_offset = ", ts&: itr->second, ts: ") vec4 gl_Position;");
3472 else if (position_invariant)
3473 statement(ts: "invariant vec4 gl_Position;");
3474 else
3475 statement(ts: "vec4 gl_Position;");
3476 }
3477
3478 if (emitted_builtins.get(bit: BuiltInPointSize))
3479 {
3480 auto itr = builtin_xfb_offsets.find(x: BuiltInPointSize);
3481 if (itr != end(cont&: builtin_xfb_offsets))
3482 statement(ts: "layout(xfb_offset = ", ts&: itr->second, ts: ") float gl_PointSize;");
3483 else
3484 statement(ts: "float gl_PointSize;");
3485 }
3486
3487 if (emitted_builtins.get(bit: BuiltInClipDistance))
3488 {
3489 auto itr = builtin_xfb_offsets.find(x: BuiltInClipDistance);
3490 if (itr != end(cont&: builtin_xfb_offsets))
3491 statement(ts: "layout(xfb_offset = ", ts&: itr->second, ts: ") float gl_ClipDistance[", ts&: clip_distance_size, ts: "];");
3492 else
3493 statement(ts: "float gl_ClipDistance[", ts&: clip_distance_size, ts: "];");
3494 }
3495
3496 if (emitted_builtins.get(bit: BuiltInCullDistance))
3497 {
3498 auto itr = builtin_xfb_offsets.find(x: BuiltInCullDistance);
3499 if (itr != end(cont&: builtin_xfb_offsets))
3500 statement(ts: "layout(xfb_offset = ", ts&: itr->second, ts: ") float gl_CullDistance[", ts&: cull_distance_size, ts: "];");
3501 else
3502 statement(ts: "float gl_CullDistance[", ts&: cull_distance_size, ts: "];");
3503 }
3504
3505 bool builtin_array = model == ExecutionModelTessellationControl ||
3506 (model == ExecutionModelMeshEXT && storage == StorageClassOutput) ||
3507 (model == ExecutionModelGeometry && storage == StorageClassInput) ||
3508 (model == ExecutionModelTessellationEvaluation && storage == StorageClassInput);
3509
3510 if (builtin_array)
3511 {
3512 const char *instance_name;
3513 if (model == ExecutionModelMeshEXT)
3514 instance_name = "gl_MeshVerticesEXT"; // Per primitive is never synthesized.
3515 else
3516 instance_name = storage == StorageClassInput ? "gl_in" : "gl_out";
3517
3518 if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
3519 end_scope_decl(decl: join(ts&: instance_name, ts: "[", ts&: get_entry_point().output_vertices, ts: "]"));
3520 else
3521 end_scope_decl(decl: join(ts&: instance_name, ts: "[]"));
3522 }
3523 else
3524 end_scope_decl();
3525 statement(ts: "");
3526}
3527
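// A variable counts as a LUT if it is statically assigned a single constant that has been
// flagged as is_used_as_lut; such variables are emitted as constant lookup tables instead.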
bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
{
	bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;

	if (statically_assigned)
	{
		auto *constant = maybe_get<SPIRConstant>(var.static_expression);
		if (constant && constant->is_used_as_lut)
			return true;
	}

	return false;
}
3541
3542void CompilerGLSL::emit_resources()
3543{
3544 auto &execution = get_entry_point();
3545
3546 replace_illegal_names();
3547
3548 // Legacy GL uses gl_FragData[], redeclare all fragment outputs
3549 // with builtins.
3550 if (execution.model == ExecutionModelFragment && is_legacy())
3551 replace_fragment_outputs();
3552
3553 // Emit PLS blocks if we have such variables.
3554 if (!pls_inputs.empty() || !pls_outputs.empty())
3555 emit_pls();
3556
3557 switch (execution.model)
3558 {
3559 case ExecutionModelGeometry:
3560 case ExecutionModelTessellationControl:
3561 case ExecutionModelTessellationEvaluation:
3562 case ExecutionModelMeshEXT:
3563 fixup_implicit_builtin_block_names(model: execution.model);
3564 break;
3565
3566 default:
3567 break;
3568 }
3569
3570 bool global_invariant_position = position_invariant && (options.es || options.version >= 120);
3571
3572 // Emit custom gl_PerVertex for SSO compatibility.
3573 if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
3574 {
3575 switch (execution.model)
3576 {
3577 case ExecutionModelGeometry:
3578 case ExecutionModelTessellationControl:
3579 case ExecutionModelTessellationEvaluation:
3580 emit_declared_builtin_block(storage: StorageClassInput, model: execution.model);
3581 emit_declared_builtin_block(storage: StorageClassOutput, model: execution.model);
3582 global_invariant_position = false;
3583 break;
3584
3585 case ExecutionModelVertex:
3586 case ExecutionModelMeshEXT:
3587 emit_declared_builtin_block(storage: StorageClassOutput, model: execution.model);
3588 global_invariant_position = false;
3589 break;
3590
3591 default:
3592 break;
3593 }
3594 }
3595 else if (should_force_emit_builtin_block(storage: StorageClassOutput))
3596 {
3597 emit_declared_builtin_block(storage: StorageClassOutput, model: execution.model);
3598 global_invariant_position = false;
3599 }
3600 else if (execution.geometry_passthrough)
3601 {
3602 // Need to declare gl_in with Passthrough.
3603 // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
3604 emit_declared_builtin_block(storage: StorageClassInput, model: execution.model);
3605 }
3606 else
3607 {
3608 // Need to redeclare clip/cull distance with explicit size to use them.
3609 // SPIR-V mandates these builtins have a size declared.
3610 const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
3611 if (clip_distance_count != 0)
3612 statement(ts&: storage, ts: " float gl_ClipDistance[", ts&: clip_distance_count, ts: "];");
3613 if (cull_distance_count != 0)
3614 statement(ts&: storage, ts: " float gl_CullDistance[", ts&: cull_distance_count, ts: "];");
3615 if (clip_distance_count != 0 || cull_distance_count != 0)
3616 statement(ts: "");
3617 }
3618
3619 if (global_invariant_position)
3620 {
3621 statement(ts: "invariant gl_Position;");
3622 statement(ts: "");
3623 }
3624
3625 bool emitted = false;
3626
3627 // If emitted Vulkan GLSL,
3628 // emit specialization constants as actual floats,
3629 // spec op expressions will redirect to the constant name.
3630 //
3631 {
3632 auto loop_lock = ir.create_loop_hard_lock();
3633 for (auto &id_ : ir.ids_for_constant_undef_or_type)
3634 {
3635 auto &id = ir.ids[id_];
3636
3637 // Skip declaring any bogus constants or undefs which use block types.
3638 // We don't declare block types directly, so this will never work.
3639 // Should not be legal SPIR-V, so this is considered a workaround.
3640
3641 if (id.get_type() == TypeConstant)
3642 {
3643 auto &c = id.get<SPIRConstant>();
3644
3645 bool needs_declaration = c.specialization || c.is_used_as_lut;
3646
3647 if (needs_declaration)
3648 {
3649 if (!options.vulkan_semantics && c.specialization)
3650 {
3651 c.specialization_constant_macro_name =
3652 constant_value_macro_name(id: get_decoration(id: c.self, decoration: DecorationSpecId));
3653 }
3654 emit_constant(constant: c);
3655 emitted = true;
3656 }
3657 }
3658 else if (id.get_type() == TypeConstantOp)
3659 {
3660 emit_specialization_constant_op(constant: id.get<SPIRConstantOp>());
3661 emitted = true;
3662 }
3663 else if (id.get_type() == TypeType)
3664 {
3665 auto *type = &id.get<SPIRType>();
3666
3667 bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
3668 (!has_decoration(id: type->self, decoration: DecorationBlock) &&
3669 !has_decoration(id: type->self, decoration: DecorationBufferBlock));
3670
3671 // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs.
3672 if (type->basetype == SPIRType::Struct && type->pointer &&
3673 has_decoration(id: type->self, decoration: DecorationBlock) &&
3674 (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
3675 type->storage == StorageClassHitAttributeKHR))
3676 {
3677 type = &get<SPIRType>(id: type->parent_type);
3678 is_natural_struct = true;
3679 }
3680
3681 if (is_natural_struct)
3682 {
3683 if (emitted)
3684 statement(ts: "");
3685 emitted = false;
3686
3687 emit_struct(type&: *type);
3688 }
3689 }
3690 else if (id.get_type() == TypeUndef)
3691 {
3692 auto &undef = id.get<SPIRUndef>();
3693 auto &type = this->get<SPIRType>(id: undef.basetype);
3694 // OpUndef can be void for some reason ...
3695 if (type.basetype == SPIRType::Void)
3696 return;
3697
3698 // This will break. It is bogus and should not be legal.
3699 if (type_is_top_level_block(type))
3700 return;
3701
3702 string initializer;
3703 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
3704 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: undef.basetype));
3705
3706 // FIXME: If used in a constant, we must declare it as one.
3707 statement(ts: variable_decl(type, name: to_name(id: undef.self), id: undef.self), ts&: initializer, ts: ";");
3708 emitted = true;
3709 }
3710 }
3711 }
3712
3713 if (emitted)
3714 statement(ts: "");
3715
3716 // If we needed to declare work group size late, check here.
3717 // If the work group size depends on a specialization constant, we need to declare the layout() block
3718 // after constants (and their macros) have been declared.
3719 if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
3720 (execution.workgroup_size.constant != 0 || execution.flags.get(bit: ExecutionModeLocalSizeId)))
3721 {
3722 SpecializationConstant wg_x, wg_y, wg_z;
3723 get_work_group_size_specialization_constants(x&: wg_x, y&: wg_y, z&: wg_z);
3724
3725 if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
3726 {
3727 SmallVector<string> inputs;
3728 build_workgroup_size(arguments&: inputs, wg_x, wg_y, wg_z);
3729 statement(ts: "layout(", ts: merge(list: inputs), ts: ") in;");
3730 statement(ts: "");
3731 }
3732 }
3733
3734 emitted = false;
3735
3736 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
3737 {
3738 // Output buffer reference blocks.
3739 // Do this in two stages, one with forward declaration,
3740 // and one without. Buffer reference blocks can reference themselves
3741 // to support things like linked lists.
3742 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t id, SPIRType &type) {
3743 if (is_physical_pointer(type))
3744 {
3745 bool emit_type = true;
3746 if (!is_physical_pointer_to_buffer_block(type))
3747 {
3748 // Only forward-declare if we intend to emit it in the non_block_pointer types.
3749 // Otherwise, these are just "benign" pointer types that exist as a result of access chains.
3750 emit_type = std::find(first: physical_storage_non_block_pointer_types.begin(),
3751 last: physical_storage_non_block_pointer_types.end(),
3752 val: id) != physical_storage_non_block_pointer_types.end();
3753 }
3754
3755 if (emit_type)
3756 emit_buffer_reference_block(type_id: id, forward_declaration: true);
3757 }
3758 });
3759
3760 for (auto type : physical_storage_non_block_pointer_types)
3761 emit_buffer_reference_block(type_id: type, forward_declaration: false);
3762
3763 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t id, SPIRType &type) {
3764 if (is_physical_pointer_to_buffer_block(type))
3765 emit_buffer_reference_block(type_id: id, forward_declaration: false);
3766 });
3767 }
3768
3769 // Output UBOs and SSBOs
3770 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3771 auto &type = this->get<SPIRType>(id: var.basetype);
3772
3773 bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
3774 type.storage == StorageClassShaderRecordBufferKHR;
3775 bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock) ||
3776 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
3777
3778 if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
3779 has_block_flags)
3780 {
3781 emit_buffer_block(var);
3782 }
3783 });
3784
3785 // Output push constant blocks
3786 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3787 auto &type = this->get<SPIRType>(id: var.basetype);
3788 if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
3789 !is_hidden_variable(var))
3790 {
3791 emit_push_constant_block(var);
3792 }
3793 });
3794
3795 bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
3796
3797 // Output Uniform Constants (values, samplers, images, etc).
3798 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3799 auto &type = this->get<SPIRType>(id: var.basetype);
3800
3801 // If we're remapping separate samplers and images, only emit the combined samplers.
3802 if (skip_separate_image_sampler)
3803 {
3804 // Sampler buffers are always used without a sampler, and they will also work in regular GL.
3805 bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
3806 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
3807 bool separate_sampler = type.basetype == SPIRType::Sampler;
3808 if (!sampler_buffer && (separate_image || separate_sampler))
3809 return;
3810 }
3811
3812 if (var.storage != StorageClassFunction && type.pointer &&
3813 (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
3814 type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
3815 type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
3816 type.storage == StorageClassHitAttributeKHR) &&
3817 !is_hidden_variable(var))
3818 {
3819 emit_uniform(var);
3820 emitted = true;
3821 }
3822 });
3823
3824 if (emitted)
3825 statement(ts: "");
3826 emitted = false;
3827
3828 bool emitted_base_instance = false;
3829
3830 // Output in/out interfaces.
3831 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) {
3832 auto &type = this->get<SPIRType>(id: var.basetype);
3833
3834 bool is_hidden = is_hidden_variable(var);
3835
3836 // Unused output I/O variables might still be required to implement framebuffer fetch.
3837 if (var.storage == StorageClassOutput && !is_legacy() &&
3838 location_is_framebuffer_fetch(location: get_decoration(id: var.self, decoration: DecorationLocation)) != 0)
3839 {
3840 is_hidden = false;
3841 }
3842
3843 if (var.storage != StorageClassFunction && type.pointer &&
3844 (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
3845 interface_variable_exists_in_entry_point(id: var.self) && !is_hidden)
3846 {
3847 if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput &&
3848 type.array.size() == 1)
3849 {
3850 SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader.");
3851 }
3852 emit_interface_block(var);
3853 emitted = true;
3854 }
3855 else if (is_builtin_variable(var))
3856 {
3857 auto builtin = BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn));
3858 // For gl_InstanceIndex emulation on GLES, the API user needs to
3859 // supply this uniform.
3860
3861 // The draw parameter extension is soft-enabled on GL with some fallbacks.
3862 if (!options.vulkan_semantics)
3863 {
3864 if (!emitted_base_instance &&
3865 ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
3866 (builtin == BuiltInBaseInstance)))
3867 {
3868 statement(ts: "#ifdef GL_ARB_shader_draw_parameters");
3869 statement(ts: "#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
3870 statement(ts: "#else");
3871 // A crude, but simple workaround which should be good enough for non-indirect draws.
3872 statement(ts: "uniform int SPIRV_Cross_BaseInstance;");
3873 statement(ts: "#endif");
3874 emitted = true;
3875 emitted_base_instance = true;
3876 }
3877 else if (builtin == BuiltInBaseVertex)
3878 {
3879 statement(ts: "#ifdef GL_ARB_shader_draw_parameters");
3880 statement(ts: "#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
3881 statement(ts: "#else");
3882 // A crude, but simple workaround which should be good enough for non-indirect draws.
3883 statement(ts: "uniform int SPIRV_Cross_BaseVertex;");
3884 statement(ts: "#endif");
3885 }
3886 else if (builtin == BuiltInDrawIndex)
3887 {
3888 statement(ts: "#ifndef GL_ARB_shader_draw_parameters");
3889 // Cannot really be worked around.
3890 statement(ts: "#error GL_ARB_shader_draw_parameters is not supported.");
3891 statement(ts: "#endif");
3892 }
3893 }
3894 }
3895 });
3896
3897 // Global variables.
3898 for (auto global : global_variables)
3899 {
3900 auto &var = get<SPIRVariable>(id: global);
3901 if (is_hidden_variable(var, include_builtins: true))
3902 continue;
3903
3904 if (var.storage != StorageClassOutput)
3905 {
3906 if (!variable_is_lut(var))
3907 {
3908 add_resource_name(id: var.self);
3909
3910 string initializer;
3911 if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
3912 !var.initializer && !var.static_expression && type_can_zero_initialize(type: get_variable_data_type(var)))
3913 {
3914 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var)));
3915 }
3916
3917 statement(ts: variable_decl(variable: var), ts&: initializer, ts: ";");
3918 emitted = true;
3919 }
3920 }
3921 else if (var.initializer && maybe_get<SPIRConstant>(id: var.initializer) != nullptr)
3922 {
3923 emit_output_variable_initializer(var);
3924 }
3925 }
3926
3927 if (emitted)
3928 statement(ts: "");
3929}
3930
3931void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
3932{
3933 // If a StorageClassOutput variable has an initializer, we need to initialize it in main().
3934 auto &entry_func = this->get<SPIRFunction>(id: ir.default_entry_point);
3935 auto &type = get<SPIRType>(id: var.basetype);
3936 bool is_patch = has_decoration(id: var.self, decoration: DecorationPatch);
3937 bool is_block = has_decoration(id: type.self, decoration: DecorationBlock);
3938 bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch;
3939
3940 if (is_block)
3941 {
3942 uint32_t member_count = uint32_t(type.member_types.size());
3943 bool type_is_array = type.array.size() == 1;
3944 uint32_t array_size = 1;
3945 if (type_is_array)
3946 array_size = to_array_size_literal(type);
3947 uint32_t iteration_count = is_control_point ? 1 : array_size;
3948
3949 // If the initializer is a block, we must initialize each block member one at a time.
3950 for (uint32_t i = 0; i < member_count; i++)
3951 {
3952 // These outputs might not have been properly declared, so don't initialize them in that case.
3953 if (has_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn))
3954 {
3955 if (get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn) == BuiltInCullDistance &&
3956 !cull_distance_count)
3957 continue;
3958
3959 if (get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn) == BuiltInClipDistance &&
3960 !clip_distance_count)
3961 continue;
3962 }
3963
3964 // We need to build a per-member array first, essentially transposing from AoS to SoA.
3965 // This code path hits when we have an array of blocks.
3966 string lut_name;
3967 if (type_is_array)
3968 {
3969 lut_name = join(ts: "_", ts: var.self, ts: "_", ts&: i, ts: "_init");
3970 uint32_t member_type_id = get<SPIRType>(id: var.basetype).member_types[i];
3971 auto &member_type = get<SPIRType>(id: member_type_id);
3972 auto array_type = member_type;
3973 array_type.parent_type = member_type_id;
3974 array_type.op = OpTypeArray;
3975 array_type.array.push_back(t: array_size);
3976 array_type.array_size_literal.push_back(t: true);
3977
3978 SmallVector<string> exprs;
3979 exprs.reserve(count: array_size);
3980 auto &c = get<SPIRConstant>(id: var.initializer);
3981 for (uint32_t j = 0; j < array_size; j++)
3982 exprs.push_back(t: to_expression(id: get<SPIRConstant>(id: c.subconstants[j]).subconstants[i]));
3983 statement(ts: "const ", ts: type_to_glsl(type: array_type), ts: " ", ts&: lut_name, ts: type_to_array_glsl(type: array_type, variable_id: 0), ts: " = ",
3984 ts: type_to_glsl_constructor(type: array_type), ts: "(", ts: merge(list: exprs, between: ", "), ts: ");");
3985 }
3986
3987 for (uint32_t j = 0; j < iteration_count; j++)
3988 {
3989 entry_func.fixup_hooks_in.push_back(t: [=, &var]() {
3990 AccessChainMeta meta;
3991 auto &c = this->get<SPIRConstant>(id: var.initializer);
3992
3993 uint32_t invocation_id = 0;
3994 uint32_t member_index_id = 0;
3995 if (is_control_point)
3996 {
3997 uint32_t ids = ir.increase_bound_by(count: 3);
3998 auto &uint_type = set<SPIRType>(id: ids, args: OpTypeInt);
3999 uint_type.basetype = SPIRType::UInt;
4000 uint_type.width = 32;
4001 set<SPIRExpression>(id: ids + 1, args: builtin_to_glsl(builtin: BuiltInInvocationId, storage: StorageClassInput), args&: ids, args: true);
4002 set<SPIRConstant>(id: ids + 2, args&: ids, args: i, args: false);
4003 invocation_id = ids + 1;
4004 member_index_id = ids + 2;
4005 }
4006
4007 if (is_patch)
4008 {
4009 statement(ts: "if (gl_InvocationID == 0)");
4010 begin_scope();
4011 }
4012
4013 if (type_is_array && !is_control_point)
4014 {
4015 uint32_t indices[2] = { j, i };
4016 auto chain = access_chain_internal(base: var.self, indices, count: 2, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &meta);
4017 statement(ts&: chain, ts: " = ", ts: lut_name, ts: "[", ts: j, ts: "];");
4018 }
4019 else if (is_control_point)
4020 {
4021 uint32_t indices[2] = { invocation_id, member_index_id };
4022 auto chain = access_chain_internal(base: var.self, indices, count: 2, flags: 0, meta: &meta);
4023 statement(ts&: chain, ts: " = ", ts: lut_name, ts: "[", ts: builtin_to_glsl(builtin: BuiltInInvocationId, storage: StorageClassInput), ts: "];");
4024 }
4025 else
4026 {
4027 auto chain =
4028 access_chain_internal(base: var.self, indices: &i, count: 1, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &meta);
4029 statement(ts&: chain, ts: " = ", ts: to_expression(id: c.subconstants[i]), ts: ";");
4030 }
4031
4032 if (is_patch)
4033 end_scope();
4034 });
4035 }
4036 }
4037 }
4038 else if (is_control_point)
4039 {
4040 auto lut_name = join(ts: "_", ts: var.self, ts: "_init");
4041 statement(ts: "const ", ts: type_to_glsl(type), ts: " ", ts&: lut_name, ts: type_to_array_glsl(type, variable_id: 0),
4042 ts: " = ", ts: to_expression(id: var.initializer), ts: ";");
4043 entry_func.fixup_hooks_in.push_back(t: [&, lut_name]() {
4044 statement(ts: to_expression(id: var.self), ts: "[gl_InvocationID] = ", ts: lut_name, ts: "[gl_InvocationID];");
4045 });
4046 }
4047 else if (has_decoration(id: var.self, decoration: DecorationBuiltIn) &&
4048 BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn)) == BuiltInSampleMask)
4049 {
4050 // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_<
4051 entry_func.fixup_hooks_in.push_back(t: [&] {
4052 auto &c = this->get<SPIRConstant>(id: var.initializer);
4053 uint32_t num_constants = uint32_t(c.subconstants.size());
4054 for (uint32_t i = 0; i < num_constants; i++)
4055 {
4056 // Don't use to_expression on constant since it might be uint, just fish out the raw int.
4057 statement(ts: to_expression(id: var.self), ts: "[", ts&: i, ts: "] = ",
4058 ts: convert_to_string(value: this->get<SPIRConstant>(id: c.subconstants[i]).scalar_i32()), ts: ";");
4059 }
4060 });
4061 }
4062 else
4063 {
4064 auto lut_name = join(ts: "_", ts: var.self, ts: "_init");
4065 statement(ts: "const ", ts: type_to_glsl(type), ts: " ", ts&: lut_name,
4066 ts: type_to_array_glsl(type, variable_id: var.self), ts: " = ", ts: to_expression(id: var.initializer), ts: ";");
4067 entry_func.fixup_hooks_in.push_back(t: [&, lut_name, is_patch]() {
4068 if (is_patch)
4069 {
4070 statement(ts: "if (gl_InvocationID == 0)");
4071 begin_scope();
4072 }
4073 statement(ts: to_expression(id: var.self), ts: " = ", ts: lut_name, ts: ";");
4074 if (is_patch)
4075 end_scope();
4076 });
4077 }
4078}
4079
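// Emits helper functions (one overload per scalar/vector type) that implement subgroup
// add/mul reductions and inclusive/exclusive scans on top of NV_shader_thread_shuffle,
// as a workaround for targets without native GL_KHR_shader_subgroup_arithmetic support.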
4080void CompilerGLSL::emit_subgroup_arithmetic_workaround(const std::string &func, Op op, GroupOperation group_op)
4081{
4082 std::string result;
4083 switch (group_op)
4084 {
4085 case GroupOperationReduce:
4086 result = "reduction";
4087 break;
4088
4089 case GroupOperationExclusiveScan:
4090 result = "excl_scan";
4091 break;
4092
4093 case GroupOperationInclusiveScan:
4094 result = "incl_scan";
4095 break;
4096
4097 default:
4098 SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
4099 }
4100
4101 struct TypeInfo
4102 {
4103 std::string type;
4104 std::string identity;
4105 };
4106
4107 std::vector<TypeInfo> type_infos;
4108 switch (op)
4109 {
4110 case OpGroupNonUniformIAdd:
4111 {
4112 type_infos.emplace_back(args: TypeInfo{ .type: "uint", .identity: "0u" });
4113 type_infos.emplace_back(args: TypeInfo{ .type: "uvec2", .identity: "uvec2(0u)" });
4114 type_infos.emplace_back(args: TypeInfo{ .type: "uvec3", .identity: "uvec3(0u)" });
4115 type_infos.emplace_back(args: TypeInfo{ .type: "uvec4", .identity: "uvec4(0u)" });
4116 type_infos.emplace_back(args: TypeInfo{ .type: "int", .identity: "0" });
4117 type_infos.emplace_back(args: TypeInfo{ .type: "ivec2", .identity: "ivec2(0)" });
4118 type_infos.emplace_back(args: TypeInfo{ .type: "ivec3", .identity: "ivec3(0)" });
4119 type_infos.emplace_back(args: TypeInfo{ .type: "ivec4", .identity: "ivec4(0)" });
4120 break;
4121 }
4122
4123 case OpGroupNonUniformFAdd:
4124 {
4125 type_infos.emplace_back(args: TypeInfo{ .type: "float", .identity: "0.0f" });
4126 type_infos.emplace_back(args: TypeInfo{ .type: "vec2", .identity: "vec2(0.0f)" });
4127 type_infos.emplace_back(args: TypeInfo{ .type: "vec3", .identity: "vec3(0.0f)" });
4128 type_infos.emplace_back(args: TypeInfo{ .type: "vec4", .identity: "vec4(0.0f)" });
		// ARB_gpu_shader_fp64 is core in GL 4.0, which NV_shader_thread_shuffle in turn requires,
		// so double types are safe to emit here.
4130 type_infos.emplace_back(args: TypeInfo{ .type: "double", .identity: "0.0LF" });
4131 type_infos.emplace_back(args: TypeInfo{ .type: "dvec2", .identity: "dvec2(0.0LF)" });
4132 type_infos.emplace_back(args: TypeInfo{ .type: "dvec3", .identity: "dvec3(0.0LF)" });
4133 type_infos.emplace_back(args: TypeInfo{ .type: "dvec4", .identity: "dvec4(0.0LF)" });
4134 break;
4135 }
4136
4137 case OpGroupNonUniformIMul:
4138 {
4139 type_infos.emplace_back(args: TypeInfo{ .type: "uint", .identity: "1u" });
4140 type_infos.emplace_back(args: TypeInfo{ .type: "uvec2", .identity: "uvec2(1u)" });
4141 type_infos.emplace_back(args: TypeInfo{ .type: "uvec3", .identity: "uvec3(1u)" });
4142 type_infos.emplace_back(args: TypeInfo{ .type: "uvec4", .identity: "uvec4(1u)" });
4143 type_infos.emplace_back(args: TypeInfo{ .type: "int", .identity: "1" });
4144 type_infos.emplace_back(args: TypeInfo{ .type: "ivec2", .identity: "ivec2(1)" });
4145 type_infos.emplace_back(args: TypeInfo{ .type: "ivec3", .identity: "ivec3(1)" });
4146 type_infos.emplace_back(args: TypeInfo{ .type: "ivec4", .identity: "ivec4(1)" });
4147 break;
4148 }
4149
	case OpGroupNonUniformFMul:
	{
		type_infos.emplace_back(TypeInfo{ "float", "1.0f" });
		type_infos.emplace_back(TypeInfo{ "vec2", "vec2(1.0f)" });
		type_infos.emplace_back(TypeInfo{ "vec3", "vec3(1.0f)" });
		type_infos.emplace_back(TypeInfo{ "vec4", "vec4(1.0f)" });
		// The multiplicative identity for doubles is 1.0LF, mirroring the float path above.
		type_infos.emplace_back(TypeInfo{ "double", "1.0LF" });
		type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(1.0LF)" });
		type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(1.0LF)" });
		type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(1.0LF)" });
		break;
	}
4162
4163 default:
4164 SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
4165 }
4166
4167 const bool op_is_addition = op == OpGroupNonUniformIAdd || op == OpGroupNonUniformFAdd;
4168 const bool op_is_multiplication = op == OpGroupNonUniformIMul || op == OpGroupNonUniformFMul;
4169 std::string op_symbol;
4170 if (op_is_addition)
4171 {
4172 op_symbol = "+=";
4173 }
4174 else if (op_is_multiplication)
4175 {
4176 op_symbol = "*=";
4177 }
4178
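	// Each generated helper has two paths: a shuffle-based tree when the whole subgroup is active
	// (ballot bit count equals gl_SubgroupSize), and a fallback that loops over every lane with
	// shuffleNV, substituting the operation's identity for inactive or out-of-range lanes.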
4179 for (const TypeInfo &t : type_infos)
4180 {
4181 statement(ts: t.type, ts: " ", ts: func, ts: "(", ts: t.type, ts: " v)");
4182 begin_scope();
4183 statement(ts: t.type, ts: " ", ts&: result, ts: " = ", ts: t.identity, ts: ";");
4184 statement(ts: "uvec4 active_threads = subgroupBallot(true);");
4185 statement(ts: "if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)");
4186 begin_scope();
4187 statement(ts: "uint total = gl_SubgroupSize / 2u;");
4188 statement(ts&: result, ts: " = v;");
4189 statement(ts: "for (uint i = 1u; i <= total; i <<= 1u)");
4190 begin_scope();
4191 statement(ts: "bool valid;");
4192 if (group_op == GroupOperationReduce)
4193 {
4194 statement(ts: t.type, ts: " s = shuffleXorNV(", ts&: result, ts: ", i, gl_SubgroupSize, valid);");
4195 }
4196 else if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
4197 {
4198 statement(ts: t.type, ts: " s = shuffleUpNV(", ts&: result, ts: ", i, gl_SubgroupSize, valid);");
4199 }
4200 if (op_is_addition || op_is_multiplication)
4201 {
4202 statement(ts&: result, ts: " ", ts&: op_symbol, ts: " valid ? s : ", ts: t.identity, ts: ";");
4203 }
4204 end_scope();
4205 if (group_op == GroupOperationExclusiveScan)
4206 {
4207 statement(ts&: result, ts: " = shuffleUpNV(", ts&: result, ts: ", 1u, gl_SubgroupSize);");
4208 statement(ts: "if (subgroupElect())");
4209 begin_scope();
4210 statement(ts&: result, ts: " = ", ts: t.identity, ts: ";");
4211 end_scope();
4212 }
4213 end_scope();
4214 statement(ts: "else");
4215 begin_scope();
4216 if (group_op == GroupOperationExclusiveScan)
4217 {
4218 statement(ts: "uint total = subgroupBallotBitCount(gl_SubgroupLtMask);");
4219 }
4220 else if (group_op == GroupOperationInclusiveScan)
4221 {
4222 statement(ts: "uint total = subgroupBallotBitCount(gl_SubgroupLeMask);");
4223 }
4224 statement(ts: "for (uint i = 0u; i < gl_SubgroupSize; ++i)");
4225 begin_scope();
4226 statement(ts: "bool valid = subgroupBallotBitExtract(active_threads, i);");
4227 statement(ts: t.type, ts: " s = shuffleNV(v, i, gl_SubgroupSize);");
4228 if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
4229 {
4230 statement(ts: "valid = valid && (i < total);");
4231 }
4232 if (op_is_addition || op_is_multiplication)
4233 {
4234 statement(ts&: result, ts: " ", ts&: op_symbol, ts: " valid ? s : ", ts: t.identity, ts: ";");
4235 }
4236 end_scope();
4237 end_scope();
4238 statement(ts: "return ", ts&: result, ts: ";");
4239 end_scope();
4240 }
4241}
4242
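// For non-Vulkan GLSL, emit #if/#elif chains that map the requested GL_KHR_shader_subgroup
// builtins and functions onto whichever vendor extensions (NV_shader_thread_group,
// ARB_shader_ballot, AMD_gcn_shader, ...) the subgroup support helper resolved as candidates.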
4243void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
4244{
4245 static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
4246 "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" };
4247
4248 if (!options.vulkan_semantics)
4249 {
4250 using Supp = ShaderSubgroupSupportHelper;
4251 auto result = shader_subgroup_supporter.resolve();
4252
4253 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupMask))
4254 {
4255 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupMask, r: result);
4256
4257 for (auto &e : exts)
4258 {
4259 const char *name = Supp::get_extension_name(c: e);
4260 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4261
4262 switch (e)
4263 {
4264 case Supp::NV_shader_thread_group:
4265 statement(ts: "#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)");
4266 statement(ts: "#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)");
4267 statement(ts: "#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)");
4268 statement(ts: "#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)");
4269 statement(ts: "#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)");
4270 break;
4271 case Supp::ARB_shader_ballot:
4272 statement(ts: "#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)");
4273 statement(ts: "#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)");
4274 statement(ts: "#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)");
4275 statement(ts: "#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)");
4276 statement(ts: "#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)");
4277 break;
4278 default:
4279 break;
4280 }
4281 }
4282 statement(ts: "#endif");
4283 statement(ts: "");
4284 }
4285
4286 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupSize))
4287 {
4288 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupSize, r: result);
4289
4290 for (auto &e : exts)
4291 {
4292 const char *name = Supp::get_extension_name(c: e);
4293 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4294
4295 switch (e)
4296 {
4297 case Supp::NV_shader_thread_group:
4298 statement(ts: "#define gl_SubgroupSize gl_WarpSizeNV");
4299 break;
4300 case Supp::ARB_shader_ballot:
4301 statement(ts: "#define gl_SubgroupSize gl_SubGroupSizeARB");
4302 break;
4303 case Supp::AMD_gcn_shader:
4304 statement(ts: "#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
4305 break;
4306 default:
4307 break;
4308 }
4309 }
4310 statement(ts: "#endif");
4311 statement(ts: "");
4312 }
4313
4314 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupInvocationID))
4315 {
4316 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupInvocationID, r: result);
4317
4318 for (auto &e : exts)
4319 {
4320 const char *name = Supp::get_extension_name(c: e);
4321 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4322
4323 switch (e)
4324 {
4325 case Supp::NV_shader_thread_group:
4326 statement(ts: "#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
4327 break;
4328 case Supp::ARB_shader_ballot:
4329 statement(ts: "#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
4330 break;
4331 default:
4332 break;
4333 }
4334 }
4335 statement(ts: "#endif");
4336 statement(ts: "");
4337 }
4338
4339 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupID))
4340 {
4341 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupID, r: result);
4342
4343 for (auto &e : exts)
4344 {
4345 const char *name = Supp::get_extension_name(c: e);
4346 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4347
4348 switch (e)
4349 {
4350 case Supp::NV_shader_thread_group:
4351 statement(ts: "#define gl_SubgroupID gl_WarpIDNV");
4352 break;
4353 default:
4354 break;
4355 }
4356 }
4357 statement(ts: "#endif");
4358 statement(ts: "");
4359 }
4360
4361 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::NumSubgroups))
4362 {
4363 auto exts = Supp::get_candidates_for_feature(ft: Supp::NumSubgroups, r: result);
4364
4365 for (auto &e : exts)
4366 {
4367 const char *name = Supp::get_extension_name(c: e);
4368 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4369
4370 switch (e)
4371 {
4372 case Supp::NV_shader_thread_group:
4373 statement(ts: "#define gl_NumSubgroups gl_WarpsPerSMNV");
4374 break;
4375 default:
4376 break;
4377 }
4378 }
4379 statement(ts: "#endif");
4380 statement(ts: "");
4381 }
4382
4383 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBroadcast_First))
4384 {
4385 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBroadcast_First, r: result);
4386
4387 for (auto &e : exts)
4388 {
4389 const char *name = Supp::get_extension_name(c: e);
4390 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4391
4392 switch (e)
4393 {
4394 case Supp::NV_shader_thread_shuffle:
4395 for (const char *t : workaround_types)
4396 {
4397 statement(ts&: t, ts: " subgroupBroadcastFirst(", ts&: t,
4398 ts: " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
4399 }
4400 for (const char *t : workaround_types)
4401 {
4402 statement(ts&: t, ts: " subgroupBroadcast(", ts&: t,
4403 ts: " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
4404 }
4405 break;
4406 case Supp::ARB_shader_ballot:
4407 for (const char *t : workaround_types)
4408 {
4409 statement(ts&: t, ts: " subgroupBroadcastFirst(", ts&: t,
4410 ts: " value) { return readFirstInvocationARB(value); }");
4411 }
4412 for (const char *t : workaround_types)
4413 {
4414 statement(ts&: t, ts: " subgroupBroadcast(", ts&: t,
4415 ts: " value, uint id) { return readInvocationARB(value, id); }");
4416 }
4417 break;
4418 default:
4419 break;
4420 }
4421 }
4422 statement(ts: "#endif");
4423 statement(ts: "");
4424 }
4425
4426 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotFindLSB_MSB))
4427 {
4428 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBallotFindLSB_MSB, r: result);
4429
4430 for (auto &e : exts)
4431 {
4432 const char *name = Supp::get_extension_name(c: e);
4433 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4434
4435 switch (e)
4436 {
4437 case Supp::NV_shader_thread_group:
4438 statement(ts: "uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
4439 statement(ts: "uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
4440 break;
4441 default:
4442 break;
4443 }
4444 }
4445 statement(ts: "#else");
4446 statement(ts: "uint subgroupBallotFindLSB(uvec4 value)");
4447 begin_scope();
4448 statement(ts: "int firstLive = findLSB(value.x);");
4449 statement(ts: "return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
4450 end_scope();
4451 statement(ts: "uint subgroupBallotFindMSB(uvec4 value)");
4452 begin_scope();
4453 statement(ts: "int firstLive = findMSB(value.y);");
4454 statement(ts: "return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
4455 end_scope();
4456 statement(ts: "#endif");
4457 statement(ts: "");
4458 }
4459
4460 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupAll_Any_AllEqualBool))
4461 {
4462 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupAll_Any_AllEqualBool, r: result);
4463
4464 for (auto &e : exts)
4465 {
4466 const char *name = Supp::get_extension_name(c: e);
4467 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4468
4469 switch (e)
4470 {
4471 case Supp::NV_gpu_shader_5:
4472 statement(ts: "bool subgroupAll(bool value) { return allThreadsNV(value); }");
4473 statement(ts: "bool subgroupAny(bool value) { return anyThreadNV(value); }");
4474 statement(ts: "bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
4475 break;
4476 case Supp::ARB_shader_group_vote:
4477 statement(ts: "bool subgroupAll(bool v) { return allInvocationsARB(v); }");
4478 statement(ts: "bool subgroupAny(bool v) { return anyInvocationARB(v); }");
4479 statement(ts: "bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
4480 break;
4481 case Supp::AMD_gcn_shader:
4482 statement(ts: "bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
4483 statement(ts: "bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
4484 statement(ts: "bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
4485 "b == ballotAMD(true); }");
4486 break;
4487 default:
4488 break;
4489 }
4490 }
4491 statement(ts: "#endif");
4492 statement(ts: "");
4493 }
4494
4495 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupAllEqualT))
4496 {
4497 statement(ts: "#ifndef GL_KHR_shader_subgroup_vote");
4498 statement(
4499 ts: "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
4500 "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
4501 for (const char *t : workaround_types)
4502 statement(ts: "_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", ts&: t, ts: ")");
4503 statement(ts: "#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
4504 statement(ts: "#endif");
4505 statement(ts: "");
4506 }
4507
4508 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallot))
4509 {
4510 auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBallot, r: result);
4511
4512 for (auto &e : exts)
4513 {
4514 const char *name = Supp::get_extension_name(c: e);
4515 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4516
4517 switch (e)
4518 {
4519 case Supp::NV_shader_thread_group:
4520 statement(ts: "uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
4521 break;
4522 case Supp::ARB_shader_ballot:
4523 statement(ts: "uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
4524 break;
4525 default:
4526 break;
4527 }
4528 }
4529 statement(ts: "#endif");
4530 statement(ts: "");
4531 }
4532
4533 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupElect))
4534 {
4535 statement(ts: "#ifndef GL_KHR_shader_subgroup_basic");
4536 statement(ts: "bool subgroupElect()");
4537 begin_scope();
4538 statement(ts: "uvec4 activeMask = subgroupBallot(true);");
4539 statement(ts: "uint firstLive = subgroupBallotFindLSB(activeMask);");
4540 statement(ts: "return gl_SubgroupInvocationID == firstLive;");
4541 end_scope();
4542 statement(ts: "#endif");
4543 statement(ts: "");
4544 }
4545
4546 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBarrier))
4547 {
4548   // Extensions we're using in place of GL_KHR_shader_subgroup_basic state
4549   // that subgroups execute in lockstep, so this barrier is implicit.
4550   // However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
4551   // and a specific test of optimizing scans by leveraging lock-step invocation execution
4552   // has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
4553 // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
4554 statement(ts: "#ifndef GL_KHR_shader_subgroup_basic");
4555 statement(ts: "void subgroupBarrier() { memoryBarrierShared(); }");
4556 statement(ts: "#endif");
4557 statement(ts: "");
4558 }
4559
4560 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupMemBarrier))
4561 {
4562 if (model == spv::ExecutionModelGLCompute)
4563 {
4564 statement(ts: "#ifndef GL_KHR_shader_subgroup_basic");
4565 statement(ts: "void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
4566 statement(ts: "void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
4567 statement(ts: "void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
4568 statement(ts: "void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
4569 statement(ts: "#endif");
4570 }
4571 else
4572 {
4573 statement(ts: "#ifndef GL_KHR_shader_subgroup_basic");
4574 statement(ts: "void subgroupMemoryBarrier() { memoryBarrier(); }");
4575 statement(ts: "void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
4576 statement(ts: "void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
4577 statement(ts: "#endif");
4578 }
4579 statement(ts: "");
4580 }
4581
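	// Fallback implementations of inverse ballot and the ballot bit counts when
	// GL_KHR_shader_subgroup_ballot is unavailable. Only the low 64 bits of the
	// ballot (value.xy) are considered, and only the low 32 bits (c.x) when the
	// GL_NV_shader_thread_group path is active.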
4582 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
4583 {
4584 statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot");
4585 statement(ts: "bool subgroupInverseBallot(uvec4 value)");
4586 begin_scope();
4587 statement(ts: "return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
4588 end_scope();
4589
4590 statement(ts: "uint subgroupBallotInclusiveBitCount(uvec4 value)");
4591 begin_scope();
4592 statement(ts: "uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
4593 statement(ts: "ivec2 c = bitCount(v);");
4594 statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group");
4595 statement(ts: "return uint(c.x);");
4596 statement_no_indent(ts: "#else");
4597 statement(ts: "return uint(c.x + c.y);");
4598 statement_no_indent(ts: "#endif");
4599 end_scope();
4600
4601 statement(ts: "uint subgroupBallotExclusiveBitCount(uvec4 value)");
4602 begin_scope();
4603 statement(ts: "uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
4604 statement(ts: "ivec2 c = bitCount(v);");
4605 statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group");
4606 statement(ts: "return uint(c.x);");
4607 statement_no_indent(ts: "#else");
4608 statement(ts: "return uint(c.x + c.y);");
4609 statement_no_indent(ts: "#endif");
4610 end_scope();
4611 statement(ts: "#endif");
4612 statement(ts: "");
4613 }
4614
4615 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotBitCount))
4616 {
4617 statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot");
4618 statement(ts: "uint subgroupBallotBitCount(uvec4 value)");
4619 begin_scope();
4620 statement(ts: "ivec2 c = bitCount(value.xy);");
4621 statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group");
4622 statement(ts: "return uint(c.x);");
4623 statement_no_indent(ts: "#else");
4624 statement(ts: "return uint(c.x + c.y);");
4625 statement_no_indent(ts: "#endif");
4626 end_scope();
4627 statement(ts: "#endif");
4628 statement(ts: "");
4629 }
4630
4631 if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotBitExtract))
4632 {
4633 statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot");
4634 statement(ts: "bool subgroupBallotBitExtract(uvec4 value, uint index)");
4635 begin_scope();
4636 statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group");
4637 statement(ts: "uint shifted = value.x >> index;");
4638 statement_no_indent(ts: "#else");
4639 statement(ts: "uint shifted = value[index >> 5u] >> (index & 0x1fu);");
4640 statement_no_indent(ts: "#endif");
4641 statement(ts: "return (shifted & 1u) != 0u;");
4642 end_scope();
4643 statement(ts: "#endif");
4644 statement(ts: "");
4645 }
4646
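	// Helper which, for a requested subgroup arithmetic feature (reduce / inclusive
	// scan / exclusive scan), emits the NV_shader_thread_shuffle-based workaround
	// via emit_subgroup_arithmetic_workaround, wrapped in the usual
	// #if defined(extension) guard for each candidate extension.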
4647 auto arithmetic_feature_helper =
4648 [&](Supp::Feature feat, std::string func_name, spv::Op op, spv::GroupOperation group_op)
4649 {
4650 if (shader_subgroup_supporter.is_feature_requested(feature: feat))
4651 {
4652 auto exts = Supp::get_candidates_for_feature(ft: feat, r: result);
4653 for (auto &e : exts)
4654 {
4655 const char *name = Supp::get_extension_name(c: e);
4656 statement(ts: &e == &exts.front() ? "#if" : "#elif", ts: " defined(", ts&: name, ts: ")");
4657
4658 switch (e)
4659 {
4660 case Supp::NV_shader_thread_shuffle:
4661 emit_subgroup_arithmetic_workaround(func: func_name, op, group_op);
4662 break;
4663 default:
4664 break;
4665 }
4666 }
4667 statement(ts: "#endif");
4668 statement(ts: "");
4669 }
4670 };
4671
4672 arithmetic_feature_helper(Supp::SubgroupArithmeticIAddReduce, "subgroupAdd", OpGroupNonUniformIAdd,
4673 GroupOperationReduce);
4674 arithmetic_feature_helper(Supp::SubgroupArithmeticIAddExclusiveScan, "subgroupExclusiveAdd",
4675 OpGroupNonUniformIAdd, GroupOperationExclusiveScan);
4676 arithmetic_feature_helper(Supp::SubgroupArithmeticIAddInclusiveScan, "subgroupInclusiveAdd",
4677 OpGroupNonUniformIAdd, GroupOperationInclusiveScan);
4678 arithmetic_feature_helper(Supp::SubgroupArithmeticFAddReduce, "subgroupAdd", OpGroupNonUniformFAdd,
4679 GroupOperationReduce);
4680 arithmetic_feature_helper(Supp::SubgroupArithmeticFAddExclusiveScan, "subgroupExclusiveAdd",
4681 OpGroupNonUniformFAdd, GroupOperationExclusiveScan);
4682 arithmetic_feature_helper(Supp::SubgroupArithmeticFAddInclusiveScan, "subgroupInclusiveAdd",
4683 OpGroupNonUniformFAdd, GroupOperationInclusiveScan);
4684
4685 arithmetic_feature_helper(Supp::SubgroupArithmeticIMulReduce, "subgroupMul", OpGroupNonUniformIMul,
4686 GroupOperationReduce);
4687 arithmetic_feature_helper(Supp::SubgroupArithmeticIMulExclusiveScan, "subgroupExclusiveMul",
4688 OpGroupNonUniformIMul, GroupOperationExclusiveScan);
4689 arithmetic_feature_helper(Supp::SubgroupArithmeticIMulInclusiveScan, "subgroupInclusiveMul",
4690 OpGroupNonUniformIMul, GroupOperationInclusiveScan);
4691 arithmetic_feature_helper(Supp::SubgroupArithmeticFMulReduce, "subgroupMul", OpGroupNonUniformFMul,
4692 GroupOperationReduce);
4693 arithmetic_feature_helper(Supp::SubgroupArithmeticFMulExclusiveScan, "subgroupExclusiveMul",
4694 OpGroupNonUniformFMul, GroupOperationExclusiveScan);
4695 arithmetic_feature_helper(Supp::SubgroupArithmeticFMulInclusiveScan, "subgroupInclusiveMul",
4696 OpGroupNonUniformFMul, GroupOperationInclusiveScan);
4697 }
4698
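	// Emit identity wrapper functions used for the row-major UBO load workaround, e.g.:
	//   mat4 spvWorkaroundRowMajor(mat4 wrap) { return wrap; }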
4699 if (!workaround_ubo_load_overload_types.empty())
4700 {
4701 for (auto &type_id : workaround_ubo_load_overload_types)
4702 {
4703 auto &type = get<SPIRType>(id: type_id);
4704
4705 if (options.es && is_matrix(type))
4706 {
4707 // Need both variants.
4708     // GLSL cannot overload on precision, so we need to dispatch appropriately.
4709 statement(ts: "highp ", ts: type_to_glsl(type), ts: " spvWorkaroundRowMajor(highp ", ts: type_to_glsl(type), ts: " wrap) { return wrap; }");
4710 statement(ts: "mediump ", ts: type_to_glsl(type), ts: " spvWorkaroundRowMajorMP(mediump ", ts: type_to_glsl(type), ts: " wrap) { return wrap; }");
4711 }
4712 else
4713 {
4714 statement(ts: type_to_glsl(type), ts: " spvWorkaroundRowMajor(", ts: type_to_glsl(type), ts: " wrap) { return wrap; }");
4715 }
4716 }
4717 statement(ts: "");
4718 }
4719}
4720
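// Emits helper functions for operations we have to polyfill in GLSL:
// spvTranspose*, spvDeterminant* and spvInverse* for small matrices, plus the
// spvNMin/spvNMax/spvNClamp family for the GLSL.std.450 NMin/NMax/NClamp ops.
// When "relaxed" is set on ES targets, mediump variants are emitted instead,
// using an "MP" suffix (or a "Relaxed" wrapper for the NMin/NMax/NClamp family).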
4721void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed)
4722{
4723 const char *qual = "";
4724 const char *suffix = (options.es && relaxed) ? "MP" : "";
4725 if (options.es)
4726 qual = relaxed ? "mediump " : "highp ";
4727
4728 if (polyfills & PolyfillTranspose2x2)
4729 {
4730 statement(ts&: qual, ts: "mat2 spvTranspose", ts&: suffix, ts: "(", ts&: qual, ts: "mat2 m)");
4731 begin_scope();
4732 statement(ts: "return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
4733 end_scope();
4734 statement(ts: "");
4735 }
4736
4737 if (polyfills & PolyfillTranspose3x3)
4738 {
4739 statement(ts&: qual, ts: "mat3 spvTranspose", ts&: suffix, ts: "(", ts&: qual, ts: "mat3 m)");
4740 begin_scope();
4741 statement(ts: "return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
4742 end_scope();
4743 statement(ts: "");
4744 }
4745
4746 if (polyfills & PolyfillTranspose4x4)
4747 {
4748 statement(ts&: qual, ts: "mat4 spvTranspose", ts&: suffix, ts: "(", ts&: qual, ts: "mat4 m)");
4749 begin_scope();
4750 statement(ts: "return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
4751 "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
4752 end_scope();
4753 statement(ts: "");
4754 }
4755
4756 if (polyfills & PolyfillDeterminant2x2)
4757 {
4758 statement(ts&: qual, ts: "float spvDeterminant", ts&: suffix, ts: "(", ts&: qual, ts: "mat2 m)");
4759 begin_scope();
4760 statement(ts: "return m[0][0] * m[1][1] - m[0][1] * m[1][0];");
4761 end_scope();
4762 statement(ts: "");
4763 }
4764
4765 if (polyfills & PolyfillDeterminant3x3)
4766 {
4767 statement(ts&: qual, ts: "float spvDeterminant", ts&: suffix, ts: "(", ts&: qual, ts: "mat3 m)");
4768 begin_scope();
4769 statement(ts: "return dot(m[0], vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], "
4770 "m[1][2] * m[2][0] - m[1][0] * m[2][2], "
4771 "m[1][0] * m[2][1] - m[1][1] * m[2][0]));");
4772 end_scope();
4773 statement(ts: "");
4774 }
4775
4776 if (polyfills & PolyfillDeterminant4x4)
4777 {
4778 statement(ts&: qual, ts: "float spvDeterminant", ts&: suffix, ts: "(", ts&: qual, ts: "mat4 m)");
4779 begin_scope();
4780 statement(ts: "return dot(m[0], vec4("
4781 "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], "
4782 "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], "
4783 "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], "
4784 "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]));");
4785 end_scope();
4786 statement(ts: "");
4787 }
4788
4789 if (polyfills & PolyfillMatrixInverse2x2)
4790 {
4791 statement(ts&: qual, ts: "mat2 spvInverse", ts&: suffix, ts: "(", ts&: qual, ts: "mat2 m)");
4792 begin_scope();
4793 statement(ts: "return mat2(m[1][1], -m[0][1], -m[1][0], m[0][0]) "
4794 "* (1.0 / (m[0][0] * m[1][1] - m[1][0] * m[0][1]));");
4795 end_scope();
4796 statement(ts: "");
4797 }
4798
4799 if (polyfills & PolyfillMatrixInverse3x3)
4800 {
4801 statement(ts&: qual, ts: "mat3 spvInverse", ts&: suffix, ts: "(", ts&: qual, ts: "mat3 m)");
4802 begin_scope();
4803 statement(ts&: qual, ts: "vec3 t = vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], m[1][2] * m[2][0] - m[1][0] * m[2][2], m[1][0] * m[2][1] - m[1][1] * m[2][0]);");
4804 statement(ts: "return mat3(t[0], "
4805 "m[0][2] * m[2][1] - m[0][1] * m[2][2], "
4806 "m[0][1] * m[1][2] - m[0][2] * m[1][1], "
4807 "t[1], "
4808 "m[0][0] * m[2][2] - m[0][2] * m[2][0], "
4809 "m[0][2] * m[1][0] - m[0][0] * m[1][2], "
4810 "t[2], "
4811 "m[0][1] * m[2][0] - m[0][0] * m[2][1], "
4812 "m[0][0] * m[1][1] - m[0][1] * m[1][0]) "
4813 "* (1.0 / dot(m[0], t));");
4814 end_scope();
4815 statement(ts: "");
4816 }
4817
4818 if (polyfills & PolyfillMatrixInverse4x4)
4819 {
4820 statement(ts&: qual, ts: "mat4 spvInverse", ts&: suffix, ts: "(", ts&: qual, ts: "mat4 m)");
4821 begin_scope();
4822 statement(ts&: qual, ts: "vec4 t = vec4("
4823 "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], "
4824 "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], "
4825 "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], "
4826 "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]);");
4827 statement(ts: "return mat4("
4828 "t[0], "
4829 "m[3][1] * m[2][2] * m[0][3] - m[2][1] * m[3][2] * m[0][3] - m[3][1] * m[0][2] * m[2][3] + m[0][1] * m[3][2] * m[2][3] + m[2][1] * m[0][2] * m[3][3] - m[0][1] * m[2][2] * m[3][3], "
4830 "m[1][1] * m[3][2] * m[0][3] - m[3][1] * m[1][2] * m[0][3] + m[3][1] * m[0][2] * m[1][3] - m[0][1] * m[3][2] * m[1][3] - m[1][1] * m[0][2] * m[3][3] + m[0][1] * m[1][2] * m[3][3], "
4831 "m[2][1] * m[1][2] * m[0][3] - m[1][1] * m[2][2] * m[0][3] - m[2][1] * m[0][2] * m[1][3] + m[0][1] * m[2][2] * m[1][3] + m[1][1] * m[0][2] * m[2][3] - m[0][1] * m[1][2] * m[2][3], "
4832 "t[1], "
4833 "m[2][0] * m[3][2] * m[0][3] - m[3][0] * m[2][2] * m[0][3] + m[3][0] * m[0][2] * m[2][3] - m[0][0] * m[3][2] * m[2][3] - m[2][0] * m[0][2] * m[3][3] + m[0][0] * m[2][2] * m[3][3], "
4834 "m[3][0] * m[1][2] * m[0][3] - m[1][0] * m[3][2] * m[0][3] - m[3][0] * m[0][2] * m[1][3] + m[0][0] * m[3][2] * m[1][3] + m[1][0] * m[0][2] * m[3][3] - m[0][0] * m[1][2] * m[3][3], "
4835 "m[1][0] * m[2][2] * m[0][3] - m[2][0] * m[1][2] * m[0][3] + m[2][0] * m[0][2] * m[1][3] - m[0][0] * m[2][2] * m[1][3] - m[1][0] * m[0][2] * m[2][3] + m[0][0] * m[1][2] * m[2][3], "
4836 "t[2], "
4837 "m[3][0] * m[2][1] * m[0][3] - m[2][0] * m[3][1] * m[0][3] - m[3][0] * m[0][1] * m[2][3] + m[0][0] * m[3][1] * m[2][3] + m[2][0] * m[0][1] * m[3][3] - m[0][0] * m[2][1] * m[3][3], "
4838 "m[1][0] * m[3][1] * m[0][3] - m[3][0] * m[1][1] * m[0][3] + m[3][0] * m[0][1] * m[1][3] - m[0][0] * m[3][1] * m[1][3] - m[1][0] * m[0][1] * m[3][3] + m[0][0] * m[1][1] * m[3][3], "
4839 "m[2][0] * m[1][1] * m[0][3] - m[1][0] * m[2][1] * m[0][3] - m[2][0] * m[0][1] * m[1][3] + m[0][0] * m[2][1] * m[1][3] + m[1][0] * m[0][1] * m[2][3] - m[0][0] * m[1][1] * m[2][3], "
4840 "t[3], "
4841 "m[2][0] * m[3][1] * m[0][2] - m[3][0] * m[2][1] * m[0][2] + m[3][0] * m[0][1] * m[2][2] - m[0][0] * m[3][1] * m[2][2] - m[2][0] * m[0][1] * m[3][2] + m[0][0] * m[2][1] * m[3][2], "
4842 "m[3][0] * m[1][1] * m[0][2] - m[1][0] * m[3][1] * m[0][2] - m[3][0] * m[0][1] * m[1][2] + m[0][0] * m[3][1] * m[1][2] + m[1][0] * m[0][1] * m[3][2] - m[0][0] * m[1][1] * m[3][2], "
4843 "m[1][0] * m[2][1] * m[0][2] - m[2][0] * m[1][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] - m[0][0] * m[2][1] * m[1][2] - m[1][0] * m[0][1] * m[2][2] + m[0][0] * m[1][1] * m[2][2]) "
4844 "* (1.0 / dot(m[0], t));");
4845 end_scope();
4846 statement(ts: "");
4847 }
4848
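	// In the non-relaxed pass, NMin/NMax/NClamp are declared as spirv_instruction
	// redirects to the corresponding GLSL.std.450 opcodes, roughly:
	//   spirv_instruction(set = "GLSL.std.450", id = ...) float spvNMin(float, float);
	// In the relaxed pass, mediump wrapper functions (spvNMinRelaxed, ...) are emitted
	// instead so that mediump propagates through the call.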
4849 if (!relaxed)
4850 {
4851 static const Polyfill polys[3][3] = {
4852 { PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 },
4853 { PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 },
4854 { PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 },
4855 };
4856
4857 static const GLSLstd450 glsl_ops[] = { GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp };
4858 static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp" };
4859 bool has_poly = false;
4860
4861 for (uint32_t i = 0; i < 3; i++)
4862 {
4863 for (uint32_t j = 0; j < 3; j++)
4864 {
4865 if ((polyfills & polys[i][j]) == 0)
4866 continue;
4867
4868 const char *types[3][4] = {
4869 { "float16_t", "f16vec2", "f16vec3", "f16vec4" },
4870 { "float", "vec2", "vec3", "vec4" },
4871 { "double", "dvec2", "dvec3", "dvec4" },
4872 };
4873
4874 for (uint32_t k = 0; k < 4; k++)
4875 {
4876 auto *type = types[j][k];
4877
4878 if (i < 2)
4879 {
4880 statement(ts: "spirv_instruction(set = \"GLSL.std.450\", id = ", ts: glsl_ops[i], ts: ") ",
4881 ts&: type, ts: " ", ts&: spv_ops[i], ts: "(", ts&: type, ts: ", ", ts&: type, ts: ");");
4882 }
4883 else
4884 {
4885 statement(ts: "spirv_instruction(set = \"GLSL.std.450\", id = ", ts: glsl_ops[i], ts: ") ",
4886 ts&: type, ts: " ", ts&: spv_ops[i], ts: "(", ts&: type, ts: ", ", ts&: type, ts: ", ", ts&: type, ts: ");");
4887 }
4888
4889 has_poly = true;
4890 }
4891 }
4892 }
4893
4894 if (has_poly)
4895 statement(ts: "");
4896 }
4897 else
4898 {
4899 // Mediump intrinsics don't work correctly, so wrap the intrinsic in an outer shell that ensures mediump
4900 // propagation.
4901
4902 static const Polyfill polys[3][3] = {
4903 { PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 },
4904 { PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 },
4905 { PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 },
4906 };
4907
4908 static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp" };
4909
4910 for (uint32_t i = 0; i < 3; i++)
4911 {
4912 for (uint32_t j = 0; j < 3; j++)
4913 {
4914 if ((polyfills & polys[i][j]) == 0)
4915 continue;
4916
4917 const char *types[3][4] = {
4918 { "float16_t", "f16vec2", "f16vec3", "f16vec4" },
4919 { "float", "vec2", "vec3", "vec4" },
4920 { "double", "dvec2", "dvec3", "dvec4" },
4921 };
4922
4923 for (uint32_t k = 0; k < 4; k++)
4924 {
4925 auto *type = types[j][k];
4926
4927 if (i < 2)
4928 {
4929 statement(ts: "mediump ", ts&: type, ts: " ", ts&: spv_ops[i], ts: "Relaxed(",
4930 ts: "mediump ", ts&: type, ts: " a, mediump ", ts&: type, ts: " b)");
4931 begin_scope();
4932 statement(ts: "mediump ", ts&: type, ts: " res = ", ts&: spv_ops[i], ts: "(a, b);");
4933 statement(ts: "return res;");
4934 end_scope();
4935 statement(ts: "");
4936 }
4937 else
4938 {
4939 statement(ts: "mediump ", ts&: type, ts: " ", ts&: spv_ops[i], ts: "Relaxed(",
4940 ts: "mediump ", ts&: type, ts: " a, mediump ", ts&: type, ts: " b, mediump ", ts&: type, ts: " c)");
4941 begin_scope();
4942 statement(ts: "mediump ", ts&: type, ts: " res = ", ts&: spv_ops[i], ts: "(a, b, c);");
4943 statement(ts: "return res;");
4944 end_scope();
4945 statement(ts: "");
4946 }
4947 }
4948 }
4949 }
4950 }
4951}
4952
4953// Returns a string representation of the ID, usable as a function arg.
4954 // Default is to simply return the expression representation of the arg ID.
4955// Subclasses may override to modify the return value.
4956string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
4957{
4958 // Make sure that we use the name of the original variable, and not the parameter alias.
4959 uint32_t name_id = id;
4960 auto *var = maybe_get<SPIRVariable>(id);
4961 if (var && var->basevariable)
4962 name_id = var->basevariable;
4963 return to_expression(id: name_id);
4964}
4965
4966void CompilerGLSL::force_temporary_and_recompile(uint32_t id)
4967{
4968 auto res = forced_temporaries.insert(x: id);
4969
4970 // Forcing new temporaries guarantees forward progress.
4971 if (res.second)
4972 force_recompile_guarantee_forward_progress();
4973 else
4974 force_recompile();
4975}
4976
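// If a temporary is consumed in a precision context which differs from its own,
// create (or reuse) a mirror copy with the required precision (named mp_copy_* or
// hp_copy_*) and return the alias ID. Constants, pointers and values that are not
// 32-bit float/int are returned unchanged; DontCare contexts merely force the value
// into a temporary so its precision can be controlled later.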
4977uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision)
4978{
4979 // Constants do not have innate precision.
4980 auto handle_type = ir.ids[id].get_type();
4981 if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef)
4982 return id;
4983
4984 // Ignore anything that isn't 32-bit values.
4985 auto &type = get<SPIRType>(id: type_id);
4986 if (type.pointer)
4987 return id;
4988 if (type.basetype != SPIRType::Float && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int)
4989 return id;
4990
4991 if (precision == Options::DontCare)
4992 {
4993 // If precision is consumed as don't care (operations only consisting of constants),
4994 // we need to bind the expression to a temporary,
4995 // otherwise we have no way of controlling the precision later.
4996 auto itr = forced_temporaries.insert(x: id);
4997 if (itr.second)
4998 force_recompile_guarantee_forward_progress();
4999 return id;
5000 }
5001
5002 auto current_precision = has_decoration(id, decoration: DecorationRelaxedPrecision) ? Options::Mediump : Options::Highp;
5003 if (current_precision == precision)
5004 return id;
5005
5006 auto itr = temporary_to_mirror_precision_alias.find(x: id);
5007 if (itr == temporary_to_mirror_precision_alias.end())
5008 {
5009 uint32_t alias_id = ir.increase_bound_by(count: 1);
5010 auto &m = ir.meta[alias_id];
5011 if (auto *input_m = ir.find_meta(id))
5012 m = *input_m;
5013
5014 const char *prefix;
5015 if (precision == Options::Mediump)
5016 {
5017 set_decoration(id: alias_id, decoration: DecorationRelaxedPrecision);
5018 prefix = "mp_copy_";
5019 }
5020 else
5021 {
5022 unset_decoration(id: alias_id, decoration: DecorationRelaxedPrecision);
5023 prefix = "hp_copy_";
5024 }
5025
5026 auto alias_name = join(ts&: prefix, ts: to_name(id));
5027 ParsedIR::sanitize_underscores(str&: alias_name);
5028 set_name(id: alias_id, name: alias_name);
5029
5030 emit_op(result_type: type_id, result_id: alias_id, rhs: to_expression(id), forward_rhs: true);
5031 temporary_to_mirror_precision_alias[id] = alias_id;
5032 forced_temporaries.insert(x: id);
5033 forced_temporaries.insert(x: alias_id);
5034 force_recompile_guarantee_forward_progress();
5035 id = alias_id;
5036 }
5037 else
5038 {
5039 id = itr->second;
5040 }
5041
5042 return id;
5043}
5044
5045void CompilerGLSL::handle_invalid_expression(uint32_t id)
5046{
5047 // We tried to read an invalidated expression.
5048 // This means we need another pass at compilation, but next time,
5049 // force temporary variables so that they cannot be invalidated.
5050 force_temporary_and_recompile(id);
5051
5052 // If the invalid expression happened as a result of a CompositeInsert
5053 // overwrite, we must block this from happening next iteration.
5054 if (composite_insert_overwritten.count(x: id))
5055 block_composite_insert_overwrite.insert(x: id);
5056}
5057
5058// Converts the format of the current expression from packed to unpacked,
5059// by wrapping the expression in a constructor of the appropriate type.
5060 // GLSL does not support packed formats, so we simply return the expression unchanged.
5061 // Subclasses that do support packed formats will override this.
5062string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
5063{
5064 return expr_str;
5065}
5066
5067 // Sometimes we proactively enclose an expression where it turns out we might not have needed it after all.
5068void CompilerGLSL::strip_enclosed_expression(string &expr)
5069{
5070 if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
5071 return;
5072
5073  // Have to make sure that our first and last parens actually enclose everything inside them.
5074 uint32_t paren_count = 0;
5075 for (auto &c : expr)
5076 {
5077 if (c == '(')
5078 paren_count++;
5079 else if (c == ')')
5080 {
5081 paren_count--;
5082
5083 // If we hit 0 and this is not the final char, our first and final parens actually don't
5084 // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
5085 if (paren_count == 0 && &c != &expr.back())
5086 return;
5087 }
5088 }
5089 expr.erase(pos: expr.size() - 1, n: 1);
5090 expr.erase(position: begin(cont&: expr));
5091}
5092
5093bool CompilerGLSL::needs_enclose_expression(const std::string &expr)
5094{
5095 bool need_parens = false;
5096
5097  // If the expression starts with a unary operator, we need to enclose it to deal with cases where we have
5098  // back-to-back unary expressions.
5099 if (!expr.empty())
5100 {
5101 auto c = expr.front();
5102 if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
5103 need_parens = true;
5104 }
5105
5106 if (!need_parens)
5107 {
5108 uint32_t paren_count = 0;
5109 for (auto c : expr)
5110 {
5111 if (c == '(' || c == '[')
5112 paren_count++;
5113 else if (c == ')' || c == ']')
5114 {
5115 assert(paren_count);
5116 paren_count--;
5117 }
5118 else if (c == ' ' && paren_count == 0)
5119 {
5120 need_parens = true;
5121 break;
5122 }
5123 }
5124 assert(paren_count == 0);
5125 }
5126
5127 return need_parens;
5128}
5129
5130string CompilerGLSL::enclose_expression(const string &expr)
5131{
5132 // If this expression contains any spaces which are not enclosed by parentheses,
5133 // we need to enclose it so we can treat the whole string as an expression.
5134 // This happens when two expressions have been part of a binary op earlier.
5135 if (needs_enclose_expression(expr))
5136 return join(ts: '(', ts: expr, ts: ')');
5137 else
5138 return expr;
5139}
5140
5141string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
5142{
5143 // If this expression starts with an address-of operator ('&'), then
5144 // just return the part after the operator.
5145 // TODO: Strip parens if unnecessary?
5146 if (expr.front() == '&')
5147 return expr.substr(pos: 1);
5148 else if (backend.native_pointers)
5149 return join(ts: '*', ts: expr);
5150 else if (is_physical_pointer(type: expr_type) && !is_physical_pointer_to_buffer_block(type: expr_type))
5151 return join(ts: enclose_expression(expr), ts: ".value");
5152 else
5153 return expr;
5154}
5155
5156string CompilerGLSL::address_of_expression(const std::string &expr)
5157{
5158 if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
5159 {
5160 // If we have an expression which looks like (*foo), taking the address of it is the same as stripping
5161 // the first two and last characters. We might have to enclose the expression.
5162 // This doesn't work for cases like (*foo + 10),
5163 // but this is an r-value expression which we cannot take the address of anyways.
5164 return enclose_expression(expr: expr.substr(pos: 2, n: expr.size() - 3));
5165 }
5166 else if (expr.front() == '*')
5167 {
5168 // If this expression starts with a dereference operator ('*'), then
5169 // just return the part after the operator.
5170 return expr.substr(pos: 1);
5171 }
5172 else
5173 return join(ts: '&', ts: enclose_expression(expr));
5174}
5175
5176// Just like to_expression except that we enclose the expression inside parentheses if needed.
5177string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
5178{
5179 return enclose_expression(expr: to_expression(id, register_expression_read));
5180}
5181
5182// Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
5183// need_transpose must be forced to false.
5184string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
5185{
5186 return unpack_expression_type(expr_str: to_expression(id), expression_type(id),
5187 get_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID),
5188 has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked), true);
5189}
5190
5191string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
5192{
5193 // If we need to transpose, it will also take care of unpacking rules.
5194 auto *e = maybe_get<SPIRExpression>(id);
5195 bool need_transpose = e && e->need_transpose;
5196 bool is_remapped = has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID);
5197 bool is_packed = has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
5198
5199 if (!need_transpose && (is_remapped || is_packed))
5200 {
5201 return unpack_expression_type(expr_str: to_expression(id, register_expression_read),
5202 get_pointee_type(type_id: expression_type_id(id)),
5203 get_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID),
5204 has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked), false);
5205 }
5206 else
5207 return to_expression(id, register_expression_read);
5208}
5209
5210string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
5211{
5212 return enclose_expression(expr: to_unpacked_expression(id, register_expression_read));
5213}
5214
5215string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
5216{
5217 auto &type = expression_type(id);
5218 if (type.pointer && should_dereference(id))
5219 return dereference_expression(expr_type: type, expr: to_enclosed_expression(id, register_expression_read));
5220 else
5221 return to_expression(id, register_expression_read);
5222}
5223
5224string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
5225{
5226 auto &type = expression_type(id);
5227 if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
5228 return address_of_expression(expr: to_enclosed_expression(id, register_expression_read));
5229 else
5230 return to_unpacked_expression(id, register_expression_read);
5231}
5232
5233string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
5234{
5235 auto &type = expression_type(id);
5236 if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
5237 return address_of_expression(expr: to_enclosed_expression(id, register_expression_read));
5238 else
5239 return to_enclosed_unpacked_expression(id, register_expression_read);
5240}
5241
5242string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
5243{
5244 auto expr = to_enclosed_expression(id);
5245 if (has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked))
5246 return join(ts&: expr, ts: "[", ts&: index, ts: "]");
5247 else
5248 return join(ts&: expr, ts: ".", ts: index_to_swizzle(index));
5249}
5250
5251string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c,
5252 const uint32_t *chain, uint32_t length)
5253{
5254  // It is kind of silly if an application actually enters this path, since it knows the constant up front.
5255 // It is useful here to extract the plain constant directly.
5256 SPIRConstant tmp;
5257 tmp.constant_type = result_type;
5258 auto &composite_type = get<SPIRType>(id: c.constant_type);
5259 assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty());
5260 assert(!c.specialization);
5261
5262 if (is_matrix(type: composite_type))
5263 {
5264 if (length == 2)
5265 {
5266 tmp.m.c[0].vecsize = 1;
5267 tmp.m.columns = 1;
5268 tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]];
5269 }
5270 else
5271 {
5272 assert(length == 1);
5273 tmp.m.c[0].vecsize = composite_type.vecsize;
5274 tmp.m.columns = 1;
5275 tmp.m.c[0] = c.m.c[chain[0]];
5276 }
5277 }
5278 else
5279 {
5280 assert(length == 1);
5281 tmp.m.c[0].vecsize = 1;
5282 tmp.m.columns = 1;
5283 tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]];
5284 }
5285
5286 return constant_expression(c: tmp);
5287}
5288
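// Builds an initializer-list expression "{ base[0], base[1], ... }" for an array,
// recursing into nested arrays and casting each element when bool has been remapped
// to another type inside structs (backend.boolean_in_struct_remapped_type).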
5289string CompilerGLSL::to_rerolled_array_expression(const SPIRType &parent_type,
5290 const string &base_expr, const SPIRType &type)
5291{
5292 bool remapped_boolean = parent_type.basetype == SPIRType::Struct &&
5293 type.basetype == SPIRType::Boolean &&
5294 backend.boolean_in_struct_remapped_type != SPIRType::Boolean;
5295
5296 SPIRType tmp_type { OpNop };
5297 if (remapped_boolean)
5298 {
5299 tmp_type = get<SPIRType>(id: type.parent_type);
5300 tmp_type.basetype = backend.boolean_in_struct_remapped_type;
5301 }
5302 else if (type.basetype == SPIRType::Boolean && backend.boolean_in_struct_remapped_type != SPIRType::Boolean)
5303 {
5304 // It's possible that we have an r-value expression that was OpLoaded from a struct.
5305 // We have to reroll this and explicitly cast the input to bool, because the r-value is short.
5306 tmp_type = get<SPIRType>(id: type.parent_type);
5307 remapped_boolean = true;
5308 }
5309
5310 uint32_t size = to_array_size_literal(type);
5311 auto &parent = get<SPIRType>(id: type.parent_type);
5312 string expr = "{ ";
5313
5314 for (uint32_t i = 0; i < size; i++)
5315 {
5316 auto subexpr = join(ts: base_expr, ts: "[", ts: convert_to_string(t: i), ts: "]");
5317 if (!is_array(type: parent))
5318 {
5319 if (remapped_boolean)
5320 subexpr = join(ts: type_to_glsl(type: tmp_type), ts: "(", ts&: subexpr, ts: ")");
5321 expr += subexpr;
5322 }
5323 else
5324 expr += to_rerolled_array_expression(parent_type, base_expr: subexpr, type: parent);
5325
5326 if (i + 1 < size)
5327 expr += ", ";
5328 }
5329
5330 expr += " }";
5331 return expr;
5332}
5333
5334string CompilerGLSL::to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type)
5335{
5336 auto &type = expression_type(id);
5337
5338 bool reroll_array = false;
5339 bool remapped_boolean = parent_type.basetype == SPIRType::Struct &&
5340 type.basetype == SPIRType::Boolean &&
5341 backend.boolean_in_struct_remapped_type != SPIRType::Boolean;
5342
5343 if (is_array(type))
5344 {
5345 reroll_array = !backend.array_is_value_type ||
5346 (block_like_type && !backend.array_is_value_type_in_buffer_blocks);
5347
5348 if (remapped_boolean)
5349 {
5350 // Forced to reroll if we have to change bool[] to short[].
5351 reroll_array = true;
5352 }
5353 }
5354
5355 if (reroll_array)
5356 {
5357 // For this case, we need to "re-roll" an array initializer from a temporary.
5358 // We cannot simply pass the array directly, since it decays to a pointer and it cannot
5359 // participate in a struct initializer. E.g.
5360 // float arr[2] = { 1.0, 2.0 };
5361 // Foo foo = { arr }; must be transformed to
5362 // Foo foo = { { arr[0], arr[1] } };
5363 // The array sizes cannot be deduced from specialization constants since we cannot use any loops.
5364
5365 // We're only triggering one read of the array expression, but this is fine since arrays have to be declared
5366 // as temporaries anyways.
5367 return to_rerolled_array_expression(parent_type, base_expr: to_enclosed_expression(id), type);
5368 }
5369 else
5370 {
5371 auto expr = to_unpacked_expression(id);
5372 if (remapped_boolean)
5373 {
5374 auto tmp_type = type;
5375 tmp_type.basetype = backend.boolean_in_struct_remapped_type;
5376 expr = join(ts: type_to_glsl(type: tmp_type), ts: "(", ts&: expr, ts: ")");
5377 }
5378
5379 return expr;
5380 }
5381}
5382
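// Like to_expression, but additionally applies the backend's non-uniform handling
// (convert_non_uniform_expression) when the ID carries the NonUniform decoration.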
5383string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id)
5384{
5385 string expr = to_expression(id);
5386
5387 if (has_decoration(id, decoration: DecorationNonUniform))
5388 convert_non_uniform_expression(expr, ptr_id: id);
5389
5390 return expr;
5391}
5392
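// Core entry point for turning an ID into a GLSL expression string.
// Dispatches on the ID type (expression, constant, constant op, variable, ...),
// and triggers a recompile with forced temporaries if the expression, or one of
// its dependencies, has been invalidated.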
5393string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
5394{
5395 auto itr = invalid_expressions.find(x: id);
5396 if (itr != end(cont&: invalid_expressions))
5397 handle_invalid_expression(id);
5398
5399 if (ir.ids[id].get_type() == TypeExpression)
5400 {
5401 // We might have a more complex chain of dependencies.
5402 // A possible scenario is that we
5403 //
5404 // %1 = OpLoad
5405   // %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
5406 // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
5407 // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
5408   // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see the %1 expression after the store, not before.
5409 //
5410 // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
5411 // and see that we should not forward reads of the original variable.
5412 auto &expr = get<SPIRExpression>(id);
5413 for (uint32_t dep : expr.expression_dependencies)
5414 if (invalid_expressions.find(x: dep) != end(cont&: invalid_expressions))
5415 handle_invalid_expression(id: dep);
5416 }
5417
5418 if (register_expression_read)
5419 track_expression_read(id);
5420
5421 switch (ir.ids[id].get_type())
5422 {
5423 case TypeExpression:
5424 {
5425 auto &e = get<SPIRExpression>(id);
5426 if (e.base_expression)
5427 return to_enclosed_expression(id: e.base_expression) + e.expression;
5428 else if (e.need_transpose)
5429 {
5430 // This should not be reached for access chains, since we always deal explicitly with transpose state
5431 // when consuming an access chain expression.
5432 uint32_t physical_type_id = get_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID);
5433 bool is_packed = has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
5434 bool relaxed = has_decoration(id, decoration: DecorationRelaxedPrecision);
5435 return convert_row_major_matrix(exp_str: e.expression, exp_type: get<SPIRType>(id: e.expression_type), physical_type_id,
5436 is_packed, relaxed);
5437 }
5438 else if (flattened_structs.count(x: id))
5439 {
5440 return load_flattened_struct(basename: e.expression, type: get<SPIRType>(id: e.expression_type));
5441 }
5442 else
5443 {
5444 if (is_forcing_recompilation())
5445 {
5446      // During the first compilation phase, certain expression patterns can trigger exponential growth of memory.
5447 // Avoid this by returning dummy expressions during this phase.
5448 // Do not use empty expressions here, because those are sentinels for other cases.
5449 return "_";
5450 }
5451 else
5452 return e.expression;
5453 }
5454 }
5455
5456 case TypeConstant:
5457 {
5458 auto &c = get<SPIRConstant>(id);
5459 auto &type = get<SPIRType>(id: c.constant_type);
5460
5461 // WorkGroupSize may be a constant.
5462 if (has_decoration(id: c.self, decoration: DecorationBuiltIn))
5463 return builtin_to_glsl(builtin: BuiltIn(get_decoration(id: c.self, decoration: DecorationBuiltIn)), storage: StorageClassGeneric);
5464 else if (c.specialization)
5465 {
5466 if (backend.workgroup_size_is_hidden)
5467 {
5468 int wg_index = get_constant_mapping_to_workgroup_component(c);
5469 if (wg_index >= 0)
5470 {
5471 auto wg_size = join(ts: builtin_to_glsl(builtin: BuiltInWorkgroupSize, storage: StorageClassInput), ts: vector_swizzle(vecsize: 1, index: wg_index));
5472 if (type.basetype != SPIRType::UInt)
5473 wg_size = bitcast_expression(target_type: type, expr_type: SPIRType::UInt, expr: wg_size);
5474 return wg_size;
5475 }
5476 }
5477
5478 if (expression_is_forwarded(id))
5479 return constant_expression(c);
5480
5481 return to_name(id);
5482 }
5483 else if (c.is_used_as_lut)
5484 return to_name(id);
5485 else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
5486 return to_name(id);
5487 else if (!type.array.empty() && !backend.can_declare_arrays_inline)
5488 return to_name(id);
5489 else
5490 return constant_expression(c);
5491 }
5492
5493 case TypeConstantOp:
5494 return to_name(id);
5495
5496 case TypeVariable:
5497 {
5498 auto &var = get<SPIRVariable>(id);
5499 // If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
5500   // since the variable has not been declared yet.
5501 if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
5502 {
5503 // We might try to load from a loop variable before it has been initialized.
5504 // Prefer static expression and fallback to initializer.
5505 if (var.static_expression)
5506 return to_expression(id: var.static_expression);
5507 else if (var.initializer)
5508 return to_expression(id: var.initializer);
5509 else
5510 {
5511 // We cannot declare the variable yet, so have to fake it.
5512 uint32_t undef_id = ir.increase_bound_by(count: 1);
5513 return emit_uninitialized_temporary_expression(type: get_variable_data_type_id(var), id: undef_id).expression;
5514 }
5515 }
5516 else if (var.deferred_declaration)
5517 {
5518 var.deferred_declaration = false;
5519 return variable_decl(variable: var);
5520 }
5521 else if (flattened_structs.count(x: id))
5522 {
5523 return load_flattened_struct(basename: to_name(id), type: get<SPIRType>(id: var.basetype));
5524 }
5525 else
5526 {
5527 auto &dec = ir.meta[var.self].decoration;
5528 if (dec.builtin)
5529 return builtin_to_glsl(builtin: dec.builtin_type, storage: var.storage);
5530 else
5531 return to_name(id);
5532 }
5533 }
5534
5535 case TypeCombinedImageSampler:
5536  // The expression of this type should never be taken directly.
5537  // The intention is that texture sampling functions will extract the image and sampler
5538  // separately and take their expressions as needed.
5539 // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
5540 // expression ala sampler2D(texture, sampler).
5541 SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
5542
5543 case TypeAccessChain:
5544 // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad.
5545 SPIRV_CROSS_THROW("Access chains have no default expression representation.");
5546
5547 default:
5548 return to_name(id);
5549 }
5550}
5551
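// Returns the constituent constant IDs of a composite constant: subconstants for
// arrays and structs, column IDs for matrices, component IDs for vectors. For results
// of OpSpecConstantOp CompositeInsert, the IDs recorded in const_composite_insert_ids
// are used instead.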
5552SmallVector<ConstantID> CompilerGLSL::get_composite_constant_ids(ConstantID const_id)
5553{
5554 if (auto *constant = maybe_get<SPIRConstant>(id: const_id))
5555 {
5556 const auto &type = get<SPIRType>(id: constant->constant_type);
5557 if (is_array(type) || type.basetype == SPIRType::Struct)
5558 return constant->subconstants;
5559 if (is_matrix(type))
5560 return SmallVector<ConstantID>(constant->m.id);
5561 if (is_vector(type))
5562 return SmallVector<ConstantID>(constant->m.c[0].id);
5563 SPIRV_CROSS_THROW("Unexpected scalar constant!");
5564 }
5565 if (!const_composite_insert_ids.count(x: const_id))
5566 SPIRV_CROSS_THROW("Unimplemented for this OpSpecConstantOp!");
5567 return const_composite_insert_ids[const_id];
5568}
5569
5570void CompilerGLSL::fill_composite_constant(SPIRConstant &constant, TypeID type_id,
5571 const SmallVector<ConstantID> &initializers)
5572{
5573 auto &type = get<SPIRType>(id: type_id);
5574 constant.specialization = true;
5575 if (is_array(type) || type.basetype == SPIRType::Struct)
5576 {
5577 constant.subconstants = initializers;
5578 }
5579 else if (is_matrix(type))
5580 {
5581 constant.m.columns = type.columns;
5582 for (uint32_t i = 0; i < type.columns; ++i)
5583 {
5584 constant.m.id[i] = initializers[i];
5585 constant.m.c[i].vecsize = type.vecsize;
5586 }
5587 }
5588 else if (is_vector(type))
5589 {
5590 constant.m.c[0].vecsize = type.vecsize;
5591 for (uint32_t i = 0; i < type.vecsize; ++i)
5592 constant.m.c[0].id[i] = initializers[i];
5593 }
5594 else
5595 SPIRV_CROSS_THROW("Unexpected scalar in SpecConstantOp CompositeInsert!");
5596}
5597
5598void CompilerGLSL::set_composite_constant(ConstantID const_id, TypeID type_id,
5599 const SmallVector<ConstantID> &initializers)
5600{
5601 if (maybe_get<SPIRConstantOp>(id: const_id))
5602 {
5603 const_composite_insert_ids[const_id] = initializers;
5604 return;
5605 }
5606
5607 auto &constant = set<SPIRConstant>(id: const_id, args&: type_id);
5608 fill_composite_constant(constant, type_id, initializers);
5609 forwarded_temporaries.insert(x: const_id);
5610}
5611
5612TypeID CompilerGLSL::get_composite_member_type(TypeID type_id, uint32_t member_idx)
5613{
5614 auto &type = get<SPIRType>(id: type_id);
5615 if (is_array(type))
5616 return type.parent_type;
5617 if (type.basetype == SPIRType::Struct)
5618 return type.member_types[member_idx];
5619 if (is_matrix(type))
5620 return type.parent_type;
5621 if (is_vector(type))
5622 return type.parent_type;
5623 SPIRV_CROSS_THROW("Shouldn't reach lower than vector handling OpSpecConstantOp CompositeInsert!");
5624}
5625
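// Emits the GLSL expression for an OpSpecConstantOp, mapping the SPIR-V opcode to a
// GLSL operator, constructor or ternary, and inserting sign bitcasts where the
// signedness expected by the SPIR-V opcode differs from the GLSL types involved.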
5626string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
5627{
5628 auto &type = get<SPIRType>(id: cop.basetype);
5629 bool binary = false;
5630 bool unary = false;
5631 string op;
5632
5633 if (is_legacy() && is_unsigned_opcode(op: cop.opcode))
5634 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
5635
5636 // TODO: Find a clean way to reuse emit_instruction.
5637 switch (cop.opcode)
5638 {
5639 case OpSConvert:
5640 case OpUConvert:
5641 case OpFConvert:
5642 op = type_to_glsl_constructor(type);
5643 break;
5644
5645#define GLSL_BOP(opname, x) \
5646 case Op##opname: \
5647 binary = true; \
5648 op = x; \
5649 break
5650
5651#define GLSL_UOP(opname, x) \
5652 case Op##opname: \
5653 unary = true; \
5654 op = x; \
5655 break
5656
5657 GLSL_UOP(SNegate, "-");
5658 GLSL_UOP(Not, "~");
5659 GLSL_BOP(IAdd, "+");
5660 GLSL_BOP(ISub, "-");
5661 GLSL_BOP(IMul, "*");
5662 GLSL_BOP(SDiv, "/");
5663 GLSL_BOP(UDiv, "/");
5664 GLSL_BOP(UMod, "%");
5665 GLSL_BOP(SMod, "%");
5666 GLSL_BOP(ShiftRightLogical, ">>");
5667 GLSL_BOP(ShiftRightArithmetic, ">>");
5668 GLSL_BOP(ShiftLeftLogical, "<<");
5669 GLSL_BOP(BitwiseOr, "|");
5670 GLSL_BOP(BitwiseXor, "^");
5671 GLSL_BOP(BitwiseAnd, "&");
5672 GLSL_BOP(LogicalOr, "||");
5673 GLSL_BOP(LogicalAnd, "&&");
5674 GLSL_UOP(LogicalNot, "!");
5675 GLSL_BOP(LogicalEqual, "==");
5676 GLSL_BOP(LogicalNotEqual, "!=");
5677 GLSL_BOP(IEqual, "==");
5678 GLSL_BOP(INotEqual, "!=");
5679 GLSL_BOP(ULessThan, "<");
5680 GLSL_BOP(SLessThan, "<");
5681 GLSL_BOP(ULessThanEqual, "<=");
5682 GLSL_BOP(SLessThanEqual, "<=");
5683 GLSL_BOP(UGreaterThan, ">");
5684 GLSL_BOP(SGreaterThan, ">");
5685 GLSL_BOP(UGreaterThanEqual, ">=");
5686 GLSL_BOP(SGreaterThanEqual, ">=");
5687
5688 case OpSRem:
5689 {
5690 uint32_t op0 = cop.arguments[0];
5691 uint32_t op1 = cop.arguments[1];
5692 return join(ts: to_enclosed_expression(id: op0), ts: " - ", ts: to_enclosed_expression(id: op1), ts: " * ", ts: "(",
5693 ts: to_enclosed_expression(id: op0), ts: " / ", ts: to_enclosed_expression(id: op1), ts: ")");
5694 }
5695
5696 case OpSelect:
5697 {
5698 if (cop.arguments.size() < 3)
5699 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
5700
5701   // This one is pretty annoying. It's triggered by
5702   // uint(bool) and int(bool) casts of spec constants.
5703 // In order to preserve its compile-time constness in Vulkan GLSL,
5704 // we need to reduce the OpSelect expression back to this simplified model.
5705 // If we cannot, fail.
5706 if (to_trivial_mix_op(type, op, left: cop.arguments[2], right: cop.arguments[1], lerp: cop.arguments[0]))
5707 {
5708 // Implement as a simple cast down below.
5709 }
5710 else
5711 {
5712 // Implement a ternary and pray the compiler understands it :)
5713 return to_ternary_expression(result_type: type, select: cop.arguments[0], true_value: cop.arguments[1], false_value: cop.arguments[2]);
5714 }
5715 break;
5716 }
5717
5718 case OpVectorShuffle:
5719 {
5720 string expr = type_to_glsl_constructor(type);
5721 expr += "(";
5722
5723 uint32_t left_components = expression_type(id: cop.arguments[0]).vecsize;
5724 string left_arg = to_enclosed_expression(id: cop.arguments[0]);
5725 string right_arg = to_enclosed_expression(id: cop.arguments[1]);
5726
5727 for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
5728 {
5729 uint32_t index = cop.arguments[i];
5730 if (index == 0xFFFFFFFF)
5731 {
5732 SPIRConstant c;
5733 c.constant_type = type.parent_type;
5734 assert(type.parent_type != ID(0));
5735 expr += constant_expression(c);
5736 }
5737 else if (index >= left_components)
5738 {
5739 expr += right_arg + "." + "xyzw"[index - left_components];
5740 }
5741 else
5742 {
5743 expr += left_arg + "." + "xyzw"[index];
5744 }
5745
5746 if (i + 1 < uint32_t(cop.arguments.size()))
5747 expr += ", ";
5748 }
5749
5750 expr += ")";
5751 return expr;
5752 }
5753
5754 case OpCompositeExtract:
5755 {
5756 auto expr = access_chain_internal(base: cop.arguments[0], indices: &cop.arguments[1], count: uint32_t(cop.arguments.size() - 1),
5757 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr);
5758 return expr;
5759 }
5760
5761 case OpCompositeInsert:
5762 {
5763 SmallVector<ConstantID> new_init = get_composite_constant_ids(const_id: cop.arguments[1]);
5764 uint32_t idx;
5765 uint32_t target_id = cop.self;
5766 uint32_t target_type_id = cop.basetype;
5767 // We have to drill down to the part we want to modify, and create new
5768 // constants for each containing part.
5769 for (idx = 2; idx < cop.arguments.size() - 1; ++idx)
5770 {
5771 uint32_t new_const = ir.increase_bound_by(count: 1);
5772 uint32_t old_const = new_init[cop.arguments[idx]];
5773 new_init[cop.arguments[idx]] = new_const;
5774 set_composite_constant(const_id: target_id, type_id: target_type_id, initializers: new_init);
5775 new_init = get_composite_constant_ids(const_id: old_const);
5776 target_id = new_const;
5777 target_type_id = get_composite_member_type(type_id: target_type_id, member_idx: cop.arguments[idx]);
5778 }
5779 // Now replace the initializer with the one from this instruction.
5780 new_init[cop.arguments[idx]] = cop.arguments[0];
5781 set_composite_constant(const_id: target_id, type_id: target_type_id, initializers: new_init);
5782 SPIRConstant tmp_const(cop.basetype);
5783 fill_composite_constant(constant&: tmp_const, type_id: cop.basetype, initializers: const_composite_insert_ids[cop.self]);
5784 return constant_expression(c: tmp_const);
5785 }
5786
5787 default:
5788  // Some opcodes are unimplemented here, since these are currently not possible to test from glslang.
5789 SPIRV_CROSS_THROW("Unimplemented spec constant op.");
5790 }
5791
5792 uint32_t bit_width = 0;
5793 if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
5794 bit_width = expression_type(id: cop.arguments[0]).width;
5795
5796 SPIRType::BaseType input_type;
5797 bool skip_cast_if_equal_type = opcode_is_sign_invariant(opcode: cop.opcode);
5798
5799 switch (cop.opcode)
5800 {
5801 case OpIEqual:
5802 case OpINotEqual:
5803 input_type = to_signed_basetype(width: bit_width);
5804 break;
5805
5806 case OpSLessThan:
5807 case OpSLessThanEqual:
5808 case OpSGreaterThan:
5809 case OpSGreaterThanEqual:
5810 case OpSMod:
5811 case OpSDiv:
5812 case OpShiftRightArithmetic:
5813 case OpSConvert:
5814 case OpSNegate:
5815 input_type = to_signed_basetype(width: bit_width);
5816 break;
5817
5818 case OpULessThan:
5819 case OpULessThanEqual:
5820 case OpUGreaterThan:
5821 case OpUGreaterThanEqual:
5822 case OpUMod:
5823 case OpUDiv:
5824 case OpShiftRightLogical:
5825 case OpUConvert:
5826 input_type = to_unsigned_basetype(width: bit_width);
5827 break;
5828
5829 default:
5830 input_type = type.basetype;
5831 break;
5832 }
5833
5834#undef GLSL_BOP
5835#undef GLSL_UOP
5836 if (binary)
5837 {
5838 if (cop.arguments.size() < 2)
5839 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
5840
5841 string cast_op0;
5842 string cast_op1;
5843 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0: cop.arguments[0],
5844 op1: cop.arguments[1], skip_cast_if_equal_type);
5845
5846 if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
5847 {
5848 expected_type.basetype = input_type;
5849 auto expr = bitcast_glsl_op(result_type: type, argument_type: expected_type);
5850 expr += '(';
5851 expr += join(ts&: cast_op0, ts: " ", ts&: op, ts: " ", ts&: cast_op1);
5852 expr += ')';
5853 return expr;
5854 }
5855 else
5856 return join(ts: "(", ts&: cast_op0, ts: " ", ts&: op, ts: " ", ts&: cast_op1, ts: ")");
5857 }
5858 else if (unary)
5859 {
5860 if (cop.arguments.size() < 1)
5861 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
5862
5863 // Auto-bitcast to result type as needed.
5864 // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
5865 return join(ts: "(", ts&: op, ts: bitcast_glsl(result_type: type, arg: cop.arguments[0]), ts: ")");
5866 }
5867 else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
5868 {
5869 if (cop.arguments.size() < 1)
5870 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
5871
5872 auto &arg_type = expression_type(id: cop.arguments[0]);
5873 if (arg_type.width < type.width && input_type != arg_type.basetype)
5874 {
5875 auto expected = arg_type;
5876 expected.basetype = input_type;
5877 return join(ts&: op, ts: "(", ts: bitcast_glsl(result_type: expected, arg: cop.arguments[0]), ts: ")");
5878 }
5879 else
5880 return join(ts&: op, ts: "(", ts: to_expression(id: cop.arguments[0]), ts: ")");
5881 }
5882 else
5883 {
5884 if (cop.arguments.size() < 1)
5885 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
5886 return join(ts&: op, ts: "(", ts: to_expression(id: cop.arguments[0]), ts: ")");
5887 }
5888}
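// Illustrative shapes of what the arithmetic fallback paths above may emit, assuming
// 32-bit scalar spec constants named A and B (the exact text depends on the backend
// and on which casts are required):
//   binary with sign mismatch:  int(uint(A) >> uint(B))
//   unary (e.g. OpNot):         (~A), or (~int(A)) if a bitcast to the result type is needed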
5889
5890string CompilerGLSL::constant_expression(const SPIRConstant &c,
5891 bool inside_block_like_struct_scope,
5892 bool inside_struct_scope)
5893{
5894 auto &type = get<SPIRType>(id: c.constant_type);
5895
5896 if (is_pointer(type))
5897 {
5898 return backend.null_pointer_literal;
5899 }
5900 else if (!c.subconstants.empty())
5901 {
5902 // Handles Arrays and structures.
5903 string res;
5904
5905 // Only consider the decay if we are inside a struct scope where we are emitting a member with Offset decoration.
5906 // Outside a block-like struct declaration, we can always bind to a constant array with templated type.
5907 // Should look at ArrayStride here as well, but it's possible to declare a constant struct
5908 // with Offset = 0, using no ArrayStride on the enclosed array type.
5909 // A particular CTS test hits this scenario.
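// Illustrative example (assumed backend behaviour): on backends where arrays are not
// value types inside buffer blocks, a constant float[2] initializing such a member
// decays to a plain initializer list like { 1.0, 2.0 } instead of a typed constructor.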
5910 bool array_type_decays = inside_block_like_struct_scope &&
5911 is_array(type) &&
5912 !backend.array_is_value_type_in_buffer_blocks;
5913
5914 // Allow Metal to use the array<T> template to make arrays a value type
5915 bool needs_trailing_bracket = false;
5916 if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
5917 !is_array(type))
5918 {
5919 res = type_to_glsl_constructor(type) + "{ ";
5920 }
5921 else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
5922 is_array(type) && !array_type_decays)
5923 {
5924 const auto *p_type = &type;
5925 SPIRType tmp_type { OpNop };
5926
5927 if (inside_struct_scope &&
5928 backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
5929 type.basetype == SPIRType::Boolean)
5930 {
5931 tmp_type = type;
5932 tmp_type.basetype = backend.boolean_in_struct_remapped_type;
5933 p_type = &tmp_type;
5934 }
5935
5936 res = type_to_glsl_constructor(type: *p_type) + "({ ";
5937 needs_trailing_bracket = true;
5938 }
5939 else if (backend.use_initializer_list)
5940 {
5941 res = "{ ";
5942 }
5943 else
5944 {
5945 res = type_to_glsl_constructor(type) + "(";
5946 }
5947
5948 uint32_t subconstant_index = 0;
5949 for (auto &elem : c.subconstants)
5950 {
5951 if (auto *op = maybe_get<SPIRConstantOp>(id: elem))
5952 {
5953 res += constant_op_expression(cop: *op);
5954 }
5955 else if (maybe_get<SPIRUndef>(id: elem) != nullptr)
5956 {
5957 res += to_name(id: elem);
5958 }
5959 else
5960 {
5961 auto &subc = get<SPIRConstant>(id: elem);
5962 if (subc.specialization && !expression_is_forwarded(id: elem))
5963 res += to_name(id: elem);
5964 else
5965 {
5966 if (!is_array(type) && type.basetype == SPIRType::Struct)
5967 {
5968 // When we get down to emitting struct members, override the block-like information.
5969 // For constants, we can freely mix and match block-like state.
5970 inside_block_like_struct_scope =
5971 has_member_decoration(id: type.self, index: subconstant_index, decoration: DecorationOffset);
5972 }
5973
5974 if (type.basetype == SPIRType::Struct)
5975 inside_struct_scope = true;
5976
5977 res += constant_expression(c: subc, inside_block_like_struct_scope, inside_struct_scope);
5978 }
5979 }
5980
5981 if (&elem != &c.subconstants.back())
5982 res += ", ";
5983
5984 subconstant_index++;
5985 }
5986
5987 res += backend.use_initializer_list ? " }" : ")";
5988 if (needs_trailing_bracket)
5989 res += ")";
5990
5991 return res;
5992 }
5993 else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
5994 {
5995 // Metal tessellation likes empty structs, which then have to be emitted as constant expressions.
5996 if (backend.supports_empty_struct)
5997 return "{ }";
5998 else if (backend.use_typed_initializer_list)
5999 return join(ts: type_to_glsl(type), ts: "{ 0 }");
6000 else if (backend.use_initializer_list)
6001 return "{ 0 }";
6002 else
6003 return join(ts: type_to_glsl(type), ts: "(0)");
6004 }
6005 else if (c.columns() == 1)
6006 {
6007 auto res = constant_expression_vector(c, vector: 0);
6008
6009 if (inside_struct_scope &&
6010 backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
6011 type.basetype == SPIRType::Boolean)
6012 {
6013 SPIRType tmp_type = type;
6014 tmp_type.basetype = backend.boolean_in_struct_remapped_type;
6015 res = join(ts: type_to_glsl(type: tmp_type), ts: "(", ts&: res, ts: ")");
6016 }
6017
6018 return res;
6019 }
6020 else
6021 {
6022 string res = type_to_glsl(type) + "(";
6023 for (uint32_t col = 0; col < c.columns(); col++)
6024 {
6025 if (c.specialization_constant_id(col) != 0)
6026 res += to_name(id: c.specialization_constant_id(col));
6027 else
6028 res += constant_expression_vector(c, vector: col);
6029
6030 if (col + 1 < c.columns())
6031 res += ", ";
6032 }
6033 res += ")";
6034
6035 if (inside_struct_scope &&
6036 backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
6037 type.basetype == SPIRType::Boolean)
6038 {
6039 SPIRType tmp_type = type;
6040 tmp_type.basetype = backend.boolean_in_struct_remapped_type;
6041 res = join(ts: type_to_glsl(type: tmp_type), ts: "(", ts&: res, ts: ")");
6042 }
6043
6044 return res;
6045 }
6046}
6047
6048#ifdef _MSC_VER
6049// snprintf does not exist or is buggy on older MSVC versions (some of which are
6050// used by MinGW). Use sprintf instead and disable the corresponding warning.
6051#pragma warning(push)
6052#pragma warning(disable : 4996)
6053#endif
6054
6055string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
6056{
6057 string res;
6058 float float_value = c.scalar_f16(col, row);
6059
6060 // There is no literal "hf" suffix in GL_NV_gpu_shader5, so to avoid lots
6061 // of complicated workarounds, always value-cast to the half type instead.
6062 if (std::isnan(x: float_value) || std::isinf(x: float_value))
6063 {
6064 SPIRType type { OpTypeFloat };
6065 type.basetype = SPIRType::Half;
6066 type.vecsize = 1;
6067 type.columns = 1;
6068
6069 if (float_value == numeric_limits<float>::infinity())
6070 res = join(ts: type_to_glsl(type), ts: "(1.0 / 0.0)");
6071 else if (float_value == -numeric_limits<float>::infinity())
6072 res = join(ts: type_to_glsl(type), ts: "(-1.0 / 0.0)");
6073 else if (std::isnan(x: float_value))
6074 res = join(ts: type_to_glsl(type), ts: "(0.0 / 0.0)");
6075 else
6076 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
6077 }
6078 else
6079 {
6080 SPIRType type { OpTypeFloat };
6081 type.basetype = SPIRType::Half;
6082 type.vecsize = 1;
6083 type.columns = 1;
6084 res = join(ts: type_to_glsl(type), ts: "(", ts: format_float(value: float_value), ts: ")");
6085 }
6086
6087 return res;
6088}
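// For example (assuming a GLSL-style backend where the half type prints as float16_t):
// +inf becomes float16_t(1.0 / 0.0), NaN becomes float16_t(0.0 / 0.0), and a finite
// 0.5 becomes float16_t(0.5).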
6089
6090string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
6091{
6092 string res;
6093 float float_value = c.scalar_f32(col, row);
6094
6095 if (std::isnan(x: float_value) || std::isinf(x: float_value))
6096 {
6097 // Use special representation.
6098 if (!is_legacy())
6099 {
6100 SPIRType out_type { OpTypeFloat };
6101 SPIRType in_type { OpTypeInt };
6102 out_type.basetype = SPIRType::Float;
6103 in_type.basetype = SPIRType::UInt;
6104 out_type.vecsize = 1;
6105 in_type.vecsize = 1;
6106 out_type.width = 32;
6107 in_type.width = 32;
6108
6109 char print_buffer[32];
6110#ifdef _WIN32
6111 sprintf(print_buffer, "0x%xu", c.scalar(col, row));
6112#else
6113 snprintf(s: print_buffer, maxlen: sizeof(print_buffer), format: "0x%xu", c.scalar(col, row));
6114#endif
6115
6116 const char *comment = "inf";
6117 if (float_value == -numeric_limits<float>::infinity())
6118 comment = "-inf";
6119 else if (std::isnan(x: float_value))
6120 comment = "nan";
6121 res = join(ts: bitcast_glsl_op(result_type: out_type, argument_type: in_type), ts: "(", ts&: print_buffer, ts: " /* ", ts&: comment, ts: " */)");
6122 }
6123 else
6124 {
6125 if (float_value == numeric_limits<float>::infinity())
6126 {
6127 if (backend.float_literal_suffix)
6128 res = "(1.0f / 0.0f)";
6129 else
6130 res = "(1.0 / 0.0)";
6131 }
6132 else if (float_value == -numeric_limits<float>::infinity())
6133 {
6134 if (backend.float_literal_suffix)
6135 res = "(-1.0f / 0.0f)";
6136 else
6137 res = "(-1.0 / 0.0)";
6138 }
6139 else if (std::isnan(x: float_value))
6140 {
6141 if (backend.float_literal_suffix)
6142 res = "(0.0f / 0.0f)";
6143 else
6144 res = "(0.0 / 0.0)";
6145 }
6146 else
6147 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
6148 }
6149 }
6150 else
6151 {
6152 res = format_float(value: float_value);
6153 if (backend.float_literal_suffix)
6154 res += "f";
6155 }
6156
6157 return res;
6158}
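// For example: a standard quiet NaN on a non-legacy target is emitted as a bitcast,
// e.g. uintBitsToFloat(0x7fc00000u /* nan */), while legacy targets fall back to
// expressions such as (0.0 / 0.0).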
6159
6160std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
6161{
6162 string res;
6163 double double_value = c.scalar_f64(col, row);
6164
6165 if (std::isnan(x: double_value) || std::isinf(x: double_value))
6166 {
6167 // Use special representation.
6168 if (!is_legacy())
6169 {
6170 SPIRType out_type { OpTypeFloat };
6171 SPIRType in_type { OpTypeInt };
6172 out_type.basetype = SPIRType::Double;
6173 in_type.basetype = SPIRType::UInt64;
6174 out_type.vecsize = 1;
6175 in_type.vecsize = 1;
6176 out_type.width = 64;
6177 in_type.width = 64;
6178
6179 uint64_t u64_value = c.scalar_u64(col, row);
6180
6181 if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310.
6182 SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310.");
6183 require_extension_internal(ext: "GL_ARB_gpu_shader_int64");
6184
6185 char print_buffer[64];
6186#ifdef _WIN32
6187 sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
6188 backend.long_long_literal_suffix ? "ull" : "ul");
6189#else
6190 snprintf(s: print_buffer, maxlen: sizeof(print_buffer), format: "0x%llx%s", static_cast<unsigned long long>(u64_value),
6191 backend.long_long_literal_suffix ? "ull" : "ul");
6192#endif
6193
6194 const char *comment = "inf";
6195 if (double_value == -numeric_limits<double>::infinity())
6196 comment = "-inf";
6197 else if (std::isnan(x: double_value))
6198 comment = "nan";
6199 res = join(ts: bitcast_glsl_op(result_type: out_type, argument_type: in_type), ts: "(", ts&: print_buffer, ts: " /* ", ts&: comment, ts: " */)");
6200 }
6201 else
6202 {
6203 if (options.es)
6204 SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
6205 if (options.version < 400)
6206 require_extension_internal(ext: "GL_ARB_gpu_shader_fp64");
6207
6208 if (double_value == numeric_limits<double>::infinity())
6209 {
6210 if (backend.double_literal_suffix)
6211 res = "(1.0lf / 0.0lf)";
6212 else
6213 res = "(1.0 / 0.0)";
6214 }
6215 else if (double_value == -numeric_limits<double>::infinity())
6216 {
6217 if (backend.double_literal_suffix)
6218 res = "(-1.0lf / 0.0lf)";
6219 else
6220 res = "(-1.0 / 0.0)";
6221 }
6222 else if (std::isnan(x: double_value))
6223 {
6224 if (backend.double_literal_suffix)
6225 res = "(0.0lf / 0.0lf)";
6226 else
6227 res = "(0.0 / 0.0)";
6228 }
6229 else
6230 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
6231 }
6232 }
6233 else
6234 {
6235 res = format_double(value: double_value);
6236 if (backend.double_literal_suffix)
6237 res += "lf";
6238 }
6239
6240 return res;
6241}
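// Analogous to the float path above: on non-legacy targets NaN/inf are emitted through
// a 64-bit bitcast of the raw bit pattern (assuming the backend's 64-bit bitcast op,
// e.g. something like uint64BitsToDouble(0x7ff8000000000000ul /* nan */)), while
// legacy desktop targets fall back to (0.0 / 0.0)-style expressions.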
6242
6243#ifdef _MSC_VER
6244#pragma warning(pop)
6245#endif
6246
6247string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
6248{
6249 auto type = get<SPIRType>(id: c.constant_type);
6250 type.columns = 1;
6251
6252 auto scalar_type = type;
6253 scalar_type.vecsize = 1;
6254
6255 string res;
6256 bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
6257 bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
6258
6259 if (!type_is_floating_point(type))
6260 {
6261 // As a special case, we cannot swizzle-splat literal integers.
6262 swizzle_splat = false;
6263 }
6264
6265 if (splat || swizzle_splat)
6266 {
6267 // Cannot use constant splatting if we have specialization constants somewhere in the vector.
6268 for (uint32_t i = 0; i < c.vector_size(); i++)
6269 {
6270 if (c.specialization_constant_id(col: vector, row: i) != 0)
6271 {
6272 splat = false;
6273 swizzle_splat = false;
6274 break;
6275 }
6276 }
6277 }
6278
6279 if (splat || swizzle_splat)
6280 {
6281 if (type.width == 64)
6282 {
6283 uint64_t ident = c.scalar_u64(col: vector, row: 0);
6284 for (uint32_t i = 1; i < c.vector_size(); i++)
6285 {
6286 if (ident != c.scalar_u64(col: vector, row: i))
6287 {
6288 splat = false;
6289 swizzle_splat = false;
6290 break;
6291 }
6292 }
6293 }
6294 else
6295 {
6296 uint32_t ident = c.scalar(col: vector, row: 0);
6297 for (uint32_t i = 1; i < c.vector_size(); i++)
6298 {
6299 if (ident != c.scalar(col: vector, row: i))
6300 {
6301 splat = false;
6302 swizzle_splat = false;
6303 }
6304 }
6305 }
6306 }
6307
6308 if (c.vector_size() > 1 && !swizzle_splat)
6309 res += type_to_glsl(type) + "(";
6310
6311 switch (type.basetype)
6312 {
6313 case SPIRType::Half:
6314 if (splat || swizzle_splat)
6315 {
6316 res += convert_half_to_string(c, col: vector, row: 0);
6317 if (swizzle_splat)
6318 res = remap_swizzle(out_type: get<SPIRType>(id: c.constant_type), input_components: 1, expr: res);
6319 }
6320 else
6321 {
6322 for (uint32_t i = 0; i < c.vector_size(); i++)
6323 {
6324 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6325 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6326 else
6327 res += convert_half_to_string(c, col: vector, row: i);
6328
6329 if (i + 1 < c.vector_size())
6330 res += ", ";
6331 }
6332 }
6333 break;
6334
6335 case SPIRType::Float:
6336 if (splat || swizzle_splat)
6337 {
6338 res += convert_float_to_string(c, col: vector, row: 0);
6339 if (swizzle_splat)
6340 res = remap_swizzle(out_type: get<SPIRType>(id: c.constant_type), input_components: 1, expr: res);
6341 }
6342 else
6343 {
6344 for (uint32_t i = 0; i < c.vector_size(); i++)
6345 {
6346 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6347 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6348 else
6349 res += convert_float_to_string(c, col: vector, row: i);
6350
6351 if (i + 1 < c.vector_size())
6352 res += ", ";
6353 }
6354 }
6355 break;
6356
6357 case SPIRType::Double:
6358 if (splat || swizzle_splat)
6359 {
6360 res += convert_double_to_string(c, col: vector, row: 0);
6361 if (swizzle_splat)
6362 res = remap_swizzle(out_type: get<SPIRType>(id: c.constant_type), input_components: 1, expr: res);
6363 }
6364 else
6365 {
6366 for (uint32_t i = 0; i < c.vector_size(); i++)
6367 {
6368 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6369 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6370 else
6371 res += convert_double_to_string(c, col: vector, row: i);
6372
6373 if (i + 1 < c.vector_size())
6374 res += ", ";
6375 }
6376 }
6377 break;
6378
6379 case SPIRType::Int64:
6380 {
6381 auto tmp = type;
6382 tmp.vecsize = 1;
6383 tmp.columns = 1;
6384 auto int64_type = type_to_glsl(type: tmp);
6385
6386 if (splat)
6387 {
6388 res += convert_to_string(value: c.scalar_i64(col: vector, row: 0), int64_type, long_long_literal_suffix: backend.long_long_literal_suffix);
6389 }
6390 else
6391 {
6392 for (uint32_t i = 0; i < c.vector_size(); i++)
6393 {
6394 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6395 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6396 else
6397 res += convert_to_string(value: c.scalar_i64(col: vector, row: i), int64_type, long_long_literal_suffix: backend.long_long_literal_suffix);
6398
6399 if (i + 1 < c.vector_size())
6400 res += ", ";
6401 }
6402 }
6403 break;
6404 }
6405
6406 case SPIRType::UInt64:
6407 if (splat)
6408 {
6409 res += convert_to_string(t: c.scalar_u64(col: vector, row: 0));
6410 if (backend.long_long_literal_suffix)
6411 res += "ull";
6412 else
6413 res += "ul";
6414 }
6415 else
6416 {
6417 for (uint32_t i = 0; i < c.vector_size(); i++)
6418 {
6419 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6420 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6421 else
6422 {
6423 res += convert_to_string(t: c.scalar_u64(col: vector, row: i));
6424 if (backend.long_long_literal_suffix)
6425 res += "ull";
6426 else
6427 res += "ul";
6428 }
6429
6430 if (i + 1 < c.vector_size())
6431 res += ", ";
6432 }
6433 }
6434 break;
6435
6436 case SPIRType::UInt:
6437 if (splat)
6438 {
6439 res += convert_to_string(t: c.scalar(col: vector, row: 0));
6440 if (is_legacy())
6441 {
6442 // Fake unsigned constant literals with signed ones if possible.
6443 // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
6444 if (c.scalar_i32(col: vector, row: 0) < 0)
6445 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
6446 }
6447 else if (backend.uint32_t_literal_suffix)
6448 res += "u";
6449 }
6450 else
6451 {
6452 for (uint32_t i = 0; i < c.vector_size(); i++)
6453 {
6454 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6455 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6456 else
6457 {
6458 res += convert_to_string(t: c.scalar(col: vector, row: i));
6459 if (is_legacy())
6460 {
6461 // Fake unsigned constant literals with signed ones if possible.
6462 // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
6463 if (c.scalar_i32(col: vector, row: i) < 0)
6464 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
6465 "the literal negative.");
6466 }
6467 else if (backend.uint32_t_literal_suffix)
6468 res += "u";
6469 }
6470
6471 if (i + 1 < c.vector_size())
6472 res += ", ";
6473 }
6474 }
6475 break;
6476
6477 case SPIRType::Int:
6478 if (splat)
6479 res += convert_to_string(value: c.scalar_i32(col: vector, row: 0));
6480 else
6481 {
6482 for (uint32_t i = 0; i < c.vector_size(); i++)
6483 {
6484 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6485 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6486 else
6487 res += convert_to_string(value: c.scalar_i32(col: vector, row: i));
6488 if (i + 1 < c.vector_size())
6489 res += ", ";
6490 }
6491 }
6492 break;
6493
6494 case SPIRType::UShort:
6495 if (splat)
6496 {
6497 res += convert_to_string(t: c.scalar(col: vector, row: 0));
6498 }
6499 else
6500 {
6501 for (uint32_t i = 0; i < c.vector_size(); i++)
6502 {
6503 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6504 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6505 else
6506 {
6507 if (*backend.uint16_t_literal_suffix)
6508 {
6509 res += convert_to_string(t: c.scalar_u16(col: vector, row: i));
6510 res += backend.uint16_t_literal_suffix;
6511 }
6512 else
6513 {
6514 // If backend doesn't have a literal suffix, we need to value cast.
6515 res += type_to_glsl(type: scalar_type);
6516 res += "(";
6517 res += convert_to_string(t: c.scalar_u16(col: vector, row: i));
6518 res += ")";
6519 }
6520 }
6521
6522 if (i + 1 < c.vector_size())
6523 res += ", ";
6524 }
6525 }
6526 break;
6527
6528 case SPIRType::Short:
6529 if (splat)
6530 {
6531 res += convert_to_string(t: c.scalar_i16(col: vector, row: 0));
6532 }
6533 else
6534 {
6535 for (uint32_t i = 0; i < c.vector_size(); i++)
6536 {
6537 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6538 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6539 else
6540 {
6541 if (*backend.int16_t_literal_suffix)
6542 {
6543 res += convert_to_string(t: c.scalar_i16(col: vector, row: i));
6544 res += backend.int16_t_literal_suffix;
6545 }
6546 else
6547 {
6548 // If backend doesn't have a literal suffix, we need to value cast.
6549 res += type_to_glsl(type: scalar_type);
6550 res += "(";
6551 res += convert_to_string(t: c.scalar_i16(col: vector, row: i));
6552 res += ")";
6553 }
6554 }
6555
6556 if (i + 1 < c.vector_size())
6557 res += ", ";
6558 }
6559 }
6560 break;
6561
6562 case SPIRType::UByte:
6563 if (splat)
6564 {
6565 res += convert_to_string(t: c.scalar_u8(col: vector, row: 0));
6566 }
6567 else
6568 {
6569 for (uint32_t i = 0; i < c.vector_size(); i++)
6570 {
6571 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6572 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6573 else
6574 {
6575 res += type_to_glsl(type: scalar_type);
6576 res += "(";
6577 res += convert_to_string(t: c.scalar_u8(col: vector, row: i));
6578 res += ")";
6579 }
6580
6581 if (i + 1 < c.vector_size())
6582 res += ", ";
6583 }
6584 }
6585 break;
6586
6587 case SPIRType::SByte:
6588 if (splat)
6589 {
6590 res += convert_to_string(t: c.scalar_i8(col: vector, row: 0));
6591 }
6592 else
6593 {
6594 for (uint32_t i = 0; i < c.vector_size(); i++)
6595 {
6596 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6597 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6598 else
6599 {
6600 res += type_to_glsl(type: scalar_type);
6601 res += "(";
6602 res += convert_to_string(t: c.scalar_i8(col: vector, row: i));
6603 res += ")";
6604 }
6605
6606 if (i + 1 < c.vector_size())
6607 res += ", ";
6608 }
6609 }
6610 break;
6611
6612 case SPIRType::Boolean:
6613 if (splat)
6614 res += c.scalar(col: vector, row: 0) ? "true" : "false";
6615 else
6616 {
6617 for (uint32_t i = 0; i < c.vector_size(); i++)
6618 {
6619 if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0)
6620 res += to_expression(id: c.specialization_constant_id(col: vector, row: i));
6621 else
6622 res += c.scalar(col: vector, row: i) ? "true" : "false";
6623
6624 if (i + 1 < c.vector_size())
6625 res += ", ";
6626 }
6627 }
6628 break;
6629
6630 default:
6631 SPIRV_CROSS_THROW("Invalid constant expression basetype.");
6632 }
6633
6634 if (c.vector_size() > 1 && !swizzle_splat)
6635 res += ")";
6636
6637 return res;
6638}
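// Illustrative outputs of the function above (assuming a GLSL-style backend):
//   splatted constant:   vec4(1.0)
//   mixed components:    vec4(1.0, 2.0, 3.0, 4.0)
//   spec-constant part:  a component that is itself a specialization constant is
//                        referenced by its name rather than by its value.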
6639
6640SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
6641{
6642 forced_temporaries.insert(x: id);
6643 emit_uninitialized_temporary(type, id);
6644 return set<SPIRExpression>(id, args: to_name(id), args&: type, args: true);
6645}
6646
6647void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
6648{
6649 // If we're declaring temporaries inside continue blocks,
6650 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
6651 if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(x: result_id))
6652 {
6653 auto &header = get<SPIRBlock>(id: current_continue_block->loop_dominator);
6654 if (find_if(first: begin(cont&: header.declare_temporary), last: end(cont&: header.declare_temporary),
6655 pred: [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
6656 return tmp.first == result_type && tmp.second == result_id;
6657 }) == end(cont&: header.declare_temporary))
6658 {
6659 header.declare_temporary.emplace_back(ts&: result_type, ts&: result_id);
6660 hoisted_temporaries.insert(x: result_id);
6661 force_recompile();
6662 }
6663 }
6664 else if (hoisted_temporaries.count(x: result_id) == 0)
6665 {
6666 auto &type = get<SPIRType>(id: result_type);
6667 auto &flags = get_decoration_bitset(id: result_id);
6668
6669 // The result_id has not been made into an expression yet, so use flags interface.
6670 add_local_variable_name(id: result_id);
6671
6672 string initializer;
6673 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
6674 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: result_type));
6675
6676 statement(ts: flags_to_qualifiers_glsl(type, flags), ts: variable_decl(type, name: to_name(id: result_id)), ts&: initializer, ts: ";");
6677 }
6678}
6679
6680string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
6681{
6682 auto &type = get<SPIRType>(id: result_type);
6683
6684 // If we're declaring temporaries inside continue blocks,
6685 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
6686 if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(x: result_id))
6687 {
6688 auto &header = get<SPIRBlock>(id: current_continue_block->loop_dominator);
6689 if (find_if(first: begin(cont&: header.declare_temporary), last: end(cont&: header.declare_temporary),
6690 pred: [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
6691 return tmp.first == result_type && tmp.second == result_id;
6692 }) == end(cont&: header.declare_temporary))
6693 {
6694 header.declare_temporary.emplace_back(ts&: result_type, ts&: result_id);
6695 hoisted_temporaries.insert(x: result_id);
6696 force_recompile_guarantee_forward_progress();
6697 }
6698
6699 return join(ts: to_name(id: result_id), ts: " = ");
6700 }
6701 else if (hoisted_temporaries.count(x: result_id))
6702 {
6703 // The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
6704 return join(ts: to_name(id: result_id), ts: " = ");
6705 }
6706 else
6707 {
6708 // The result_id has not been made into an expression yet, so use flags interface.
6709 add_local_variable_name(id: result_id);
6710 auto &flags = get_decoration_bitset(id: result_id);
6711 return join(ts: flags_to_qualifiers_glsl(type, flags), ts: variable_decl(type, name: to_name(id: result_id)), ts: " = ");
6712 }
6713}
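// Illustrative shape of the hoisting above (assumed GLSL output): the temporary is
// declared up front in the loop dominator, and the continue block then only assigns:
//   int _15;
//   for (;;) { /* body */ ... /* continue block */ _15 = expr; }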
6714
6715bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
6716{
6717 return forwarded_temporaries.count(x: id) != 0;
6718}
6719
6720bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
6721{
6722 return suppressed_usage_tracking.count(x: id) != 0;
6723}
6724
6725bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
6726{
6727 auto *expr = maybe_get<SPIRExpression>(id);
6728 if (!expr)
6729 return false;
6730
6731 // If we're emitting code at a deeper loop level than when we emitted the expression,
6732 // we're probably reading the same expression over and over.
6733 return current_loop_level > expr->emitted_loop_level;
6734}
6735
6736SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
6737 bool suppress_usage_tracking)
6738{
6739 if (forwarding && (forced_temporaries.find(x: result_id) == end(cont&: forced_temporaries)))
6740 {
6741 // Just forward it without a temporary.
6742 // If the forward is trivial, we do not force flushing to a temporary for this expression.
6743 forwarded_temporaries.insert(x: result_id);
6744 if (suppress_usage_tracking)
6745 suppressed_usage_tracking.insert(x: result_id);
6746
6747 return set<SPIRExpression>(id: result_id, args: rhs, args&: result_type, args: true);
6748 }
6749 else
6750 {
6751 // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
6752 statement(ts: declare_temporary(result_type, result_id), ts: rhs, ts: ";");
6753 return set<SPIRExpression>(id: result_id, args: to_name(id: result_id), args&: result_type, args: true);
6754 }
6755}
6756
6757void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
6758{
6759 bool forward = should_forward(id: op0);
6760 emit_op(result_type, result_id, rhs: join(ts&: op, ts: to_enclosed_unpacked_expression(id: op0)), forwarding: forward);
6761 inherit_expression_dependencies(dst: result_id, source: op0);
6762}
6763
6764void CompilerGLSL::emit_unary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
6765{
6766 auto &type = get<SPIRType>(id: result_type);
6767 bool forward = should_forward(id: op0);
6768 emit_op(result_type, result_id, rhs: join(ts: type_to_glsl(type), ts: "(", ts&: op, ts: to_enclosed_unpacked_expression(id: op0), ts: ")"), forwarding: forward);
6769 inherit_expression_dependencies(dst: result_id, source: op0);
6770}
6771
6772void CompilerGLSL::emit_mesh_tasks(SPIRBlock &block)
6773{
6774 statement(ts: "EmitMeshTasksEXT(",
6775 ts: to_unpacked_expression(id: block.mesh.groups[0]), ts: ", ",
6776 ts: to_unpacked_expression(id: block.mesh.groups[1]), ts: ", ",
6777 ts: to_unpacked_expression(id: block.mesh.groups[2]), ts: ");");
6778}
6779
6780void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
6781{
6782 // Various FP arithmetic opcodes such as add, sub, mul will hit this.
6783 bool force_temporary_precise = backend.support_precise_qualifier &&
6784 has_decoration(id: result_id, decoration: DecorationNoContraction) &&
6785 type_is_floating_point(type: get<SPIRType>(id: result_type));
6786 bool forward = should_forward(id: op0) && should_forward(id: op1) && !force_temporary_precise;
6787
6788 emit_op(result_type, result_id,
6789 rhs: join(ts: to_enclosed_unpacked_expression(id: op0), ts: " ", ts&: op, ts: " ", ts: to_enclosed_unpacked_expression(id: op1)), forwarding: forward);
6790
6791 inherit_expression_dependencies(dst: result_id, source: op0);
6792 inherit_expression_dependencies(dst: result_id, source: op1);
6793}
6794
6795void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
6796{
6797 auto &type = get<SPIRType>(id: result_type);
6798 auto expr = type_to_glsl_constructor(type);
6799 expr += '(';
6800 for (uint32_t i = 0; i < type.vecsize; i++)
6801 {
6802 // Make sure to call to_expression multiple times to ensure
6803 // that these expressions are properly flushed to temporaries if needed.
6804 expr += op;
6805 expr += to_extract_component_expression(id: operand, index: i);
6806
6807 if (i + 1 < type.vecsize)
6808 expr += ", ";
6809 }
6810 expr += ')';
6811 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: operand));
6812
6813 inherit_expression_dependencies(dst: result_id, source: operand);
6814}
6815
6816void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6817 const char *op, bool negate, SPIRType::BaseType expected_type)
6818{
6819 auto &type0 = expression_type(id: op0);
6820 auto &type1 = expression_type(id: op1);
6821
6822 SPIRType target_type0 = type0;
6823 SPIRType target_type1 = type1;
6824 target_type0.basetype = expected_type;
6825 target_type1.basetype = expected_type;
6826 target_type0.vecsize = 1;
6827 target_type1.vecsize = 1;
6828
6829 auto &type = get<SPIRType>(id: result_type);
6830 auto expr = type_to_glsl_constructor(type);
6831 expr += '(';
6832 for (uint32_t i = 0; i < type.vecsize; i++)
6833 {
6834 // Make sure to call to_expression multiple times to ensure
6835 // that these expressions are properly flushed to temporaries if needed.
6836 if (negate)
6837 expr += "!(";
6838
6839 if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
6840 expr += bitcast_expression(target_type: target_type0, expr_type: type0.basetype, expr: to_extract_component_expression(id: op0, index: i));
6841 else
6842 expr += to_extract_component_expression(id: op0, index: i);
6843
6844 expr += ' ';
6845 expr += op;
6846 expr += ' ';
6847
6848 if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
6849 expr += bitcast_expression(target_type: target_type1, expr_type: type1.basetype, expr: to_extract_component_expression(id: op1, index: i));
6850 else
6851 expr += to_extract_component_expression(id: op1, index: i);
6852
6853 if (negate)
6854 expr += ")";
6855
6856 if (i + 1 < type.vecsize)
6857 expr += ", ";
6858 }
6859 expr += ')';
6860 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1));
6861
6862 inherit_expression_dependencies(dst: result_id, source: op0);
6863 inherit_expression_dependencies(dst: result_id, source: op1);
6864}
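// For example, a component-wise "<" between two vec2 expressions a and b unrolls to
// bvec2(a.x < b.x, a.y < b.y); with negate set, each component is wrapped as !(...).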
6865
6866SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
6867 uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
6868{
6869 auto &type0 = expression_type(id: op0);
6870 auto &type1 = expression_type(id: op1);
6871
6872 // We have to bitcast if our inputs are of different types, or if our types are not equal to the expected inputs.
6873 // For some functions like OpIEqual and OpINotEqual, we don't care if inputs are of different types than expected,
6874 // since the equality test is exactly the same.
6875 bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
6876
6877 // Create a fake type so we can bitcast to it.
6878 // We only deal with regular arithmetic types here like int, uints and so on.
6879 SPIRType expected_type{type0.op};
6880 expected_type.basetype = input_type;
6881 expected_type.vecsize = type0.vecsize;
6882 expected_type.columns = type0.columns;
6883 expected_type.width = type0.width;
6884
6885 if (cast)
6886 {
6887 cast_op0 = bitcast_glsl(result_type: expected_type, arg: op0);
6888 cast_op1 = bitcast_glsl(result_type: expected_type, arg: op1);
6889 }
6890 else
6891 {
6892 // If we don't cast, our actual input type is that of the first (or second) argument.
6893 cast_op0 = to_enclosed_unpacked_expression(id: op0);
6894 cast_op1 = to_enclosed_unpacked_expression(id: op1);
6895 input_type = type0.basetype;
6896 }
6897
6898 return expected_type;
6899}
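// Example: for an arithmetic shift-right with uint operands, input_type is Int, so
// cast_op0/cast_op1 become int(a) and int(b), and the caller then bitcasts the whole
// expression back to the result type, e.g. uint(int(a) >> int(b)).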
6900
6901bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
6902{
6903 // Some bitcasts may require complex casting sequences and are implemented here.
6904 // Otherwise, a simple unary function via bitcast_glsl_op will do.
6905
6906 auto &output_type = get<SPIRType>(id: result_type);
6907 auto &input_type = expression_type(id: op0);
6908 string expr;
6909
6910 if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
6911 expr = join(ts: "unpackFloat2x16(floatBitsToUint(", ts: to_unpacked_expression(id: op0), ts: "))");
6912 else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
6913 input_type.vecsize == 2)
6914 expr = join(ts: "uintBitsToFloat(packFloat2x16(", ts: to_unpacked_expression(id: op0), ts: "))");
6915 else
6916 return false;
6917
6918 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: op0));
6919 return true;
6920}
6921
6922void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6923 const char *op, SPIRType::BaseType input_type,
6924 bool skip_cast_if_equal_type,
6925 bool implicit_integer_promotion)
6926{
6927 string cast_op0, cast_op1;
6928 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
6929 auto &out_type = get<SPIRType>(id: result_type);
6930
6931 // We might have casted away from the result type, so bitcast again.
6932 // For example, arithmetic right shift with uint inputs.
6933 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
6934 auto bitop = join(ts&: cast_op0, ts: " ", ts&: op, ts: " ", ts&: cast_op1);
6935 string expr;
6936
6937 if (implicit_integer_promotion)
6938 {
6939 // Simple value cast.
6940 expr = join(ts: type_to_glsl(type: out_type), ts: '(', ts&: bitop, ts: ')');
6941 }
6942 else if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
6943 {
6944 expected_type.basetype = input_type;
6945 expr = join(ts: bitcast_glsl_op(result_type: out_type, argument_type: expected_type), ts: '(', ts&: bitop, ts: ')');
6946 }
6947 else
6948 {
6949 expr = std::move(bitop);
6950 }
6951
6952 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1));
6953 inherit_expression_dependencies(dst: result_id, source: op0);
6954 inherit_expression_dependencies(dst: result_id, source: op1);
6955}
6956
6957void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
6958{
6959 bool forward = should_forward(id: op0);
6960 emit_op(result_type, result_id, rhs: join(ts&: op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ")"), forwarding: forward);
6961 inherit_expression_dependencies(dst: result_id, source: op0);
6962}
6963
6964void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6965 const char *op)
6966{
6967 // Opaque types (e.g. OpTypeSampledImage) must always be forwarded in GLSL
6968 const auto &type = get_type(id: result_type);
6969 bool must_forward = type_is_opaque_value(type);
6970 bool forward = must_forward || (should_forward(id: op0) && should_forward(id: op1));
6971 emit_op(result_type, result_id, rhs: join(ts&: op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ", ", ts: to_unpacked_expression(id: op1), ts: ")"),
6972 forwarding: forward);
6973 inherit_expression_dependencies(dst: result_id, source: op0);
6974 inherit_expression_dependencies(dst: result_id, source: op1);
6975}
6976
6977void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6978 const char *op)
6979{
6980 auto &type = get<SPIRType>(id: result_type);
6981 if (type_is_floating_point(type))
6982 {
6983 if (!options.vulkan_semantics)
6984 SPIRV_CROSS_THROW("Floating point atomics requires Vulkan semantics.");
6985 if (options.es)
6986 SPIRV_CROSS_THROW("Floating point atomics requires desktop GLSL.");
6987 require_extension_internal(ext: "GL_EXT_shader_atomic_float");
6988 }
6989
6990 forced_temporaries.insert(x: result_id);
6991 emit_op(result_type, result_id,
6992 rhs: join(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: op0), ts: ", ",
6993 ts: to_unpacked_expression(id: op1), ts: ")"), forwarding: false);
6994 flush_all_atomic_capable_variables();
6995}
6996
6997void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
6998 uint32_t op0, uint32_t op1, uint32_t op2,
6999 const char *op)
7000{
7001 forced_temporaries.insert(x: result_id);
7002 emit_op(result_type, result_id,
7003 rhs: join(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: op0), ts: ", ",
7004 ts: to_unpacked_expression(id: op1), ts: ", ", ts: to_unpacked_expression(id: op2), ts: ")"), forwarding: false);
7005 flush_all_atomic_capable_variables();
7006}
7007
7008void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
7009 SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
7010{
7011 auto &out_type = get<SPIRType>(id: result_type);
7012 auto &expr_type = expression_type(id: op0);
7013 auto expected_type = out_type;
7014
7015 // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
7016 expected_type.basetype = input_type;
7017 expected_type.width = expr_type.width;
7018
7019 string cast_op;
7020 if (expr_type.basetype != input_type)
7021 {
7022 if (expr_type.basetype == SPIRType::Boolean)
7023 cast_op = join(ts: type_to_glsl(type: expected_type), ts: "(", ts: to_unpacked_expression(id: op0), ts: ")");
7024 else
7025 cast_op = bitcast_glsl(result_type: expected_type, arg: op0);
7026 }
7027 else
7028 cast_op = to_unpacked_expression(id: op0);
7029
7030 string expr;
7031 if (out_type.basetype != expected_result_type)
7032 {
7033 expected_type.basetype = expected_result_type;
7034 expected_type.width = out_type.width;
7035 if (out_type.basetype == SPIRType::Boolean)
7036 expr = type_to_glsl(type: out_type);
7037 else
7038 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
7039 expr += '(';
7040 expr += join(ts&: op, ts: "(", ts&: cast_op, ts: ")");
7041 expr += ')';
7042 }
7043 else
7044 {
7045 expr += join(ts&: op, ts: "(", ts&: cast_op, ts: ")");
7046 }
7047
7048 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0));
7049 inherit_expression_dependencies(dst: result_id, source: op0);
7050}
7051
7052// Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
7053// and different vector sizes all at once. Need a special purpose method here.
7054void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
7055 uint32_t op2, const char *op,
7056 SPIRType::BaseType expected_result_type,
7057 SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
7058 SPIRType::BaseType input_type2)
7059{
7060 auto &out_type = get<SPIRType>(id: result_type);
7061 auto expected_type = out_type;
7062 expected_type.basetype = input_type0;
7063
7064 string cast_op0 =
7065 expression_type(id: op0).basetype != input_type0 ? bitcast_glsl(result_type: expected_type, arg: op0) : to_unpacked_expression(id: op0);
7066
7067 auto op1_expr = to_unpacked_expression(id: op1);
7068 auto op2_expr = to_unpacked_expression(id: op2);
7069
7070 // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
7071 expected_type.basetype = input_type1;
7072 expected_type.vecsize = 1;
7073 string cast_op1 = expression_type(id: op1).basetype != input_type1 ?
7074 join(ts: type_to_glsl_constructor(type: expected_type), ts: "(", ts&: op1_expr, ts: ")") :
7075 op1_expr;
7076
7077 expected_type.basetype = input_type2;
7078 expected_type.vecsize = 1;
7079 string cast_op2 = expression_type(id: op2).basetype != input_type2 ?
7080 join(ts: type_to_glsl_constructor(type: expected_type), ts: "(", ts&: op2_expr, ts: ")") :
7081 op2_expr;
7082
7083 string expr;
7084 if (out_type.basetype != expected_result_type)
7085 {
7086 expected_type.vecsize = out_type.vecsize;
7087 expected_type.basetype = expected_result_type;
7088 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
7089 expr += '(';
7090 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ", ", ts&: cast_op2, ts: ")");
7091 expr += ')';
7092 }
7093 else
7094 {
7095 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ", ", ts&: cast_op2, ts: ")");
7096 }
7097
7098 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2));
7099 inherit_expression_dependencies(dst: result_id, source: op0);
7100 inherit_expression_dependencies(dst: result_id, source: op1);
7101 inherit_expression_dependencies(dst: result_id, source: op2);
7102}
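// For example, a BitFieldUExtract on signed 32-bit data may come out roughly as
// int(bitfieldExtract(uint(x), offset, count)): the base is bitcast to the expected
// sign, offset/count are value-cast to scalar int/uint only when their types differ,
// and the result is bitcast back if the result type's sign differs.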
7103
7104void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
7105 uint32_t op2, const char *op, SPIRType::BaseType input_type)
7106{
7107 auto &out_type = get<SPIRType>(id: result_type);
7108 auto expected_type = out_type;
7109 expected_type.basetype = input_type;
7110 string cast_op0 =
7111 expression_type(id: op0).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op0) : to_unpacked_expression(id: op0);
7112 string cast_op1 =
7113 expression_type(id: op1).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op1) : to_unpacked_expression(id: op1);
7114 string cast_op2 =
7115 expression_type(id: op2).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op2) : to_unpacked_expression(id: op2);
7116
7117 string expr;
7118 if (out_type.basetype != input_type)
7119 {
7120 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
7121 expr += '(';
7122 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ", ", ts&: cast_op2, ts: ")");
7123 expr += ')';
7124 }
7125 else
7126 {
7127 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ", ", ts&: cast_op2, ts: ")");
7128 }
7129
7130 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2));
7131 inherit_expression_dependencies(dst: result_id, source: op0);
7132 inherit_expression_dependencies(dst: result_id, source: op1);
7133 inherit_expression_dependencies(dst: result_id, source: op2);
7134}
7135
7136void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
7137 uint32_t op1, const char *op, SPIRType::BaseType input_type)
7138{
7139 // Special-purpose method for implementing clustered subgroup opcodes.
7140 // The main difference is that op1 does not participate in any casting; it needs to be a literal.
7141 auto &out_type = get<SPIRType>(id: result_type);
7142 auto expected_type = out_type;
7143 expected_type.basetype = input_type;
7144 string cast_op0 =
7145 expression_type(id: op0).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op0) : to_unpacked_expression(id: op0);
7146
7147 string expr;
7148 if (out_type.basetype != input_type)
7149 {
7150 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
7151 expr += '(';
7152 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts: to_expression(id: op1), ts: ")");
7153 expr += ')';
7154 }
7155 else
7156 {
7157 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts: to_expression(id: op1), ts: ")");
7158 }
7159
7160 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0));
7161 inherit_expression_dependencies(dst: result_id, source: op0);
7162}
7163
7164void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
7165 const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
7166{
7167 string cast_op0, cast_op1;
7168 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
7169 auto &out_type = get<SPIRType>(id: result_type);
7170
7171 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
7172 string expr;
7173 if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
7174 {
7175 expected_type.basetype = input_type;
7176 expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type);
7177 expr += '(';
7178 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ")");
7179 expr += ')';
7180 }
7181 else
7182 {
7183 expr += join(ts&: op, ts: "(", ts&: cast_op0, ts: ", ", ts&: cast_op1, ts: ")");
7184 }
7185
7186 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1));
7187 inherit_expression_dependencies(dst: result_id, source: op0);
7188 inherit_expression_dependencies(dst: result_id, source: op1);
7189}
7190
7191void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
7192 uint32_t op2, const char *op)
7193{
7194 bool forward = should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2);
7195 emit_op(result_type, result_id,
7196 rhs: join(ts&: op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ", ", ts: to_unpacked_expression(id: op1), ts: ", ",
7197 ts: to_unpacked_expression(id: op2), ts: ")"),
7198 forwarding: forward);
7199
7200 inherit_expression_dependencies(dst: result_id, source: op0);
7201 inherit_expression_dependencies(dst: result_id, source: op1);
7202 inherit_expression_dependencies(dst: result_id, source: op2);
7203}
7204
7205void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
7206 uint32_t op2, uint32_t op3, const char *op)
7207{
7208 bool forward = should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2) && should_forward(id: op3);
7209 emit_op(result_type, result_id,
7210 rhs: join(ts&: op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ", ", ts: to_unpacked_expression(id: op1), ts: ", ",
7211 ts: to_unpacked_expression(id: op2), ts: ", ", ts: to_unpacked_expression(id: op3), ts: ")"),
7212 forwarding: forward);
7213
7214 inherit_expression_dependencies(dst: result_id, source: op0);
7215 inherit_expression_dependencies(dst: result_id, source: op1);
7216 inherit_expression_dependencies(dst: result_id, source: op2);
7217 inherit_expression_dependencies(dst: result_id, source: op3);
7218}
7219
7220void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
7221 uint32_t op2, uint32_t op3, const char *op,
7222 SPIRType::BaseType offset_count_type)
7223{
7224 // We only need to cast the offset/count arguments. The types of base/insert must be the same as the result type,
7225 // and bitfieldInsert is sign-invariant.
7226 bool forward = should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2) && should_forward(id: op3);
7227
7228 auto op0_expr = to_unpacked_expression(id: op0);
7229 auto op1_expr = to_unpacked_expression(id: op1);
7230 auto op2_expr = to_unpacked_expression(id: op2);
7231 auto op3_expr = to_unpacked_expression(id: op3);
7232
7233 assert(offset_count_type == SPIRType::UInt || offset_count_type == SPIRType::Int);
7234 SPIRType target_type { OpTypeInt };
7235 target_type.width = 32;
7236 target_type.vecsize = 1;
7237 target_type.basetype = offset_count_type;
7238
7239 if (expression_type(id: op2).basetype != offset_count_type)
7240 {
7241 // Value-cast here. Input might be 16-bit. GLSL requires int.
7242 op2_expr = join(ts: type_to_glsl_constructor(type: target_type), ts: "(", ts&: op2_expr, ts: ")");
7243 }
7244
7245 if (expression_type(id: op3).basetype != offset_count_type)
7246 {
7247 // Value-cast here. Input might be 16-bit. GLSL requires int.
7248 op3_expr = join(ts: type_to_glsl_constructor(type: target_type), ts: "(", ts&: op3_expr, ts: ")");
7249 }
7250
7251 emit_op(result_type, result_id, rhs: join(ts&: op, ts: "(", ts&: op0_expr, ts: ", ", ts&: op1_expr, ts: ", ", ts&: op2_expr, ts: ", ", ts&: op3_expr, ts: ")"),
7252 forwarding: forward);
7253
7254 inherit_expression_dependencies(dst: result_id, source: op0);
7255 inherit_expression_dependencies(dst: result_id, source: op1);
7256 inherit_expression_dependencies(dst: result_id, source: op2);
7257 inherit_expression_dependencies(dst: result_id, source: op3);
7258}
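// For example, with 16-bit offset/count operands this emits something like
// bitfieldInsert(base, insert, int(offset), int(count)); base and insert are passed
// through untouched since their types already match the result type.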
7259
7260string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
7261{
7262 const char *type;
7263 switch (imgtype.image.dim)
7264 {
7265 case spv::Dim1D:
7266 // 1D textures are not supported in ES, so force the 2D path there.
7267 if (options.es)
7268 type = "2D";
7269 else
7270 type = imgtype.image.arrayed ? "1DArray" : "1D";
7271 break;
7272 case spv::Dim2D:
7273 type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
7274 break;
7275 case spv::Dim3D:
7276 type = "3D";
7277 break;
7278 case spv::DimCube:
7279 type = "Cube";
7280 break;
7281 case spv::DimRect:
7282 type = "2DRect";
7283 break;
7284 case spv::DimBuffer:
7285 type = "Buffer";
7286 break;
7287 case spv::DimSubpassData:
7288 type = "2D";
7289 break;
7290 default:
7291 type = "";
7292 break;
7293 }
7294
7295 // In legacy GLSL, an extension is required for textureLod in the fragment
7296 // shader or textureGrad anywhere.
7297 bool legacy_lod_ext = false;
7298 auto &execution = get_entry_point();
7299 if (op == "textureGrad" || op == "textureProjGrad" ||
7300 ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
7301 {
7302 if (is_legacy_es())
7303 {
7304 legacy_lod_ext = true;
7305 require_extension_internal(ext: "GL_EXT_shader_texture_lod");
7306 }
7307 else if (is_legacy_desktop())
7308 require_extension_internal(ext: "GL_ARB_shader_texture_lod");
7309 }
7310
7311 if (op == "textureLodOffset" || op == "textureProjLodOffset")
7312 {
7313 if (is_legacy_es())
7314 SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
7315
7316 require_extension_internal(ext: "GL_EXT_gpu_shader4");
7317 }
7318
7319 // GLES has very limited support for shadow samplers.
7320 // Basically, shadow2D and shadow2DProj work through EXT_shadow_samplers;
7321 // everything else can just throw.
7322 bool is_comparison = is_depth_image(type: imgtype, id: tex);
7323 if (is_comparison && is_legacy_es())
7324 {
7325 if (op == "texture" || op == "textureProj")
7326 require_extension_internal(ext: "GL_EXT_shadow_samplers");
7327 else
7328 SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
7329
7330 if (imgtype.image.dim == spv::DimCube)
7331 return "shadowCubeNV";
7332 }
7333
7334 if (op == "textureSize")
7335 {
7336 if (is_legacy_es())
7337 SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
7338 if (is_comparison)
7339 SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
7340 require_extension_internal(ext: "GL_EXT_gpu_shader4");
7341 }
7342
7343 if (op == "texelFetch" && is_legacy_es())
7344 SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");
7345
7346 bool is_es_and_depth = is_legacy_es() && is_comparison;
7347 std::string type_prefix = is_comparison ? "shadow" : "texture";
7348
7349 if (op == "texture")
7350 return is_es_and_depth ? join(ts&: type_prefix, ts&: type, ts: "EXT") : join(ts&: type_prefix, ts&: type);
7351 else if (op == "textureLod")
7352 return join(ts&: type_prefix, ts&: type, ts: legacy_lod_ext ? "LodEXT" : "Lod");
7353 else if (op == "textureProj")
7354 return join(ts&: type_prefix, ts&: type, ts: is_es_and_depth ? "ProjEXT" : "Proj");
7355 else if (op == "textureGrad")
7356 return join(ts&: type_prefix, ts&: type, ts: is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
7357 else if (op == "textureProjLod")
7358 return join(ts&: type_prefix, ts&: type, ts: legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
7359 else if (op == "textureLodOffset")
7360 return join(ts&: type_prefix, ts&: type, ts: "LodOffset");
7361 else if (op == "textureProjGrad")
7362 return join(ts&: type_prefix, ts&: type,
7363 ts: is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
7364 else if (op == "textureProjLodOffset")
7365 return join(ts&: type_prefix, ts&: type, ts: "ProjLodOffset");
7366 else if (op == "textureSize")
7367 return join(ts: "textureSize", ts&: type);
7368 else if (op == "texelFetch")
7369 return join(ts: "texelFetch", ts&: type);
7370 else
7371 {
7372 SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
7373 }
7374}
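// Examples of the legacy names produced above (for the listed inputs):
//   "texture"     + sampler2D                        -> texture2D
//   "texture"     + depth sampler2D on legacy ES     -> shadow2DEXT
//   "textureLod"  + sampler2D in a legacy ES frag    -> texture2DLodEXT
//   "textureGrad" + samplerCube on legacy desktop    -> textureCubeGradARB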
7375
7376bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
7377{
7378 auto *cleft = maybe_get<SPIRConstant>(id: left);
7379 auto *cright = maybe_get<SPIRConstant>(id: right);
7380 auto &lerptype = expression_type(id: lerp);
7381
7382 // If our targets aren't constants, we cannot use construction.
7383 if (!cleft || !cright)
7384 return false;
7385
7386 // If our targets are spec constants, we cannot use construction.
7387 if (cleft->specialization || cright->specialization)
7388 return false;
7389
7390 auto &value_type = get<SPIRType>(id: cleft->constant_type);
7391
7392 if (lerptype.basetype != SPIRType::Boolean)
7393 return false;
7394 if (value_type.basetype == SPIRType::Struct || is_array(type: value_type))
7395 return false;
7396 if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize)
7397 return false;
7398
7399 // The only valid way in SPIR-V 1.4 to use matrices in select is a scalar select.
7400 // The matrix(scalar) constructor fills in the diagonal, so this gets messy very quickly.
7401 // Just avoid this case.
7402 if (value_type.columns > 1)
7403 return false;
7404
7405 // If our bool selects between 0 and 1, we can cast from bool instead, making this a trivial constructor cast.
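 // For example (illustrative SPIR-V/GLSL, not taken from a real module):
 // OpSelect %cond %int_1 %int_0 with an int result reduces to int(cond) rather than a mix()/ternary.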
7406 bool ret = true;
7407 for (uint32_t row = 0; ret && row < value_type.vecsize; row++)
7408 {
7409 switch (type.basetype)
7410 {
7411 case SPIRType::Short:
7412 case SPIRType::UShort:
7413 ret = cleft->scalar_u16(col: 0, row) == 0 && cright->scalar_u16(col: 0, row) == 1;
7414 break;
7415
7416 case SPIRType::Int:
7417 case SPIRType::UInt:
7418 ret = cleft->scalar(col: 0, row) == 0 && cright->scalar(col: 0, row) == 1;
7419 break;
7420
7421 case SPIRType::Half:
7422 ret = cleft->scalar_f16(col: 0, row) == 0.0f && cright->scalar_f16(col: 0, row) == 1.0f;
7423 break;
7424
7425 case SPIRType::Float:
7426 ret = cleft->scalar_f32(col: 0, row) == 0.0f && cright->scalar_f32(col: 0, row) == 1.0f;
7427 break;
7428
7429 case SPIRType::Double:
7430 ret = cleft->scalar_f64(col: 0, row) == 0.0 && cright->scalar_f64(col: 0, row) == 1.0;
7431 break;
7432
7433 case SPIRType::Int64:
7434 case SPIRType::UInt64:
7435 ret = cleft->scalar_u64(col: 0, row) == 0 && cright->scalar_u64(col: 0, row) == 1;
7436 break;
7437
7438 default:
7439 ret = false;
7440 break;
7441 }
7442 }
7443
7444 if (ret)
7445 op = type_to_glsl_constructor(type);
7446 return ret;
7447}
7448
7449string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
7450 uint32_t false_value)
7451{
7452 string expr;
7453 auto &lerptype = expression_type(id: select);
7454
7455 if (lerptype.vecsize == 1)
7456 expr = join(ts: to_enclosed_expression(id: select), ts: " ? ", ts: to_enclosed_pointer_expression(id: true_value), ts: " : ",
7457 ts: to_enclosed_pointer_expression(id: false_value));
7458 else
7459 {
7460 auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(id: expression, index: i); };
7461
7462 expr = type_to_glsl_constructor(type: restype);
7463 expr += "(";
7464 for (uint32_t i = 0; i < restype.vecsize; i++)
7465 {
7466 expr += swiz(select, i);
7467 expr += " ? ";
7468 expr += swiz(true_value, i);
7469 expr += " : ";
7470 expr += swiz(false_value, i);
7471 if (i + 1 < restype.vecsize)
7472 expr += ", ";
7473 }
7474 expr += ")";
7475 }
7476
7477 return expr;
7478}
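// As an illustration (names are hypothetical): with a bvec2 selector s, true-value t and false-value f,
// this emits vec2(s.x ? t.x : f.x, s.y ? t.y : f.y).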
7479
7480void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
7481{
7482 auto &lerptype = expression_type(id: lerp);
7483 auto &restype = get<SPIRType>(id: result_type);
7484
7485 // If this results in a variable pointer, assume it may be written through.
7486 if (restype.pointer)
7487 {
7488 register_write(chain: left);
7489 register_write(chain: right);
7490 }
7491
7492 string mix_op;
7493 bool has_boolean_mix = *backend.boolean_mix_function &&
7494 ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
7495 bool trivial_mix = to_trivial_mix_op(type: restype, op&: mix_op, left, right, lerp);
7496
7497 // Cannot use boolean mix when the lerp argument is just a single boolean;
7498 // fall back to regular ternary expressions.
7499 if (lerptype.vecsize == 1)
7500 has_boolean_mix = false;
7501
7502 // If we can reduce the mix to a simple cast, do so.
7503 // This helps for cases like int(bool) and uint(bool), which are implemented with
7504 // OpSelect between 1 and 0.
7505 if (trivial_mix)
7506 {
7507 emit_unary_func_op(result_type, result_id: id, op0: lerp, op: mix_op.c_str());
7508 }
7509 else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
7510 {
7511 // Boolean mix not supported on desktop without extension.
7512 // Was added in OpenGL 4.5 with ES 3.1 compat.
7513 //
7514 // Could use GL_EXT_shader_integer_mix on desktop at least,
7515 // but Apple doesn't support it. :(
7516 // Just implement it as ternary expressions.
7517 auto expr = to_ternary_expression(restype: get<SPIRType>(id: result_type), select: lerp, true_value: right, false_value: left);
7518 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: left) && should_forward(id: right) && should_forward(id: lerp));
7519 inherit_expression_dependencies(dst: id, source: left);
7520 inherit_expression_dependencies(dst: id, source: right);
7521 inherit_expression_dependencies(dst: id, source: lerp);
7522 }
7523 else if (lerptype.basetype == SPIRType::Boolean)
7524 emit_trinary_func_op(result_type, result_id: id, op0: left, op1: right, op2: lerp, op: backend.boolean_mix_function);
7525 else
7526 emit_trinary_func_op(result_type, result_id: id, op0: left, op1: right, op2: lerp, op: "mix");
7527}
7528
7529string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
7530{
7531 // Keep track of the array indices we have used to load the image.
7532 // We'll need to use the same array index into the combined image sampler array.
7533 auto image_expr = to_non_uniform_aware_expression(id: image_id);
7534 string array_expr;
7535 auto array_index = image_expr.find_first_of(c: '[');
7536 if (array_index != string::npos)
7537 array_expr = image_expr.substr(pos: array_index, n: string::npos);
7538
7539 auto &args = current_function->arguments;
7540
7541 // For GLSL and ESSL targets, we must enumerate all possible combinations of sampler2D(texture2D, sampler)
7542 // and redirect each combination to a new sampler2D uniform.
7543 auto *image = maybe_get_backing_variable(chain: image_id);
7544 auto *samp = maybe_get_backing_variable(chain: samp_id);
7545 if (image)
7546 image_id = image->self;
7547 if (samp)
7548 samp_id = samp->self;
7549
7550 auto image_itr = find_if(first: begin(cont&: args), last: end(cont&: args),
7551 pred: [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
7552
7553 auto sampler_itr = find_if(first: begin(cont&: args), last: end(cont&: args),
7554 pred: [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
7555
7556 if (image_itr != end(cont&: args) || sampler_itr != end(cont&: args))
7557 {
7558  // If the image or sampler originates from a function parameter, we will find it in our argument list.
7559 bool global_image = image_itr == end(cont&: args);
7560 bool global_sampler = sampler_itr == end(cont&: args);
7561 VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(cont&: args)));
7562 VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(cont&: args)));
7563
7564 auto &combined = current_function->combined_parameters;
7565 auto itr = find_if(first: begin(cont&: combined), last: end(cont&: combined), pred: [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
7566 return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
7567 p.sampler_id == sid;
7568 });
7569
7570 if (itr != end(cont&: combined))
7571 return to_expression(id: itr->id) + array_expr;
7572 else
7573 {
7574 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
7575 "build_combined_image_samplers() used "
7576 "before compile() was called?");
7577 }
7578 }
7579 else
7580 {
7581 // For global sampler2D, look directly at the global remapping table.
7582 auto &mapping = combined_image_samplers;
7583 auto itr = find_if(first: begin(cont&: mapping), last: end(cont&: mapping), pred: [image_id, samp_id](const CombinedImageSampler &combined) {
7584 return combined.image_id == image_id && combined.sampler_id == samp_id;
7585 });
7586
7587 if (itr != end(cont&: combined_image_samplers))
7588 return to_expression(id: itr->combined_id) + array_expr;
7589 else
7590 {
7591 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
7592 "before compile() was called?");
7593 }
7594 }
7595}
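// Rough sketch of the remapping this enables (the combined uniform's name is whatever
// build_combined_image_samplers() assigned; the identifiers below are purely illustrative):
//   texture(sampler2D(uTex, uSamp), uv)  ->  texture(uCombinedTexSamp, uv)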
7596
7597bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op, const uint32_t *ops)
7598{
7599 switch (op)
7600 {
7601 case OpGroupNonUniformElect:
7602 case OpGroupNonUniformBallot:
7603 case OpGroupNonUniformBallotFindLSB:
7604 case OpGroupNonUniformBallotFindMSB:
7605 case OpGroupNonUniformBroadcast:
7606 case OpGroupNonUniformBroadcastFirst:
7607 case OpGroupNonUniformAll:
7608 case OpGroupNonUniformAny:
7609 case OpGroupNonUniformAllEqual:
7610 case OpControlBarrier:
7611 case OpMemoryBarrier:
7612 case OpGroupNonUniformBallotBitCount:
7613 case OpGroupNonUniformBallotBitExtract:
7614 case OpGroupNonUniformInverseBallot:
7615 return true;
7616 case OpGroupNonUniformIAdd:
7617 case OpGroupNonUniformFAdd:
7618 case OpGroupNonUniformIMul:
7619 case OpGroupNonUniformFMul:
7620 {
7621 const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
7622 if (operation == GroupOperationReduce || operation == GroupOperationInclusiveScan ||
7623 operation == GroupOperationExclusiveScan)
7624 {
7625 return true;
7626 }
7627 else
7628 {
7629 return false;
7630 }
7631 }
7632 default:
7633 return false;
7634 }
7635}
7636
7637void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
7638{
7639 if (options.vulkan_semantics && combined_image_samplers.empty())
7640 {
7641 emit_binary_func_op(result_type, result_id, op0: image_id, op1: samp_id,
7642 op: type_to_glsl(type: get<SPIRType>(id: result_type), id: result_id).c_str());
7643 }
7644 else
7645 {
7646 // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
7647 emit_op(result_type, result_id, rhs: to_combined_image_sampler(image_id, samp_id), forwarding: true, suppress_usage_tracking: true);
7648 }
7649
7650 // Make sure to suppress usage tracking and any expression invalidation.
7651 // It is illegal to create temporaries of opaque types.
7652 forwarded_temporaries.erase(x: result_id);
7653}
7654
7655static inline bool image_opcode_is_sample_no_dref(Op op)
7656{
7657 switch (op)
7658 {
7659 case OpImageSampleExplicitLod:
7660 case OpImageSampleImplicitLod:
7661 case OpImageSampleProjExplicitLod:
7662 case OpImageSampleProjImplicitLod:
7663 case OpImageFetch:
7664 case OpImageRead:
7665 case OpImageSparseSampleExplicitLod:
7666 case OpImageSparseSampleImplicitLod:
7667 case OpImageSparseSampleProjExplicitLod:
7668 case OpImageSparseSampleProjImplicitLod:
7669 case OpImageSparseFetch:
7670 case OpImageSparseRead:
7671 return true;
7672
7673 default:
7674 return false;
7675 }
7676}
7677
7678void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
7679 uint32_t &texel_id)
7680{
7681 // Need to allocate two temporaries.
7682 if (options.es)
7683 SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
7684 require_extension_internal(ext: "GL_ARB_sparse_texture2");
7685
7686 auto &temps = extra_sub_expressions[id];
7687 if (temps == 0)
7688 temps = ir.increase_bound_by(count: 2);
7689
7690 feedback_id = temps + 0;
7691 texel_id = temps + 1;
7692
7693 auto &return_type = get<SPIRType>(id: result_type_id);
7694 if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
7695 SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
7696 emit_uninitialized_temporary(result_type: return_type.member_types[0], result_id: feedback_id);
7697 emit_uninitialized_temporary(result_type: return_type.member_types[1], result_id: texel_id);
7698}
7699
7700uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
7701{
7702 auto itr = extra_sub_expressions.find(x: id);
7703 if (itr == extra_sub_expressions.end())
7704 return 0;
7705 else
7706 return itr->second + 1;
7707}
7708
7709void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
7710{
7711 auto *ops = stream(instr: i);
7712 auto op = static_cast<Op>(i.op);
7713
7714 SmallVector<uint32_t> inherited_expressions;
7715
7716 uint32_t result_type_id = ops[0];
7717 uint32_t id = ops[1];
7718 auto &return_type = get<SPIRType>(id: result_type_id);
7719
7720 uint32_t sparse_code_id = 0;
7721 uint32_t sparse_texel_id = 0;
7722 if (sparse)
7723 emit_sparse_feedback_temporaries(result_type_id, id, feedback_id&: sparse_code_id, texel_id&: sparse_texel_id);
7724
7725 bool forward = false;
7726 string expr = to_texture_op(i, sparse, forward: &forward, inherited_expressions);
7727
7728 if (sparse)
7729 {
7730 statement(ts: to_expression(id: sparse_code_id), ts: " = ", ts&: expr, ts: ";");
7731 expr = join(ts: type_to_glsl(type: return_type), ts: "(", ts: to_expression(id: sparse_code_id), ts: ", ", ts: to_expression(id: sparse_texel_id),
7732 ts: ")");
7733 forward = true;
7734 inherited_expressions.clear();
7735 }
7736
7737 emit_op(result_type: result_type_id, result_id: id, rhs: expr, forwarding: forward);
7738 for (auto &inherit : inherited_expressions)
7739 inherit_expression_dependencies(dst: id, source: inherit);
7740
7741 // Do not register sparse ops as control dependent as they are always lowered to a temporary.
7742 switch (op)
7743 {
7744 case OpImageSampleDrefImplicitLod:
7745 case OpImageSampleImplicitLod:
7746 case OpImageSampleProjImplicitLod:
7747 case OpImageSampleProjDrefImplicitLod:
7748 register_control_dependent_expression(expr: id);
7749 break;
7750
7751 default:
7752 break;
7753 }
7754}
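// Sketch of the sparse feedback lowering above (identifiers illustrative): the call is split into
//   _code = sparseTextureARB(s, uv, _texel);
//   ResType _res = ResType(_code, _texel);
// so the struct result expected by the OpImageSparse* opcodes can be reconstructed.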
7755
7756std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
7757 SmallVector<uint32_t> &inherited_expressions)
7758{
7759 auto *ops = stream(instr: i);
7760 auto op = static_cast<Op>(i.op);
7761 uint32_t length = i.length;
7762
7763 uint32_t result_type_id = ops[0];
7764 VariableID img = ops[2];
7765 uint32_t coord = ops[3];
7766 uint32_t dref = 0;
7767 uint32_t comp = 0;
7768 bool gather = false;
7769 bool proj = false;
7770 bool fetch = false;
7771 bool nonuniform_expression = false;
7772 const uint32_t *opt = nullptr;
7773
7774 auto &result_type = get<SPIRType>(id: result_type_id);
7775
7776 inherited_expressions.push_back(t: coord);
7777 if (has_decoration(id: img, decoration: DecorationNonUniform) && !maybe_get_backing_variable(chain: img))
7778 nonuniform_expression = true;
7779
7780 switch (op)
7781 {
7782 case OpImageSampleDrefImplicitLod:
7783 case OpImageSampleDrefExplicitLod:
7784 case OpImageSparseSampleDrefImplicitLod:
7785 case OpImageSparseSampleDrefExplicitLod:
7786 dref = ops[4];
7787 opt = &ops[5];
7788 length -= 5;
7789 break;
7790
7791 case OpImageSampleProjDrefImplicitLod:
7792 case OpImageSampleProjDrefExplicitLod:
7793 case OpImageSparseSampleProjDrefImplicitLod:
7794 case OpImageSparseSampleProjDrefExplicitLod:
7795 dref = ops[4];
7796 opt = &ops[5];
7797 length -= 5;
7798 proj = true;
7799 break;
7800
7801 case OpImageDrefGather:
7802 case OpImageSparseDrefGather:
7803 dref = ops[4];
7804 opt = &ops[5];
7805 length -= 5;
7806 gather = true;
7807 if (options.es && options.version < 310)
7808 SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
7809 else if (!options.es && options.version < 400)
7810 SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
7811 break;
7812
7813 case OpImageGather:
7814 case OpImageSparseGather:
7815 comp = ops[4];
7816 opt = &ops[5];
7817 length -= 5;
7818 gather = true;
7819 if (options.es && options.version < 310)
7820 SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
7821 else if (!options.es && options.version < 400)
7822 {
7823 if (!expression_is_constant_null(id: comp))
7824 SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
7825 require_extension_internal(ext: "GL_ARB_texture_gather");
7826 }
7827 break;
7828
7829 case OpImageFetch:
7830 case OpImageSparseFetch:
7831 case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
7832 opt = &ops[4];
7833 length -= 4;
7834 fetch = true;
7835 break;
7836
7837 case OpImageSampleProjImplicitLod:
7838 case OpImageSampleProjExplicitLod:
7839 case OpImageSparseSampleProjImplicitLod:
7840 case OpImageSparseSampleProjExplicitLod:
7841 opt = &ops[4];
7842 length -= 4;
7843 proj = true;
7844 break;
7845
7846 default:
7847 opt = &ops[4];
7848 length -= 4;
7849 break;
7850 }
7851
7852 // Bypass pointers because we need the real image struct
7853 auto &type = expression_type(id: img);
7854 auto &imgtype = get<SPIRType>(id: type.self);
7855
7856 uint32_t coord_components = 0;
7857 switch (imgtype.image.dim)
7858 {
7859 case spv::Dim1D:
7860 coord_components = 1;
7861 break;
7862 case spv::Dim2D:
7863 coord_components = 2;
7864 break;
7865 case spv::Dim3D:
7866 coord_components = 3;
7867 break;
7868 case spv::DimCube:
7869 coord_components = 3;
7870 break;
7871 case spv::DimBuffer:
7872 coord_components = 1;
7873 break;
7874 default:
7875 coord_components = 2;
7876 break;
7877 }
7878
7879 if (dref)
7880 inherited_expressions.push_back(t: dref);
7881
7882 if (proj)
7883 coord_components++;
7884 if (imgtype.image.arrayed)
7885 coord_components++;
7886
7887 uint32_t bias = 0;
7888 uint32_t lod = 0;
7889 uint32_t grad_x = 0;
7890 uint32_t grad_y = 0;
7891 uint32_t coffset = 0;
7892 uint32_t offset = 0;
7893 uint32_t coffsets = 0;
7894 uint32_t sample = 0;
7895 uint32_t minlod = 0;
7896 uint32_t flags = 0;
7897
7898 if (length)
7899 {
7900 flags = *opt++;
7901 length--;
7902 }
7903
7904 auto test = [&](uint32_t &v, uint32_t flag) {
7905 if (length && (flags & flag))
7906 {
7907 v = *opt++;
7908 inherited_expressions.push_back(t: v);
7909 length--;
7910 }
7911 };
7912
7913 test(bias, ImageOperandsBiasMask);
7914 test(lod, ImageOperandsLodMask);
7915 test(grad_x, ImageOperandsGradMask);
7916 test(grad_y, ImageOperandsGradMask);
7917 test(coffset, ImageOperandsConstOffsetMask);
7918 test(offset, ImageOperandsOffsetMask);
7919 test(coffsets, ImageOperandsConstOffsetsMask);
7920 test(sample, ImageOperandsSampleMask);
7921 test(minlod, ImageOperandsMinLodMask);
7922
7923 TextureFunctionBaseArguments base_args = {};
7924 base_args.img = img;
7925 base_args.imgtype = &imgtype;
7926 base_args.is_fetch = fetch != 0;
7927 base_args.is_gather = gather != 0;
7928 base_args.is_proj = proj != 0;
7929
7930 string expr;
7931 TextureFunctionNameArguments name_args = {};
7932
7933 name_args.base = base_args;
7934 name_args.has_array_offsets = coffsets != 0;
7935 name_args.has_offset = coffset != 0 || offset != 0;
7936 name_args.has_grad = grad_x != 0 || grad_y != 0;
7937 name_args.has_dref = dref != 0;
7938 name_args.is_sparse_feedback = sparse;
7939 name_args.has_min_lod = minlod != 0;
7940 name_args.lod = lod;
7941 expr += to_function_name(args: name_args);
7942 expr += "(";
7943
7944 uint32_t sparse_texel_id = 0;
7945 if (sparse)
7946 sparse_texel_id = get_sparse_feedback_texel_id(id: ops[1]);
7947
7948 TextureFunctionArguments args = {};
7949 args.base = base_args;
7950 args.coord = coord;
7951 args.coord_components = coord_components;
7952 args.dref = dref;
7953 args.grad_x = grad_x;
7954 args.grad_y = grad_y;
7955 args.lod = lod;
7956 args.has_array_offsets = coffsets != 0;
7957
7958 if (coffsets)
7959 args.offset = coffsets;
7960 else if (coffset)
7961 args.offset = coffset;
7962 else
7963 args.offset = offset;
7964
7965 args.bias = bias;
7966 args.component = comp;
7967 args.sample = sample;
7968 args.sparse_texel = sparse_texel_id;
7969 args.min_lod = minlod;
7970 args.nonuniform_expression = nonuniform_expression;
7971 expr += to_function_args(args, p_forward: forward);
7972 expr += ")";
7973
7974 // texture(samplerXShadow) returns float, but the legacy desktop shadowX() functions return vec4. Swizzle here.
7975 if (is_legacy() && !options.es && is_depth_image(type: imgtype, id: img))
7976 expr += ".r";
7977
7978 // Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
7979 // Remap back to 4 components as sampling opcodes expect.
7980 if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
7981 {
7982 bool image_is_depth = false;
7983 const auto *combined = maybe_get<SPIRCombinedImageSampler>(id: img);
7984 VariableID image_id = combined ? combined->image : img;
7985
7986 if (combined && is_depth_image(type: imgtype, id: combined->image))
7987 image_is_depth = true;
7988 else if (is_depth_image(type: imgtype, id: img))
7989 image_is_depth = true;
7990
7991 // We must also check the backing variable for the image.
7992 // We might have loaded an OpImage, and used that handle for two different purposes.
7993 // Once with comparison, once without.
7994 auto *image_variable = maybe_get_backing_variable(chain: image_id);
7995 if (image_variable && is_depth_image(type: get<SPIRType>(id: image_variable->basetype), id: image_variable->self))
7996 image_is_depth = true;
7997
7998 if (image_is_depth)
7999 expr = remap_swizzle(out_type: result_type, input_components: 1, expr);
8000 }
8001
8002 if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
8003 {
8004  // Just value cast (narrowing) to the expected type, since we cannot rely on narrowing to happen automatically.
8005  // Hopefully the compiler picks this up and converts the texturing instruction to the appropriate precision.
8006 expr = join(ts: type_to_glsl_constructor(type: result_type), ts: "(", ts&: expr, ts: ")");
8007 }
8008
8009 // Deals with reads from MSL. We might need to downconvert to fewer components.
8010 if (op == OpImageRead)
8011 expr = remap_swizzle(out_type: result_type, input_components: 4, expr);
8012
8013 return expr;
8014}
8015
8016bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
8017{
8018 auto *c = maybe_get<SPIRConstant>(id);
8019 if (!c)
8020 return false;
8021 return c->constant_is_null();
8022}
8023
8024bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
8025{
8026 auto &type = expression_type(id: ptr);
8027 if (!is_array(type: get_pointee_type(type)))
8028 return false;
8029
8030 if (!backend.array_is_value_type)
8031 return true;
8032
8033 auto *var = maybe_get_backing_variable(chain: ptr);
8034 if (!var)
8035 return false;
8036
8037 auto &backed_type = get<SPIRType>(id: var->basetype);
8038 return !backend.array_is_value_type_in_buffer_blocks && backed_type.basetype == SPIRType::Struct &&
8039 has_member_decoration(id: backed_type.self, index: 0, decoration: DecorationOffset);
8040}
8041
8042// Returns the function name for a texture sampling function for the specified image and sampling characteristics.
8043// For some subclasses, the function is a method on the specified image.
8044string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
8045{
8046 if (args.has_min_lod)
8047 {
8048 if (options.es)
8049 SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
8050 require_extension_internal(ext: "GL_ARB_sparse_texture_clamp");
8051 }
8052
8053 string fname;
8054 auto &imgtype = *args.base.imgtype;
8055 VariableID tex = args.base.img;
8056
8057 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
8058 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
8059 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
8060 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
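 // E.g. a textureLod() with constant LOD 0.0 on a sampler2DArrayShadow is emitted as
 // textureGrad(..., vec2(0.0), vec2(0.0)) instead (see to_function_args()).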
8061 bool workaround_lod_array_shadow_as_grad = false;
8062 if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
8063 is_depth_image(type: imgtype, id: tex) && args.lod && !args.base.is_fetch)
8064 {
8065 if (!expression_is_constant_null(id: args.lod))
8066 {
8067 SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
8068 "expressed in GLSL.");
8069 }
8070 workaround_lod_array_shadow_as_grad = true;
8071 }
8072
8073 if (args.is_sparse_feedback)
8074 fname += "sparse";
8075
8076 if (args.base.is_fetch)
8077 fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
8078 else
8079 {
8080 fname += args.is_sparse_feedback ? "Texture" : "texture";
8081
8082 if (args.base.is_gather)
8083 fname += "Gather";
8084 if (args.has_array_offsets)
8085 fname += "Offsets";
8086 if (args.base.is_proj)
8087 fname += "Proj";
8088 if (args.has_grad || workaround_lod_array_shadow_as_grad)
8089 fname += "Grad";
8090 if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
8091 fname += "Lod";
8092 }
8093
8094 if (args.has_offset)
8095 fname += "Offset";
8096
8097 if (args.has_min_lod)
8098 fname += "Clamp";
8099
8100 if (args.is_sparse_feedback || args.has_min_lod)
8101 fname += "ARB";
8102
8103 return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(op: fname, imgtype, tex) : fname;
8104}
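// Example of how the pieces concatenate (sketch): a sparse gather with a constant offset ends up as
// "sparseTextureGatherOffsetARB". The final legacy_tex_op() remap only applies to legacy, non-gather ops.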
8105
8106std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
8107{
8108 auto *var = maybe_get_backing_variable(chain: id);
8109
8110 // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
8111 // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
8112 if (var)
8113 {
8114 auto &type = get<SPIRType>(id: var->basetype);
8115 if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
8116 {
8117 if (options.vulkan_semantics)
8118 {
8119 if (dummy_sampler_id)
8120 {
8121 // Don't need to consider Shadow state since the dummy sampler is always non-shadow.
8122 auto sampled_type = type;
8123 sampled_type.basetype = SPIRType::SampledImage;
8124 return join(ts: type_to_glsl(type: sampled_type), ts: "(", ts: to_non_uniform_aware_expression(id), ts: ", ",
8125 ts: to_expression(id: dummy_sampler_id), ts: ")");
8126 }
8127 else
8128 {
8129 // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
8130 require_extension_internal(ext: "GL_EXT_samplerless_texture_functions");
8131 }
8132 }
8133 else
8134 {
8135 if (!dummy_sampler_id)
8136 SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
8137 "build_dummy_sampler_for_combined_images() called?");
8138
8139 return to_combined_image_sampler(image_id: id, samp_id: dummy_sampler_id);
8140 }
8141 }
8142 }
8143
8144 return to_non_uniform_aware_expression(id);
8145}
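// For instance (hypothetical identifiers): fetching from a separate texture2D uTex on a non-Vulkan target
// goes through the dummy-sampler path above and becomes a combined sampler2D expression, while the
// Vulkan path may emit sampler2D(uTex, uDummySampler) directly.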
8146
8147// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
8148string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
8149{
8150 VariableID img = args.base.img;
8151 auto &imgtype = *args.base.imgtype;
8152
8153 string farg_str;
8154 if (args.base.is_fetch)
8155 farg_str = convert_separate_image_to_expression(id: img);
8156 else
8157 farg_str = to_non_uniform_aware_expression(id: img);
8158
8159 if (args.nonuniform_expression && farg_str.find_first_of(c: '[') != string::npos)
8160 {
8161 // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
8162 farg_str = join(ts&: backend.nonuniform_qualifier, ts: "(", ts&: farg_str, ts: ")");
8163 }
8164
8165 bool swizz_func = backend.swizzle_is_function;
8166 auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
8167 if (comps == in_comps)
8168 return "";
8169
8170 switch (comps)
8171 {
8172 case 1:
8173 return ".x";
8174 case 2:
8175 return swizz_func ? ".xy()" : ".xy";
8176 case 3:
8177 return swizz_func ? ".xyz()" : ".xyz";
8178 default:
8179 return "";
8180 }
8181 };
8182
8183 bool forward = should_forward(id: args.coord);
8184
8185 // The IR can give us more components than we need, so chop them off as needed.
8186 auto swizzle_expr = swizzle(args.coord_components, expression_type(id: args.coord).vecsize);
8187 // Only enclose the UV expression if needed.
8188 auto coord_expr =
8189 (*swizzle_expr == '\0') ? to_expression(id: args.coord) : (to_enclosed_expression(id: args.coord) + swizzle_expr);
8190
8191 // texelFetch only takes int, not uint.
8192 auto &coord_type = expression_type(id: args.coord);
8193 if (coord_type.basetype == SPIRType::UInt)
8194 {
8195 auto expected_type = coord_type;
8196 expected_type.vecsize = args.coord_components;
8197 expected_type.basetype = SPIRType::Int;
8198 coord_expr = bitcast_expression(target_type: expected_type, expr_type: coord_type.basetype, expr: coord_expr);
8199 }
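 // E.g. a uvec2 coordinate feeding texelFetch() is wrapped as ivec2(coord) here (illustrative).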
8200
8201 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
8202 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
8203 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
8204 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
8205 bool workaround_lod_array_shadow_as_grad =
8206 ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
8207 is_depth_image(type: imgtype, id: img) && args.lod != 0 && !args.base.is_fetch;
8208
8209 if (args.dref)
8210 {
8211 forward = forward && should_forward(id: args.dref);
8212
8213 // SPIR-V splits dref and coordinate.
8214 if (args.base.is_gather ||
8215 args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
8216 {
8217 farg_str += ", ";
8218 farg_str += to_expression(id: args.coord);
8219 farg_str += ", ";
8220 farg_str += to_expression(id: args.dref);
8221 }
8222 else if (args.base.is_proj)
8223 {
8224 // Have to reshuffle so we get vec4(coord, dref, proj), special case.
8225   // Other shading languages split up the coordinate and compare value arguments like SPIR-V does.
8226 // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
8227 farg_str += ", vec4(";
8228
8229 if (imgtype.image.dim == Dim1D)
8230 {
8231 // Could reuse coord_expr, but we will mess up the temporary usage checking.
8232 farg_str += to_enclosed_expression(id: args.coord) + ".x";
8233 farg_str += ", ";
8234 farg_str += "0.0, ";
8235 farg_str += to_expression(id: args.dref);
8236 farg_str += ", ";
8237 farg_str += to_enclosed_expression(id: args.coord) + ".y)";
8238 }
8239 else if (imgtype.image.dim == Dim2D)
8240 {
8241 // Could reuse coord_expr, but we will mess up the temporary usage checking.
8242 farg_str += to_enclosed_expression(id: args.coord) + (swizz_func ? ".xy()" : ".xy");
8243 farg_str += ", ";
8244 farg_str += to_expression(id: args.dref);
8245 farg_str += ", ";
8246 farg_str += to_enclosed_expression(id: args.coord) + ".z)";
8247 }
8248 else
8249 SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
8250 }
8251 else
8252 {
8253 // Create a composite which merges coord/dref into a single vector.
8254 auto type = expression_type(id: args.coord);
8255 type.vecsize = args.coord_components + 1;
8256 if (imgtype.image.dim == Dim1D && options.es)
8257 type.vecsize++;
8258 farg_str += ", ";
8259 farg_str += type_to_glsl_constructor(type);
8260 farg_str += "(";
8261
8262 if (imgtype.image.dim == Dim1D && options.es)
8263 {
8264 if (imgtype.image.arrayed)
8265 {
8266 farg_str += enclose_expression(expr: coord_expr) + ".x";
8267 farg_str += ", 0.0, ";
8268 farg_str += enclose_expression(expr: coord_expr) + ".y";
8269 }
8270 else
8271 {
8272 farg_str += coord_expr;
8273 farg_str += ", 0.0";
8274 }
8275 }
8276 else
8277 farg_str += coord_expr;
8278
8279 farg_str += ", ";
8280 farg_str += to_expression(id: args.dref);
8281 farg_str += ")";
8282 }
8283 }
8284 else
8285 {
8286 if (imgtype.image.dim == Dim1D && options.es)
8287 {
8288 // Have to fake a second coordinate.
8289 if (type_is_floating_point(type: coord_type))
8290 {
8291 // Cannot mix proj and array.
8292 if (imgtype.image.arrayed || args.base.is_proj)
8293 {
8294 coord_expr = join(ts: "vec3(", ts: enclose_expression(expr: coord_expr), ts: ".x, 0.0, ",
8295 ts: enclose_expression(expr: coord_expr), ts: ".y)");
8296 }
8297 else
8298 coord_expr = join(ts: "vec2(", ts&: coord_expr, ts: ", 0.0)");
8299 }
8300 else
8301 {
8302 if (imgtype.image.arrayed)
8303 {
8304 coord_expr = join(ts: "ivec3(", ts: enclose_expression(expr: coord_expr),
8305 ts: ".x, 0, ",
8306 ts: enclose_expression(expr: coord_expr), ts: ".y)");
8307 }
8308 else
8309 coord_expr = join(ts: "ivec2(", ts&: coord_expr, ts: ", 0)");
8310 }
8311 }
8312
8313 farg_str += ", ";
8314 farg_str += coord_expr;
8315 }
8316
8317 if (args.grad_x || args.grad_y)
8318 {
8319 forward = forward && should_forward(id: args.grad_x);
8320 forward = forward && should_forward(id: args.grad_y);
8321 farg_str += ", ";
8322 farg_str += to_expression(id: args.grad_x);
8323 farg_str += ", ";
8324 farg_str += to_expression(id: args.grad_y);
8325 }
8326
8327 if (args.lod)
8328 {
8329 if (workaround_lod_array_shadow_as_grad)
8330 {
8331 // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
8332 // Implementing this as plain texture() is not safe on some implementations.
8333 if (imgtype.image.dim == Dim2D)
8334 farg_str += ", vec2(0.0), vec2(0.0)";
8335 else if (imgtype.image.dim == DimCube)
8336 farg_str += ", vec3(0.0), vec3(0.0)";
8337 }
8338 else
8339 {
8340 forward = forward && should_forward(id: args.lod);
8341 farg_str += ", ";
8342
8343 // Lod expression for TexelFetch in GLSL must be int, and only int.
8344 if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
8345 farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.lod);
8346 else
8347 farg_str += to_expression(id: args.lod);
8348 }
8349 }
8350 else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
8351 {
8352 // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default.
8353 farg_str += ", 0";
8354 }
8355
8356 if (args.offset)
8357 {
8358 forward = forward && should_forward(id: args.offset);
8359 farg_str += ", ";
8360 farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.offset);
8361 }
8362
8363 if (args.sample)
8364 {
8365 farg_str += ", ";
8366 farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.sample);
8367 }
8368
8369 if (args.min_lod)
8370 {
8371 farg_str += ", ";
8372 farg_str += to_expression(id: args.min_lod);
8373 }
8374
8375 if (args.sparse_texel)
8376 {
8377  // Sparse texel output parameter comes after everything else, except it goes before the optional component/bias arguments.
8378 farg_str += ", ";
8379 farg_str += to_expression(id: args.sparse_texel);
8380 }
8381
8382 if (args.bias)
8383 {
8384 forward = forward && should_forward(id: args.bias);
8385 farg_str += ", ";
8386 farg_str += to_expression(id: args.bias);
8387 }
8388
8389 if (args.component && !expression_is_constant_null(id: args.component))
8390 {
8391 forward = forward && should_forward(id: args.component);
8392 farg_str += ", ";
8393 farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.component);
8394 }
8395
8396 *p_forward = forward;
8397
8398 return farg_str;
8399}
8400
8401Op CompilerGLSL::get_remapped_spirv_op(Op op) const
8402{
8403 if (options.relax_nan_checks)
8404 {
8405 switch (op)
8406 {
8407 case OpFUnordLessThan:
8408 op = OpFOrdLessThan;
8409 break;
8410 case OpFUnordLessThanEqual:
8411 op = OpFOrdLessThanEqual;
8412 break;
8413 case OpFUnordGreaterThan:
8414 op = OpFOrdGreaterThan;
8415 break;
8416 case OpFUnordGreaterThanEqual:
8417 op = OpFOrdGreaterThanEqual;
8418 break;
8419 case OpFUnordEqual:
8420 op = OpFOrdEqual;
8421 break;
8422 case OpFOrdNotEqual:
8423 op = OpFUnordNotEqual;
8424 break;
8425
8426 default:
8427 break;
8428 }
8429 }
8430
8431 return op;
8432}
8433
8434GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const
8435{
8436 // Relax to non-NaN aware opcodes.
8437 if (options.relax_nan_checks)
8438 {
8439 switch (std450_op)
8440 {
8441 case GLSLstd450NClamp:
8442 std450_op = GLSLstd450FClamp;
8443 break;
8444 case GLSLstd450NMin:
8445 std450_op = GLSLstd450FMin;
8446 break;
8447 case GLSLstd450NMax:
8448 std450_op = GLSLstd450FMax;
8449 break;
8450 default:
8451 break;
8452 }
8453 }
8454
8455 return std450_op;
8456}
8457
8458void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
8459{
8460 auto op = static_cast<GLSLstd450>(eop);
8461
8462 if (is_legacy() && is_unsigned_glsl_opcode(op))
8463 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
8464
8465 // If we need to do implicit bitcasts, make sure we do it with the correct type.
8466 uint32_t integer_width = get_integer_width_for_glsl_instruction(op, arguments: args, length);
8467 auto int_type = to_signed_basetype(width: integer_width);
8468 auto uint_type = to_unsigned_basetype(width: integer_width);
8469
8470 op = get_remapped_glsl_op(std450_op: op);
8471
8472 switch (op)
8473 {
8474 // FP fiddling
8475 case GLSLstd450Round:
8476 if (!is_legacy())
8477 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "round");
8478 else
8479 {
8480 auto op0 = to_enclosed_expression(id: args[0]);
8481 auto &op0_type = expression_type(id: args[0]);
8482 auto expr = join(ts: "floor(", ts&: op0, ts: " + ", ts: type_to_glsl_constructor(type: op0_type), ts: "(0.5))");
8483 bool forward = should_forward(id: args[0]);
8484 emit_op(result_type, result_id: id, rhs: expr, forwarding: forward);
8485 inherit_expression_dependencies(dst: id, source: args[0]);
8486 }
8487 break;
8488
8489 case GLSLstd450RoundEven:
8490 if (!is_legacy())
8491 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "roundEven");
8492 else if (!options.es)
8493 {
8494 // This extension provides round() with round-to-even semantics.
8495 require_extension_internal(ext: "GL_EXT_gpu_shader4");
8496 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "round");
8497 }
8498 else
8499 SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
8500 break;
8501
8502 case GLSLstd450Trunc:
8503 if (!is_legacy())
8504 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "trunc");
8505 else
8506 {
8507 // Implement by value-casting to int and back.
8508 bool forward = should_forward(id: args[0]);
8509 auto op0 = to_unpacked_expression(id: args[0]);
8510 auto &op0_type = expression_type(id: args[0]);
8511 auto via_type = op0_type;
8512 via_type.basetype = SPIRType::Int;
8513 auto expr = join(ts: type_to_glsl(type: op0_type), ts: "(", ts: type_to_glsl(type: via_type), ts: "(", ts&: op0, ts: "))");
8514 emit_op(result_type, result_id: id, rhs: expr, forwarding: forward);
8515 inherit_expression_dependencies(dst: id, source: args[0]);
8516 }
8517 break;
8518
8519 case GLSLstd450SAbs:
8520 emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "abs", input_type: int_type, expected_result_type: int_type);
8521 break;
8522 case GLSLstd450FAbs:
8523 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "abs");
8524 break;
8525 case GLSLstd450SSign:
8526 emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "sign", input_type: int_type, expected_result_type: int_type);
8527 break;
8528 case GLSLstd450FSign:
8529 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "sign");
8530 break;
8531 case GLSLstd450Floor:
8532 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "floor");
8533 break;
8534 case GLSLstd450Ceil:
8535 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "ceil");
8536 break;
8537 case GLSLstd450Fract:
8538 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "fract");
8539 break;
8540 case GLSLstd450Radians:
8541 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "radians");
8542 break;
8543 case GLSLstd450Degrees:
8544 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "degrees");
8545 break;
8546 case GLSLstd450Fma:
8547 if ((!options.es && options.version < 400) || (options.es && options.version < 320))
8548 {
8549 auto expr = join(ts: to_enclosed_expression(id: args[0]), ts: " * ", ts: to_enclosed_expression(id: args[1]), ts: " + ",
8550 ts: to_enclosed_expression(id: args[2]));
8551
8552 emit_op(result_type, result_id: id, rhs: expr,
8553 forwarding: should_forward(id: args[0]) && should_forward(id: args[1]) && should_forward(id: args[2]));
8554 for (uint32_t i = 0; i < 3; i++)
8555 inherit_expression_dependencies(dst: id, source: args[i]);
8556 }
8557 else
8558 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "fma");
8559 break;
8560
8561 case GLSLstd450Modf:
8562 register_call_out_argument(id: args[1]);
8563 if (!is_legacy())
8564 {
8565 forced_temporaries.insert(x: id);
8566 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "modf");
8567 }
8568 else
8569 {
8570   // NB: legacy GLSL doesn't have trunc() either, so we do a value cast.
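   // The emitted code looks roughly like (illustrative): i = float(int(x)); result = x - i;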
8571 auto &op1_type = expression_type(id: args[1]);
8572 auto via_type = op1_type;
8573 via_type.basetype = SPIRType::Int;
8574 statement(ts: to_expression(id: args[1]), ts: " = ",
8575 ts: type_to_glsl(type: op1_type), ts: "(", ts: type_to_glsl(type: via_type),
8576 ts: "(", ts: to_expression(id: args[0]), ts: "));");
8577 emit_binary_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "-");
8578 }
8579 break;
8580
8581 case GLSLstd450ModfStruct:
8582 {
8583 auto &type = get<SPIRType>(id: result_type);
8584 emit_uninitialized_temporary_expression(type: result_type, id);
8585 if (!is_legacy())
8586 {
8587 statement(ts: to_expression(id), ts: ".", ts: to_member_name(type, index: 0), ts: " = ", ts: "modf(", ts: to_expression(id: args[0]), ts: ", ",
8588 ts: to_expression(id), ts: ".", ts: to_member_name(type, index: 1), ts: ");");
8589 }
8590 else
8591 {
8592   // NB: legacy GLSL doesn't have trunc() either, so we do a value cast.
8593 auto &op0_type = expression_type(id: args[0]);
8594 auto via_type = op0_type;
8595 via_type.basetype = SPIRType::Int;
8596 statement(ts: to_expression(id), ts: ".", ts: to_member_name(type, index: 1), ts: " = ", ts: type_to_glsl(type: op0_type),
8597 ts: "(", ts: type_to_glsl(type: via_type), ts: "(", ts: to_expression(id: args[0]), ts: "));");
8598 statement(ts: to_expression(id), ts: ".", ts: to_member_name(type, index: 0), ts: " = ", ts: to_enclosed_expression(id: args[0]), ts: " - ",
8599 ts: to_expression(id), ts: ".", ts: to_member_name(type, index: 1), ts: ";");
8600 }
8601 break;
8602 }
8603
8604 // Minmax
8605 case GLSLstd450UMin:
8606 emit_binary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op: "min", input_type: uint_type, skip_cast_if_equal_type: false);
8607 break;
8608
8609 case GLSLstd450SMin:
8610 emit_binary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op: "min", input_type: int_type, skip_cast_if_equal_type: false);
8611 break;
8612
8613 case GLSLstd450FMin:
8614 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "min");
8615 break;
8616
8617 case GLSLstd450FMax:
8618 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "max");
8619 break;
8620
8621 case GLSLstd450UMax:
8622 emit_binary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op: "max", input_type: uint_type, skip_cast_if_equal_type: false);
8623 break;
8624
8625 case GLSLstd450SMax:
8626 emit_binary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op: "max", input_type: int_type, skip_cast_if_equal_type: false);
8627 break;
8628
8629 case GLSLstd450FClamp:
8630 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "clamp");
8631 break;
8632
8633 case GLSLstd450UClamp:
8634 emit_trinary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "clamp", input_type: uint_type);
8635 break;
8636
8637 case GLSLstd450SClamp:
8638 emit_trinary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "clamp", input_type: int_type);
8639 break;
8640
8641 // Trig
8642 case GLSLstd450Sin:
8643 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "sin");
8644 break;
8645 case GLSLstd450Cos:
8646 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "cos");
8647 break;
8648 case GLSLstd450Tan:
8649 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "tan");
8650 break;
8651 case GLSLstd450Asin:
8652 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "asin");
8653 break;
8654 case GLSLstd450Acos:
8655 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "acos");
8656 break;
8657 case GLSLstd450Atan:
8658 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "atan");
8659 break;
8660 case GLSLstd450Sinh:
8661 if (!is_legacy())
8662 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "sinh");
8663 else
8664 {
8665 bool forward = should_forward(id: args[0]);
8666 auto expr = join(ts: "(exp(", ts: to_expression(id: args[0]), ts: ") - exp(-", ts: to_enclosed_expression(id: args[0]), ts: ")) * 0.5");
8667 emit_op(result_type, result_id: id, rhs: expr, forwarding: forward);
8668 inherit_expression_dependencies(dst: id, source: args[0]);
8669 }
8670 break;
8671 case GLSLstd450Cosh:
8672 if (!is_legacy())
8673 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "cosh");
8674 else
8675 {
8676 bool forward = should_forward(id: args[0]);
8677 auto expr = join(ts: "(exp(", ts: to_expression(id: args[0]), ts: ") + exp(-", ts: to_enclosed_expression(id: args[0]), ts: ")) * 0.5");
8678 emit_op(result_type, result_id: id, rhs: expr, forwarding: forward);
8679 inherit_expression_dependencies(dst: id, source: args[0]);
8680 }
8681 break;
8682 case GLSLstd450Tanh:
8683 if (!is_legacy())
8684 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "tanh");
8685 else
8686 {
8687 // Create temporaries to store the result of exp(arg) and exp(-arg).
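   // Net effect (sketch): epos = exp(x); eneg = exp(-x); tanh(x) = (epos - eneg) / (epos + eneg).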
8688 uint32_t &ids = extra_sub_expressions[id];
8689 if (!ids)
8690 {
8691 ids = ir.increase_bound_by(count: 2);
8692
8693 // Inherit precision qualifier (legacy has no NoContraction).
8694 if (has_decoration(id, decoration: DecorationRelaxedPrecision))
8695 {
8696 set_decoration(id: ids, decoration: DecorationRelaxedPrecision);
8697 set_decoration(id: ids + 1, decoration: DecorationRelaxedPrecision);
8698 }
8699 }
8700 uint32_t epos_id = ids;
8701 uint32_t eneg_id = ids + 1;
8702
8703 emit_op(result_type, result_id: epos_id, rhs: join(ts: "exp(", ts: to_expression(id: args[0]), ts: ")"), forwarding: false);
8704 emit_op(result_type, result_id: eneg_id, rhs: join(ts: "exp(-", ts: to_enclosed_expression(id: args[0]), ts: ")"), forwarding: false);
8705 inherit_expression_dependencies(dst: epos_id, source: args[0]);
8706 inherit_expression_dependencies(dst: eneg_id, source: args[0]);
8707
8708 auto expr = join(ts: "(", ts: to_enclosed_expression(id: epos_id), ts: " - ", ts: to_enclosed_expression(id: eneg_id), ts: ") / "
8709 "(", ts: to_enclosed_expression(id: epos_id), ts: " + ", ts: to_enclosed_expression(id: eneg_id), ts: ")");
8710 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
8711 inherit_expression_dependencies(dst: id, source: epos_id);
8712 inherit_expression_dependencies(dst: id, source: eneg_id);
8713 }
8714 break;
8715 case GLSLstd450Asinh:
8716 if (!is_legacy())
8717 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "asinh");
8718 else
8719 emit_emulated_ahyper_op(result_type, result_id: id, op0: args[0], op: GLSLstd450Asinh);
8720 break;
8721 case GLSLstd450Acosh:
8722 if (!is_legacy())
8723 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "acosh");
8724 else
8725 emit_emulated_ahyper_op(result_type, result_id: id, op0: args[0], op: GLSLstd450Acosh);
8726 break;
8727 case GLSLstd450Atanh:
8728 if (!is_legacy())
8729 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "atanh");
8730 else
8731 emit_emulated_ahyper_op(result_type, result_id: id, op0: args[0], op: GLSLstd450Atanh);
8732 break;
8733 case GLSLstd450Atan2:
8734 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "atan");
8735 break;
8736
8737 // Exponentials
8738 case GLSLstd450Pow:
8739 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "pow");
8740 break;
8741 case GLSLstd450Exp:
8742 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "exp");
8743 break;
8744 case GLSLstd450Log:
8745 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "log");
8746 break;
8747 case GLSLstd450Exp2:
8748 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "exp2");
8749 break;
8750 case GLSLstd450Log2:
8751 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "log2");
8752 break;
8753 case GLSLstd450Sqrt:
8754 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "sqrt");
8755 break;
8756 case GLSLstd450InverseSqrt:
8757 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "inversesqrt");
8758 break;
8759
8760 // Matrix math
8761 case GLSLstd450Determinant:
8762 {
8763 // No need to transpose - it doesn't affect the determinant
8764 auto *e = maybe_get<SPIRExpression>(id: args[0]);
8765 bool old_transpose = e && e->need_transpose;
8766 if (old_transpose)
8767 e->need_transpose = false;
8768
8769 if (options.version < 150) // also matches ES 100
8770 {
8771 auto &type = expression_type(id: args[0]);
8772 assert(type.vecsize >= 2 && type.vecsize <= 4);
8773 assert(type.vecsize == type.columns);
8774
8775 // ARB_gpu_shader_fp64 needs GLSL 150, other types are not valid
8776 if (type.basetype != SPIRType::Float)
8777 SPIRV_CROSS_THROW("Unsupported type for matrix determinant");
8778
8779 bool relaxed = has_decoration(id, decoration: DecorationRelaxedPrecision);
8780 require_polyfill(polyfill: static_cast<Polyfill>(PolyfillDeterminant2x2 << (type.vecsize - 2)),
8781 relaxed);
8782 emit_unary_func_op(result_type, result_id: id, op0: args[0],
8783 op: (options.es && relaxed) ? "spvDeterminantMP" : "spvDeterminant");
8784 }
8785 else
8786 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "determinant");
8787
8788 if (old_transpose)
8789 e->need_transpose = true;
8790 break;
8791 }
8792
8793 case GLSLstd450MatrixInverse:
8794 {
8795 // The inverse of the transpose is the same as the transpose of
8796 // the inverse, so we can just flip need_transpose of the result.
8797 auto *a = maybe_get<SPIRExpression>(id: args[0]);
8798 bool old_transpose = a && a->need_transpose;
8799 if (old_transpose)
8800 a->need_transpose = false;
8801
8802 const char *func = "inverse";
8803 if (options.version < 140) // also matches ES 100
8804 {
8805 auto &type = get<SPIRType>(id: result_type);
8806 assert(type.vecsize >= 2 && type.vecsize <= 4);
8807 assert(type.vecsize == type.columns);
8808
8809 // ARB_gpu_shader_fp64 needs GLSL 150, other types are invalid
8810 if (type.basetype != SPIRType::Float)
8811 SPIRV_CROSS_THROW("Unsupported type for matrix inverse");
8812
8813 bool relaxed = has_decoration(id, decoration: DecorationRelaxedPrecision);
8814 require_polyfill(polyfill: static_cast<Polyfill>(PolyfillMatrixInverse2x2 << (type.vecsize - 2)),
8815 relaxed);
8816 func = (options.es && relaxed) ? "spvInverseMP" : "spvInverse";
8817 }
8818
8819 bool forward = should_forward(id: args[0]);
8820 auto &e = emit_op(result_type, result_id: id, rhs: join(ts&: func, ts: "(", ts: to_unpacked_expression(id: args[0]), ts: ")"), forwarding: forward);
8821 inherit_expression_dependencies(dst: id, source: args[0]);
8822
8823 if (old_transpose)
8824 {
8825 e.need_transpose = true;
8826 a->need_transpose = true;
8827 }
8828 break;
8829 }
8830
8831 // Lerping
8832 case GLSLstd450FMix:
8833 case GLSLstd450IMix:
8834 {
8835 emit_mix_op(result_type, id, left: args[0], right: args[1], lerp: args[2]);
8836 break;
8837 }
8838 case GLSLstd450Step:
8839 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "step");
8840 break;
8841 case GLSLstd450SmoothStep:
8842 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "smoothstep");
8843 break;
8844
8845 // Packing
8846 case GLSLstd450Frexp:
8847 register_call_out_argument(id: args[1]);
8848 forced_temporaries.insert(x: id);
8849 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "frexp");
8850 break;
8851
8852 case GLSLstd450FrexpStruct:
8853 {
8854 auto &type = get<SPIRType>(id: result_type);
8855 emit_uninitialized_temporary_expression(type: result_type, id);
8856 statement(ts: to_expression(id), ts: ".", ts: to_member_name(type, index: 0), ts: " = ", ts: "frexp(", ts: to_expression(id: args[0]), ts: ", ",
8857 ts: to_expression(id), ts: ".", ts: to_member_name(type, index: 1), ts: ");");
8858 break;
8859 }
8860
8861 case GLSLstd450Ldexp:
8862 {
8863 bool forward = should_forward(id: args[0]) && should_forward(id: args[1]);
8864
8865 auto op0 = to_unpacked_expression(id: args[0]);
8866 auto op1 = to_unpacked_expression(id: args[1]);
8867 auto &op1_type = expression_type(id: args[1]);
8868 if (op1_type.basetype != SPIRType::Int)
8869 {
8870 // Need a value cast here.
8871 auto target_type = op1_type;
8872 target_type.basetype = SPIRType::Int;
8873 op1 = join(ts: type_to_glsl_constructor(type: target_type), ts: "(", ts&: op1, ts: ")");
8874 }
8875
8876 auto expr = join(ts: "ldexp(", ts&: op0, ts: ", ", ts&: op1, ts: ")");
8877
8878 emit_op(result_type, result_id: id, rhs: expr, forwarding: forward);
8879 inherit_expression_dependencies(dst: id, source: args[0]);
8880 inherit_expression_dependencies(dst: id, source: args[1]);
8881 break;
8882 }
8883
8884 case GLSLstd450PackSnorm4x8:
8885 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packSnorm4x8");
8886 break;
8887 case GLSLstd450PackUnorm4x8:
8888 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packUnorm4x8");
8889 break;
8890 case GLSLstd450PackSnorm2x16:
8891 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packSnorm2x16");
8892 break;
8893 case GLSLstd450PackUnorm2x16:
8894 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packUnorm2x16");
8895 break;
8896 case GLSLstd450PackHalf2x16:
8897 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packHalf2x16");
8898 break;
8899 case GLSLstd450UnpackSnorm4x8:
8900 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackSnorm4x8");
8901 break;
8902 case GLSLstd450UnpackUnorm4x8:
8903 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackUnorm4x8");
8904 break;
8905 case GLSLstd450UnpackSnorm2x16:
8906 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackSnorm2x16");
8907 break;
8908 case GLSLstd450UnpackUnorm2x16:
8909 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackUnorm2x16");
8910 break;
8911 case GLSLstd450UnpackHalf2x16:
8912 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackHalf2x16");
8913 break;
8914
8915 case GLSLstd450PackDouble2x32:
8916 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packDouble2x32");
8917 break;
8918 case GLSLstd450UnpackDouble2x32:
8919 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackDouble2x32");
8920 break;
8921
8922 // Vector math
8923 case GLSLstd450Length:
8924 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "length");
8925 break;
8926 case GLSLstd450Distance:
8927 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "distance");
8928 break;
8929 case GLSLstd450Cross:
8930 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "cross");
8931 break;
8932 case GLSLstd450Normalize:
8933 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "normalize");
8934 break;
8935 case GLSLstd450FaceForward:
8936 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "faceforward");
8937 break;
8938 case GLSLstd450Reflect:
8939 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "reflect");
8940 break;
8941 case GLSLstd450Refract:
8942 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "refract");
8943 break;
8944
8945 // Bit-fiddling
8946 case GLSLstd450FindILsb:
8947 // findLSB always returns int.
8948 emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "findLSB", input_type: expression_type(id: args[0]).basetype, expected_result_type: int_type);
8949 break;
8950
8951 case GLSLstd450FindSMsb:
8952 emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "findMSB", input_type: int_type, expected_result_type: int_type);
8953 break;
8954
8955 case GLSLstd450FindUMsb:
8956 emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "findMSB", input_type: uint_type,
8957 expected_result_type: int_type); // findMSB always returns int.
8958 break;
8959
8960 // Multisampled varying
8961 case GLSLstd450InterpolateAtCentroid:
8962 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "interpolateAtCentroid");
8963 break;
8964 case GLSLstd450InterpolateAtSample:
8965 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "interpolateAtSample");
8966 break;
8967 case GLSLstd450InterpolateAtOffset:
8968 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "interpolateAtOffset");
8969 break;
8970
8971 case GLSLstd450NMin:
8972 case GLSLstd450NMax:
8973 {
8974 if (options.vulkan_semantics)
8975 {
8976 require_extension_internal(ext: "GL_EXT_spirv_intrinsics");
8977 bool relaxed = has_decoration(id, decoration: DecorationRelaxedPrecision);
8978 Polyfill poly = {};
8979 switch (get<SPIRType>(id: result_type).width)
8980 {
8981 case 16:
8982 poly = op == GLSLstd450NMin ? PolyfillNMin16 : PolyfillNMax16;
8983 break;
8984
8985 case 32:
8986 poly = op == GLSLstd450NMin ? PolyfillNMin32 : PolyfillNMax32;
8987 break;
8988
8989 case 64:
8990 poly = op == GLSLstd450NMin ? PolyfillNMin64 : PolyfillNMax64;
8991 break;
8992
8993 default:
8994 SPIRV_CROSS_THROW("Invalid bit width for NMin/NMax.");
8995 }
8996
8997 require_polyfill(polyfill: poly, relaxed);
8998
8999 // Function return decorations are broken, so we need to do a double polyfill.
9000 if (relaxed)
9001 require_polyfill(polyfill: poly, relaxed: false);
9002
9003 const char *op_str;
9004 if (relaxed)
9005 op_str = op == GLSLstd450NMin ? "spvNMinRelaxed" : "spvNMaxRelaxed";
9006 else
9007 op_str = op == GLSLstd450NMin ? "spvNMin" : "spvNMax";
9008
9009 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: op_str);
9010 }
9011 else
9012 {
9013 emit_nminmax_op(result_type, id, op0: args[0], op1: args[1], op);
9014 }
9015 break;
9016 }
9017
9018 case GLSLstd450NClamp:
9019 {
9020 if (options.vulkan_semantics)
9021 {
9022 require_extension_internal(ext: "GL_EXT_spirv_intrinsics");
9023 bool relaxed = has_decoration(id, decoration: DecorationRelaxedPrecision);
9024 Polyfill poly = {};
9025 switch (get<SPIRType>(id: result_type).width)
9026 {
9027 case 16:
9028 poly = PolyfillNClamp16;
9029 break;
9030
9031 case 32:
9032 poly = PolyfillNClamp32;
9033 break;
9034
9035 case 64:
9036 poly = PolyfillNClamp64;
9037 break;
9038
9039 default:
9040 SPIRV_CROSS_THROW("Invalid bit width for NClamp.");
9041 }
9042
9043 require_polyfill(polyfill: poly, relaxed);
9044
9045 // Function return decorations are broken, so we need to do a double polyfill.
9046 if (relaxed)
9047 require_polyfill(polyfill: poly, relaxed: false);
9048
9049 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: relaxed ? "spvNClampRelaxed" : "spvNClamp");
9050 }
9051 else
9052 {
9053 // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
9054 // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
9055 uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX];
9056 if (!max_id)
9057 max_id = ir.increase_bound_by(count: 1);
9058
9059 // Inherit precision qualifiers.
9060 ir.meta[max_id] = ir.meta[id];
9061
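// Conceptually this lowers NClamp(x, minVal, maxVal) to NMin(NMax(x, minVal), maxVal),
// reusing the NaN-aware min/max emulation below; the aux ID holds the intermediate NMax result.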
9062 emit_nminmax_op(result_type, id: max_id, op0: args[0], op1: args[1], op: GLSLstd450NMax);
9063 emit_nminmax_op(result_type, id, op0: max_id, op1: args[2], op: GLSLstd450NMin);
9064 }
9065 break;
9066 }
9067
9068 default:
9069 statement(ts: "// unimplemented GLSL op ", ts&: eop);
9070 break;
9071 }
9072}
9073
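// Rough sketch of what this emulation produces for NMin(a, b). The temporaries are really
// SPIR-V IDs; the names below are for exposition only (NMax is identical with max()):
//   bvecN a_nan = isnan(a);            // or notEqual(a, a) / (a != a) on legacy targets
//   bvecN b_nan = isnan(b);
//   T     tmp   = min(a, b);
//   T     m0    = mix(tmp, b, a_nan);  // if a is NaN, pick b
//   T     res   = mix(m0, a, b_nan);   // if b is NaN, pick a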
9074void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
9075{
9076 // Need to emulate this call.
9077 uint32_t &ids = extra_sub_expressions[id];
9078 if (!ids)
9079 {
9080 ids = ir.increase_bound_by(count: 5);
9081 auto btype = get<SPIRType>(id: result_type);
9082 btype.basetype = SPIRType::Boolean;
9083 set<SPIRType>(id: ids, args&: btype);
9084 }
9085
9086 uint32_t btype_id = ids + 0;
9087 uint32_t left_nan_id = ids + 1;
9088 uint32_t right_nan_id = ids + 2;
9089 uint32_t tmp_id = ids + 3;
9090 uint32_t mixed_first_id = ids + 4;
9091
9092 // Inherit precision qualifiers.
9093 ir.meta[tmp_id] = ir.meta[id];
9094 ir.meta[mixed_first_id] = ir.meta[id];
9095
9096 if (!is_legacy())
9097 {
9098 emit_unary_func_op(result_type: btype_id, result_id: left_nan_id, op0, op: "isnan");
9099 emit_unary_func_op(result_type: btype_id, result_id: right_nan_id, op0: op1, op: "isnan");
9100 }
9101 else if (expression_type(id: op0).vecsize > 1)
9102 {
9103 // If the number doesn't equal itself, it must be NaN
9104 emit_binary_func_op(result_type: btype_id, result_id: left_nan_id, op0, op1: op0, op: "notEqual");
9105 emit_binary_func_op(result_type: btype_id, result_id: right_nan_id, op0: op1, op1, op: "notEqual");
9106 }
9107 else
9108 {
9109 emit_binary_op(result_type: btype_id, result_id: left_nan_id, op0, op1: op0, op: "!=");
9110 emit_binary_op(result_type: btype_id, result_id: right_nan_id, op0: op1, op1, op: "!=");
9111 }
9112 emit_binary_func_op(result_type, result_id: tmp_id, op0, op1, op: op == GLSLstd450NMin ? "min" : "max");
9113 emit_mix_op(result_type, id: mixed_first_id, left: tmp_id, right: op1, lerp: left_nan_id);
9114 emit_mix_op(result_type, id, left: mixed_first_id, right: op0, lerp: right_nan_id);
9115}
9116
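// The emulation below uses the standard logarithmic identities, roughly:
//   asinh(x) = log(x + sqrt(x * x + 1.0))
//   acosh(x) = log(x + sqrt(x * x - 1.0))
//   atanh(x) = 0.5 * log((1.0 + x) / (1.0 - x))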
9117void CompilerGLSL::emit_emulated_ahyper_op(uint32_t result_type, uint32_t id, uint32_t op0, GLSLstd450 op)
9118{
9119 const char *one = backend.float_literal_suffix ? "1.0f" : "1.0";
9120 std::string expr;
9121 bool forward = should_forward(id: op0);
9122
9123 switch (op)
9124 {
9125 case GLSLstd450Asinh:
9126 expr = join(ts: "log(", ts: to_enclosed_expression(id: op0), ts: " + sqrt(",
9127 ts: to_enclosed_expression(id: op0), ts: " * ", ts: to_enclosed_expression(id: op0), ts: " + ", ts&: one, ts: "))");
9129 break;
9130
9131 case GLSLstd450Acosh:
9132 expr = join(ts: "log(", ts: to_enclosed_expression(id: op0), ts: " + sqrt(",
9133 ts: to_enclosed_expression(id: op0), ts: " * ", ts: to_enclosed_expression(id: op0), ts: " - ", ts&: one, ts: "))");
9134 break;
9135
9136 case GLSLstd450Atanh:
9137 expr = join(ts: "log((", ts&: one, ts: " + ", ts: to_enclosed_expression(id: op0), ts: ") / "
9138 "(", ts&: one, ts: " - ", ts: to_enclosed_expression(id: op0), ts: ")) * 0.5",
9139 ts: backend.float_literal_suffix ? "f" : "");
9140 break;
9141
9142 default:
9143 SPIRV_CROSS_THROW("Invalid op.");
9144 }
9145
9146 emit_op(result_type, result_id: id, rhs: expr, forwarding: forward);
9147 inherit_expression_dependencies(dst: id, source: op0);
9148}
9149
9150void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
9151 uint32_t)
9152{
9153 require_extension_internal(ext: "GL_AMD_shader_ballot");
9154
9155 enum AMDShaderBallot
9156 {
9157 SwizzleInvocationsAMD = 1,
9158 SwizzleInvocationsMaskedAMD = 2,
9159 WriteInvocationAMD = 3,
9160 MbcntAMD = 4
9161 };
9162
9163 auto op = static_cast<AMDShaderBallot>(eop);
9164
9165 switch (op)
9166 {
9167 case SwizzleInvocationsAMD:
9168 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "swizzleInvocationsAMD");
9169 register_control_dependent_expression(expr: id);
9170 break;
9171
9172 case SwizzleInvocationsMaskedAMD:
9173 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "swizzleInvocationsMaskedAMD");
9174 register_control_dependent_expression(expr: id);
9175 break;
9176
9177 case WriteInvocationAMD:
9178 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "writeInvocationAMD");
9179 register_control_dependent_expression(expr: id);
9180 break;
9181
9182 case MbcntAMD:
9183 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "mbcntAMD");
9184 register_control_dependent_expression(expr: id);
9185 break;
9186
9187 default:
9188 statement(ts: "// unimplemented SPV AMD shader ballot op ", ts&: eop);
9189 break;
9190 }
9191}
9192
9193void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
9194 const uint32_t *args, uint32_t)
9195{
9196 require_extension_internal(ext: "GL_AMD_shader_explicit_vertex_parameter");
9197
9198 enum AMDShaderExplicitVertexParameter
9199 {
9200 InterpolateAtVertexAMD = 1
9201 };
9202
9203 auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
9204
9205 switch (op)
9206 {
9207 case InterpolateAtVertexAMD:
9208 emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "interpolateAtVertexAMD");
9209 break;
9210
9211 default:
9212 statement(ts: "// unimplemented SPV AMD shader explicit vertex parameter op ", ts&: eop);
9213 break;
9214 }
9215}
9216
9217void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
9218 const uint32_t *args, uint32_t)
9219{
9220 require_extension_internal(ext: "GL_AMD_shader_trinary_minmax");
9221
9222 enum AMDShaderTrinaryMinMax
9223 {
9224 FMin3AMD = 1,
9225 UMin3AMD = 2,
9226 SMin3AMD = 3,
9227 FMax3AMD = 4,
9228 UMax3AMD = 5,
9229 SMax3AMD = 6,
9230 FMid3AMD = 7,
9231 UMid3AMD = 8,
9232 SMid3AMD = 9
9233 };
9234
9235 auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
9236
9237 switch (op)
9238 {
9239 case FMin3AMD:
9240 case UMin3AMD:
9241 case SMin3AMD:
9242 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "min3");
9243 break;
9244
9245 case FMax3AMD:
9246 case UMax3AMD:
9247 case SMax3AMD:
9248 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "max3");
9249 break;
9250
9251 case FMid3AMD:
9252 case UMid3AMD:
9253 case SMid3AMD:
9254 emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "mid3");
9255 break;
9256
9257 default:
9258 statement(ts: "// unimplemented SPV AMD shader trinary minmax op ", ts&: eop);
9259 break;
9260 }
9261}
9262
9263void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
9264 uint32_t)
9265{
9266 require_extension_internal(ext: "GL_AMD_gcn_shader");
9267
9268 enum AMDGCNShader
9269 {
9270 CubeFaceIndexAMD = 1,
9271 CubeFaceCoordAMD = 2,
9272 TimeAMD = 3
9273 };
9274
9275 auto op = static_cast<AMDGCNShader>(eop);
9276
9277 switch (op)
9278 {
9279 case CubeFaceIndexAMD:
9280 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "cubeFaceIndexAMD");
9281 break;
9282 case CubeFaceCoordAMD:
9283 emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "cubeFaceCoordAMD");
9284 break;
9285 case TimeAMD:
9286 {
9287 string expr = "timeAMD()";
9288 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
9289 register_control_dependent_expression(expr: id);
9290 break;
9291 }
9292
9293 default:
9294 statement(ts: "// unimplemented SPV AMD gcn shader op ", ts&: eop);
9295 break;
9296 }
9297}
9298
9299void CompilerGLSL::emit_subgroup_op(const Instruction &i)
9300{
9301 const uint32_t *ops = stream(instr: i);
9302 auto op = static_cast<Op>(i.op);
9303
9304 if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op, ops))
9305 SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
9306
9307 // If we need to do implicit bitcasts, make sure we do it with the correct type.
9308 uint32_t integer_width = get_integer_width_for_instruction(instr: i);
9309 auto int_type = to_signed_basetype(width: integer_width);
9310 auto uint_type = to_unsigned_basetype(width: integer_width);
9311
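// This function makes two passes over the same opcode: the switch directly below only
// requests the required subgroup features / extensions, while the second switch further
// down emits the actual subgroupXXX() call.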
9312 switch (op)
9313 {
9314 case OpGroupNonUniformElect:
9315 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupElect);
9316 break;
9317
9318 case OpGroupNonUniformBallotBitCount:
9319 {
9320 const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
9321 if (operation == GroupOperationReduce)
9322 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
9323 else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
9324 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
9325 }
9326 break;
9327
9328 case OpGroupNonUniformBallotBitExtract:
9329 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
9330 break;
9331
9332 case OpGroupNonUniformInverseBallot:
9333 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
9334 break;
9335
9336 case OpGroupNonUniformBallot:
9337 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBallot);
9338 break;
9339
9340 case OpGroupNonUniformBallotFindLSB:
9341 case OpGroupNonUniformBallotFindMSB:
9342 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
9343 break;
9344
9345 case OpGroupNonUniformBroadcast:
9346 case OpGroupNonUniformBroadcastFirst:
9347 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
9348 break;
9349
9350 case OpGroupNonUniformShuffle:
9351 case OpGroupNonUniformShuffleXor:
9352 require_extension_internal(ext: "GL_KHR_shader_subgroup_shuffle");
9353 break;
9354
9355 case OpGroupNonUniformShuffleUp:
9356 case OpGroupNonUniformShuffleDown:
9357 require_extension_internal(ext: "GL_KHR_shader_subgroup_shuffle_relative");
9358 break;
9359
9360 case OpGroupNonUniformAll:
9361 case OpGroupNonUniformAny:
9362 case OpGroupNonUniformAllEqual:
9363 {
9364 const SPIRType &type = expression_type(id: ops[3]);
9365 if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
9366 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
9367 else
9368 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupAllEqualT);
9369 }
9370 break;
9371
9372 // clang-format off
9373#define GLSL_GROUP_OP(OP)\
9374 case OpGroupNonUniform##OP:\
9375 {\
9376 auto operation = static_cast<GroupOperation>(ops[3]);\
9377 if (operation == GroupOperationClusteredReduce)\
9378 require_extension_internal("GL_KHR_shader_subgroup_clustered");\
9379 else if (operation == GroupOperationReduce)\
9380 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##Reduce);\
9381 else if (operation == GroupOperationExclusiveScan)\
9382 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##ExclusiveScan);\
9383 else if (operation == GroupOperationInclusiveScan)\
9384 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##InclusiveScan);\
9385 else\
9386 SPIRV_CROSS_THROW("Invalid group operation.");\
9387 break;\
9388 }
9389
9390 GLSL_GROUP_OP(IAdd)
9391 GLSL_GROUP_OP(FAdd)
9392 GLSL_GROUP_OP(IMul)
9393 GLSL_GROUP_OP(FMul)
9394
9395#undef GLSL_GROUP_OP
9396 // clang-format on
9397
9398 case OpGroupNonUniformFMin:
9399 case OpGroupNonUniformFMax:
9400 case OpGroupNonUniformSMin:
9401 case OpGroupNonUniformSMax:
9402 case OpGroupNonUniformUMin:
9403 case OpGroupNonUniformUMax:
9404 case OpGroupNonUniformBitwiseAnd:
9405 case OpGroupNonUniformBitwiseOr:
9406 case OpGroupNonUniformBitwiseXor:
9407 case OpGroupNonUniformLogicalAnd:
9408 case OpGroupNonUniformLogicalOr:
9409 case OpGroupNonUniformLogicalXor:
9410 {
9411 auto operation = static_cast<GroupOperation>(ops[3]);
9412 if (operation == GroupOperationClusteredReduce)
9413 {
9414 require_extension_internal(ext: "GL_KHR_shader_subgroup_clustered");
9415 }
9416 else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
9417 operation == GroupOperationReduce)
9418 {
9419 require_extension_internal(ext: "GL_KHR_shader_subgroup_arithmetic");
9420 }
9421 else
9422 SPIRV_CROSS_THROW("Invalid group operation.");
9423 break;
9424 }
9425
9426 case OpGroupNonUniformQuadSwap:
9427 case OpGroupNonUniformQuadBroadcast:
9428 require_extension_internal(ext: "GL_KHR_shader_subgroup_quad");
9429 break;
9430
9431 default:
9432 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
9433 }
9434
9435 uint32_t result_type = ops[0];
9436 uint32_t id = ops[1];
9437
9438 auto scope = static_cast<Scope>(evaluate_constant_u32(id: ops[2]));
9439 if (scope != ScopeSubgroup)
9440 SPIRV_CROSS_THROW("Only subgroup scope is supported.");
9441
9442 switch (op)
9443 {
9444 case OpGroupNonUniformElect:
9445 emit_op(result_type, result_id: id, rhs: "subgroupElect()", forwarding: true);
9446 break;
9447
9448 case OpGroupNonUniformBroadcast:
9449 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupBroadcast");
9450 break;
9451
9452 case OpGroupNonUniformBroadcastFirst:
9453 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupBroadcastFirst");
9454 break;
9455
9456 case OpGroupNonUniformBallot:
9457 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupBallot");
9458 break;
9459
9460 case OpGroupNonUniformInverseBallot:
9461 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupInverseBallot");
9462 break;
9463
9464 case OpGroupNonUniformBallotBitExtract:
9465 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupBallotBitExtract");
9466 break;
9467
9468 case OpGroupNonUniformBallotFindLSB:
9469 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupBallotFindLSB");
9470 break;
9471
9472 case OpGroupNonUniformBallotFindMSB:
9473 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupBallotFindMSB");
9474 break;
9475
9476 case OpGroupNonUniformBallotBitCount:
9477 {
9478 auto operation = static_cast<GroupOperation>(ops[3]);
9479 if (operation == GroupOperationReduce)
9480 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "subgroupBallotBitCount");
9481 else if (operation == GroupOperationInclusiveScan)
9482 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "subgroupBallotInclusiveBitCount");
9483 else if (operation == GroupOperationExclusiveScan)
9484 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "subgroupBallotExclusiveBitCount");
9485 else
9486 SPIRV_CROSS_THROW("Invalid BitCount operation.");
9487 break;
9488 }
9489
9490 case OpGroupNonUniformShuffle:
9491 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupShuffle");
9492 break;
9493
9494 case OpGroupNonUniformShuffleXor:
9495 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupShuffleXor");
9496 break;
9497
9498 case OpGroupNonUniformShuffleUp:
9499 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupShuffleUp");
9500 break;
9501
9502 case OpGroupNonUniformShuffleDown:
9503 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupShuffleDown");
9504 break;
9505
9506 case OpGroupNonUniformAll:
9507 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupAll");
9508 break;
9509
9510 case OpGroupNonUniformAny:
9511 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupAny");
9512 break;
9513
9514 case OpGroupNonUniformAllEqual:
9515 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupAllEqual");
9516 break;
9517
9518 // clang-format off
9519#define GLSL_GROUP_OP(op, glsl_op) \
9520case OpGroupNonUniform##op: \
9521 { \
9522 auto operation = static_cast<GroupOperation>(ops[3]); \
9523 if (operation == GroupOperationReduce) \
9524 emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
9525 else if (operation == GroupOperationInclusiveScan) \
9526 emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
9527 else if (operation == GroupOperationExclusiveScan) \
9528 emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
9529 else if (operation == GroupOperationClusteredReduce) \
9530 emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
9531 else \
9532 SPIRV_CROSS_THROW("Invalid group operation."); \
9533 break; \
9534 }
9535
9536#define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
9537case OpGroupNonUniform##op: \
9538 { \
9539 auto operation = static_cast<GroupOperation>(ops[3]); \
9540 if (operation == GroupOperationReduce) \
9541 emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
9542 else if (operation == GroupOperationInclusiveScan) \
9543 emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
9544 else if (operation == GroupOperationExclusiveScan) \
9545 emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
9546 else if (operation == GroupOperationClusteredReduce) \
9547 emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
9548 else \
9549 SPIRV_CROSS_THROW("Invalid group operation."); \
9550 break; \
9551 }
9552
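// The invocations below expand to calls such as (illustrative) "subgroupAdd(x)",
// "subgroupInclusiveAdd(x)", "subgroupExclusiveAdd(x)" and "subgroupClusteredAdd(x, N)";
// the _CAST variants additionally cast signed/unsigned operands to the expected integer type.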
9553 GLSL_GROUP_OP(FAdd, Add)
9554 GLSL_GROUP_OP(FMul, Mul)
9555 GLSL_GROUP_OP(FMin, Min)
9556 GLSL_GROUP_OP(FMax, Max)
9557 GLSL_GROUP_OP(IAdd, Add)
9558 GLSL_GROUP_OP(IMul, Mul)
9559 GLSL_GROUP_OP_CAST(SMin, Min, int_type)
9560 GLSL_GROUP_OP_CAST(SMax, Max, int_type)
9561 GLSL_GROUP_OP_CAST(UMin, Min, uint_type)
9562 GLSL_GROUP_OP_CAST(UMax, Max, uint_type)
9563 GLSL_GROUP_OP(BitwiseAnd, And)
9564 GLSL_GROUP_OP(BitwiseOr, Or)
9565 GLSL_GROUP_OP(BitwiseXor, Xor)
9566 GLSL_GROUP_OP(LogicalAnd, And)
9567 GLSL_GROUP_OP(LogicalOr, Or)
9568 GLSL_GROUP_OP(LogicalXor, Xor)
9569#undef GLSL_GROUP_OP
9570#undef GLSL_GROUP_OP_CAST
9571 // clang-format on
9572
9573 case OpGroupNonUniformQuadSwap:
9574 {
9575 uint32_t direction = evaluate_constant_u32(id: ops[4]);
9576 if (direction == 0)
9577 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupQuadSwapHorizontal");
9578 else if (direction == 1)
9579 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupQuadSwapVertical");
9580 else if (direction == 2)
9581 emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupQuadSwapDiagonal");
9582 else
9583 SPIRV_CROSS_THROW("Invalid quad swap direction.");
9584 break;
9585 }
9586
9587 case OpGroupNonUniformQuadBroadcast:
9588 {
9589 emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupQuadBroadcast");
9590 break;
9591 }
9592
9593 default:
9594 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
9595 }
9596
9597 register_control_dependent_expression(expr: id);
9598}
9599
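// Illustrative mapping (sketch, not exhaustive): a 32-bit Float -> UInt bitcast yields
// "floatBitsToUint", an equal-width integer bitcast degenerates to the target type
// constructor (e.g. "uint"), and uint64 <-> uvec2 round-trips via packUint2x32 /
// unpackUint2x32. An empty return string means no wrapping function is emitted.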
9600string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
9601{
9602 // OpBitcast can deal with pointers.
9603 if (out_type.pointer || in_type.pointer)
9604 {
9605 if (out_type.vecsize == 2 || in_type.vecsize == 2)
9606 require_extension_internal(ext: "GL_EXT_buffer_reference_uvec2");
9607 return type_to_glsl(type: out_type);
9608 }
9609
9610 if (out_type.basetype == in_type.basetype)
9611 return "";
9612
9613 assert(out_type.basetype != SPIRType::Boolean);
9614 assert(in_type.basetype != SPIRType::Boolean);
9615
9616 bool integral_cast = type_is_integral(type: out_type) && type_is_integral(type: in_type);
9617 bool same_size_cast = out_type.width == in_type.width;
9618
9619 // Trivial bitcast case, casts between integers.
9620 if (integral_cast && same_size_cast)
9621 return type_to_glsl(type: out_type);
9622
9623 // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
9624 if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
9625 return "unpack8";
9626 else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
9627 return "pack16";
9628 else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
9629 return "pack32";
9630
9631 // Floating <-> Integer special casts. Just have to enumerate all cases. :(
9632 // 16-bit, 32-bit and 64-bit floats.
9633 if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
9634 {
9635 if (is_legacy_es())
9636 SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
9637 else if (!options.es && options.version < 330)
9638 require_extension_internal(ext: "GL_ARB_shader_bit_encoding");
9639 return "floatBitsToUint";
9640 }
9641 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
9642 {
9643 if (is_legacy_es())
9644 SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
9645 else if (!options.es && options.version < 330)
9646 require_extension_internal(ext: "GL_ARB_shader_bit_encoding");
9647 return "floatBitsToInt";
9648 }
9649 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
9650 {
9651 if (is_legacy_es())
9652 SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
9653 else if (!options.es && options.version < 330)
9654 require_extension_internal(ext: "GL_ARB_shader_bit_encoding");
9655 return "uintBitsToFloat";
9656 }
9657 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
9658 {
9659 if (is_legacy_es())
9660 SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
9661 else if (!options.es && options.version < 330)
9662 require_extension_internal(ext: "GL_ARB_shader_bit_encoding");
9663 return "intBitsToFloat";
9664 }
9665
9666 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
9667 return "doubleBitsToInt64";
9668 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
9669 return "doubleBitsToUint64";
9670 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
9671 return "int64BitsToDouble";
9672 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
9673 return "uint64BitsToDouble";
9674 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
9675 return "float16BitsToInt16";
9676 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
9677 return "float16BitsToUint16";
9678 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
9679 return "int16BitsToFloat16";
9680 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
9681 return "uint16BitsToFloat16";
9682
9683 // And finally, some even more special purpose casts.
9684 if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
9685 return "packUint2x32";
9686 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
9687 return "unpackUint2x32";
9688 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
9689 return "unpackFloat2x16";
9690 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
9691 return "packFloat2x16";
9692 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
9693 return "packInt2x16";
9694 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
9695 return "unpackInt2x16";
9696 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
9697 return "packUint2x16";
9698 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
9699 return "unpackUint2x16";
9700 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
9701 return "packInt4x16";
9702 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
9703 return "unpackInt4x16";
9704 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
9705 return "packUint4x16";
9706 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
9707 return "unpackUint4x16";
9708
9709 return "";
9710}
9711
9712string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
9713{
9714 auto op = bitcast_glsl_op(out_type: result_type, in_type: expression_type(id: argument));
9715 if (op.empty())
9716 return to_enclosed_unpacked_expression(id: argument);
9717 else
9718 return join(ts&: op, ts: "(", ts: to_unpacked_expression(id: argument), ts: ")");
9719}
9720
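// Sketch of intended use: bitcast_expression(SPIRType::UInt, float_arg) wraps the argument
// as roughly "floatBitsToUint(<expr>)", while a matching basetype returns the expression
// unchanged.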
9721std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
9722{
9723 auto expr = to_expression(id: arg);
9724 auto &src_type = expression_type(id: arg);
9725 if (src_type.basetype != target_type)
9726 {
9727 auto target = src_type;
9728 target.basetype = target_type;
9729 expr = join(ts: bitcast_glsl_op(out_type: target, in_type: src_type), ts: "(", ts&: expr, ts: ")");
9730 }
9731
9732 return expr;
9733}
9734
9735std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
9736 const std::string &expr)
9737{
9738 if (target_type.basetype == expr_type)
9739 return expr;
9740
9741 auto src_type = target_type;
9742 src_type.basetype = expr_type;
9743 return join(ts: bitcast_glsl_op(out_type: target_type, in_type: src_type), ts: "(", ts: expr, ts: ")");
9744}
9745
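// Illustrative examples: BuiltInFragCoord maps to "gl_FragCoord"; BuiltInVertexIndex maps
// to "gl_VertexIndex" under Vulkan semantics and to "gl_VertexID" otherwise; builtins with
// no known GLSL name fall back to a "gl_BuiltIn_<value>" placeholder.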
9746string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
9747{
9748 switch (builtin)
9749 {
9750 case BuiltInPosition:
9751 return "gl_Position";
9752 case BuiltInPointSize:
9753 return "gl_PointSize";
9754 case BuiltInClipDistance:
9755 {
9756 if (options.es)
9757 require_extension_internal(ext: "GL_EXT_clip_cull_distance");
9758 return "gl_ClipDistance";
9759 }
9760 case BuiltInCullDistance:
9761 {
9762 if (options.es)
9763 require_extension_internal(ext: "GL_EXT_clip_cull_distance");
9764 return "gl_CullDistance";
9765 }
9766 case BuiltInVertexId:
9767 if (options.vulkan_semantics)
9768 SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
9769 "with GL semantics.");
9770 return "gl_VertexID";
9771 case BuiltInInstanceId:
9772 if (options.vulkan_semantics)
9773 {
9774 auto model = get_entry_point().model;
9775 switch (model)
9776 {
9777 case spv::ExecutionModelIntersectionKHR:
9778 case spv::ExecutionModelAnyHitKHR:
9779 case spv::ExecutionModelClosestHitKHR:
9780 // gl_InstanceID is allowed in these shaders.
9781 break;
9782
9783 default:
9784 SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
9785 "created with GL semantics.");
9786 }
9787 }
9788 if (!options.es && options.version < 140)
9789 {
9790 require_extension_internal(ext: "GL_ARB_draw_instanced");
9791 }
9792 return "gl_InstanceID";
9793 case BuiltInVertexIndex:
9794 if (options.vulkan_semantics)
9795 return "gl_VertexIndex";
9796 else
9797 return "gl_VertexID"; // gl_VertexID already has the base offset applied.
9798 case BuiltInInstanceIndex:
9799 if (options.vulkan_semantics)
9800 return "gl_InstanceIndex";
9801
9802 if (!options.es && options.version < 140)
9803 {
9804 require_extension_internal(ext: "GL_ARB_draw_instanced");
9805 }
9806
9807 if (options.vertex.support_nonzero_base_instance)
9808 {
9809 if (!options.vulkan_semantics)
9810 {
9811 // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
9812 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
9813 }
9814 return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
9815 }
9816 else
9817 return "gl_InstanceID";
9818 case BuiltInPrimitiveId:
9819 if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
9820 return "gl_PrimitiveIDIn";
9821 else
9822 return "gl_PrimitiveID";
9823 case BuiltInInvocationId:
9824 return "gl_InvocationID";
9825 case BuiltInLayer:
9826 return "gl_Layer";
9827 case BuiltInViewportIndex:
9828 return "gl_ViewportIndex";
9829 case BuiltInTessLevelOuter:
9830 return "gl_TessLevelOuter";
9831 case BuiltInTessLevelInner:
9832 return "gl_TessLevelInner";
9833 case BuiltInTessCoord:
9834 return "gl_TessCoord";
9835 case BuiltInPatchVertices:
9836 return "gl_PatchVerticesIn";
9837 case BuiltInFragCoord:
9838 return "gl_FragCoord";
9839 case BuiltInPointCoord:
9840 return "gl_PointCoord";
9841 case BuiltInFrontFacing:
9842 return "gl_FrontFacing";
9843 case BuiltInFragDepth:
9844 return "gl_FragDepth";
9845 case BuiltInNumWorkgroups:
9846 return "gl_NumWorkGroups";
9847 case BuiltInWorkgroupSize:
9848 return "gl_WorkGroupSize";
9849 case BuiltInWorkgroupId:
9850 return "gl_WorkGroupID";
9851 case BuiltInLocalInvocationId:
9852 return "gl_LocalInvocationID";
9853 case BuiltInGlobalInvocationId:
9854 return "gl_GlobalInvocationID";
9855 case BuiltInLocalInvocationIndex:
9856 return "gl_LocalInvocationIndex";
9857 case BuiltInHelperInvocation:
9858 return "gl_HelperInvocation";
9859
9860 case BuiltInBaseVertex:
9861 if (options.es)
9862 SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
9863
9864 if (options.vulkan_semantics)
9865 {
9866 if (options.version < 460)
9867 {
9868 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
9869 return "gl_BaseVertexARB";
9870 }
9871 return "gl_BaseVertex";
9872 }
9873 // On regular GL, this is soft-enabled and we emit ifdefs in code.
9874 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
9875 return "SPIRV_Cross_BaseVertex";
9876
9877 case BuiltInBaseInstance:
9878 if (options.es)
9879 SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
9880
9881 if (options.vulkan_semantics)
9882 {
9883 if (options.version < 460)
9884 {
9885 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
9886 return "gl_BaseInstanceARB";
9887 }
9888 return "gl_BaseInstance";
9889 }
9890 // On regular GL, this is soft-enabled and we emit ifdefs in code.
9891 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
9892 return "SPIRV_Cross_BaseInstance";
9893
9894 case BuiltInDrawIndex:
9895 if (options.es)
9896 SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
9897
9898 if (options.vulkan_semantics)
9899 {
9900 if (options.version < 460)
9901 {
9902 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
9903 return "gl_DrawIDARB";
9904 }
9905 return "gl_DrawID";
9906 }
9907 // On regular GL, this is soft-enabled and we emit ifdefs in code.
9908 require_extension_internal(ext: "GL_ARB_shader_draw_parameters");
9909 return "gl_DrawIDARB";
9910
9911 case BuiltInSampleId:
9912 if (is_legacy())
9913 SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
9914 else if (options.es && options.version < 320)
9915 require_extension_internal(ext: "GL_OES_sample_variables");
9916 else if (!options.es && options.version < 400)
9917 require_extension_internal(ext: "GL_ARB_sample_shading");
9918 return "gl_SampleID";
9919
9920 case BuiltInSampleMask:
9921 if (is_legacy())
9922 SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
9923 else if (options.es && options.version < 320)
9924 require_extension_internal(ext: "GL_OES_sample_variables");
9925 else if (!options.es && options.version < 400)
9926 require_extension_internal(ext: "GL_ARB_sample_shading");
9927
9928 if (storage == StorageClassInput)
9929 return "gl_SampleMaskIn";
9930 else
9931 return "gl_SampleMask";
9932
9933 case BuiltInSamplePosition:
9934 if (is_legacy())
9935 SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
9936 else if (options.es && options.version < 320)
9937 require_extension_internal(ext: "GL_OES_sample_variables");
9938 else if (!options.es && options.version < 400)
9939 require_extension_internal(ext: "GL_ARB_sample_shading");
9940 return "gl_SamplePosition";
9941
9942 case BuiltInViewIndex:
9943 if (options.vulkan_semantics)
9944 return "gl_ViewIndex";
9945 else
9946 return "gl_ViewID_OVR";
9947
9948 case BuiltInNumSubgroups:
9949 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::NumSubgroups);
9950 return "gl_NumSubgroups";
9951
9952 case BuiltInSubgroupId:
9953 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupID);
9954 return "gl_SubgroupID";
9955
9956 case BuiltInSubgroupSize:
9957 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupSize);
9958 return "gl_SubgroupSize";
9959
9960 case BuiltInSubgroupLocalInvocationId:
9961 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupInvocationID);
9962 return "gl_SubgroupInvocationID";
9963
9964 case BuiltInSubgroupEqMask:
9965 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
9966 return "gl_SubgroupEqMask";
9967
9968 case BuiltInSubgroupGeMask:
9969 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
9970 return "gl_SubgroupGeMask";
9971
9972 case BuiltInSubgroupGtMask:
9973 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
9974 return "gl_SubgroupGtMask";
9975
9976 case BuiltInSubgroupLeMask:
9977 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
9978 return "gl_SubgroupLeMask";
9979
9980 case BuiltInSubgroupLtMask:
9981 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask);
9982 return "gl_SubgroupLtMask";
9983
9984 case BuiltInLaunchIdKHR:
9985 return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
9986 case BuiltInLaunchSizeKHR:
9987 return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
9988 case BuiltInWorldRayOriginKHR:
9989 return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
9990 case BuiltInWorldRayDirectionKHR:
9991 return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
9992 case BuiltInObjectRayOriginKHR:
9993 return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
9994 case BuiltInObjectRayDirectionKHR:
9995 return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
9996 case BuiltInRayTminKHR:
9997 return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
9998 case BuiltInRayTmaxKHR:
9999 return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
10000 case BuiltInInstanceCustomIndexKHR:
10001 return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
10002 case BuiltInObjectToWorldKHR:
10003 return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
10004 case BuiltInWorldToObjectKHR:
10005 return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
10006 case BuiltInHitTNV:
10007 // gl_HitTEXT is an alias of RayTMax in KHR.
10008 return "gl_HitTNV";
10009 case BuiltInHitKindKHR:
10010 return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
10011 case BuiltInIncomingRayFlagsKHR:
10012 return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";
10013
10014 case BuiltInBaryCoordKHR:
10015 {
10016 if (options.es && options.version < 320)
10017 SPIRV_CROSS_THROW("gl_BaryCoordEXT requires ESSL 320.");
10018 else if (!options.es && options.version < 450)
10019 SPIRV_CROSS_THROW("gl_BaryCoordEXT requires GLSL 450.");
10020
10021 if (barycentric_is_nv)
10022 {
10023 require_extension_internal(ext: "GL_NV_fragment_shader_barycentric");
10024 return "gl_BaryCoordNV";
10025 }
10026 else
10027 {
10028 require_extension_internal(ext: "GL_EXT_fragment_shader_barycentric");
10029 return "gl_BaryCoordEXT";
10030 }
10031 }
10032
10033 case BuiltInBaryCoordNoPerspNV:
10034 {
10035 if (options.es && options.version < 320)
10036 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires ESSL 320.");
10037 else if (!options.es && options.version < 450)
10038 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires GLSL 450.");
10039
10040 if (barycentric_is_nv)
10041 {
10042 require_extension_internal(ext: "GL_NV_fragment_shader_barycentric");
10043 return "gl_BaryCoordNoPerspNV";
10044 }
10045 else
10046 {
10047 require_extension_internal(ext: "GL_EXT_fragment_shader_barycentric");
10048 return "gl_BaryCoordNoPerspEXT";
10049 }
10050 }
10051
10052 case BuiltInFragStencilRefEXT:
10053 {
10054 if (!options.es)
10055 {
10056 require_extension_internal(ext: "GL_ARB_shader_stencil_export");
10057 return "gl_FragStencilRefARB";
10058 }
10059 else
10060 SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
10061 }
10062
10063 case BuiltInPrimitiveShadingRateKHR:
10064 {
10065 if (!options.vulkan_semantics)
10066 SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL.");
10067 require_extension_internal(ext: "GL_EXT_fragment_shading_rate");
10068 return "gl_PrimitiveShadingRateEXT";
10069 }
10070
10071 case BuiltInShadingRateKHR:
10072 {
10073 if (!options.vulkan_semantics)
10074 SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL.");
10075 require_extension_internal(ext: "GL_EXT_fragment_shading_rate");
10076 return "gl_ShadingRateEXT";
10077 }
10078
10079 case BuiltInDeviceIndex:
10080 if (!options.vulkan_semantics)
10081 SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
10082 require_extension_internal(ext: "GL_EXT_device_group");
10083 return "gl_DeviceIndex";
10084
10085 case BuiltInFullyCoveredEXT:
10086 if (!options.es)
10087 require_extension_internal(ext: "GL_NV_conservative_raster_underestimation");
10088 else
10089 SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation.");
10090 return "gl_FragFullyCoveredNV";
10091
10092 case BuiltInPrimitiveTriangleIndicesEXT:
10093 return "gl_PrimitiveTriangleIndicesEXT";
10094 case BuiltInPrimitiveLineIndicesEXT:
10095 return "gl_PrimitiveLineIndicesEXT";
10096 case BuiltInPrimitivePointIndicesEXT:
10097 return "gl_PrimitivePointIndicesEXT";
10098 case BuiltInCullPrimitiveEXT:
10099 return "gl_CullPrimitiveEXT";
10100
10101 default:
10102 return join(ts: "gl_BuiltIn_", ts: convert_to_string(t: builtin));
10103 }
10104}
10105
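// e.g. index_to_swizzle(2) returns "z", so a component access can be emitted as "v.z".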
10106const char *CompilerGLSL::index_to_swizzle(uint32_t index)
10107{
10108 switch (index)
10109 {
10110 case 0:
10111 return "x";
10112 case 1:
10113 return "y";
10114 case 2:
10115 return "z";
10116 case 3:
10117 return "w";
10118 default:
10119 return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in spec.
10120 }
10121}
10122
10123void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
10124 AccessChainFlags flags, bool &access_chain_is_arrayed,
10125 uint32_t index)
10126{
10127 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
10128 bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
10129 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
10130
10131 string idx_expr = index_is_literal ? convert_to_string(t: index) : to_unpacked_expression(id: index, register_expression_read);
10132
10133 // For the case where the base of an OpPtrAccessChain already ends in [n],
10134 // we need to use the index as an offset to the existing index, otherwise,
10135 // we can just use the index directly.
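// e.g. (sketch) if the chain so far is "buf.data[3]" and we append index "i" through an
// OpPtrAccessChain, the result becomes roughly "buf.data[3 + i]" rather than "buf.data[3][i]".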
10136 if (ptr_chain && access_chain_is_arrayed)
10137 {
10138 size_t split_pos = expr.find_last_of(c: ']');
10139 size_t enclose_split = expr.find_last_of(c: ')');
10140
10141 // If we have already enclosed the expression, don't try to be clever, it will break.
10142 if (split_pos > enclose_split || enclose_split == string::npos)
10143 {
10144 string expr_front = expr.substr(pos: 0, n: split_pos);
10145 string expr_back = expr.substr(pos: split_pos);
10146 expr = expr_front + " + " + enclose_expression(expr: idx_expr) + expr_back;
10147 return;
10148 }
10149 }
10150
10151 expr += "[";
10152 expr += idx_expr;
10153 expr += "]";
10154}
10155
10156bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
10157{
10158 return true;
10159}
10160
10161string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
10162 AccessChainFlags flags, AccessChainMeta *meta)
10163{
10164 string expr;
10165
10166 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
10167 bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
10168 bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
10169 bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
10170 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
10171 bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;
10172
10173 if (!chain_only)
10174 {
10175 // We handle transpose explicitly, so don't resolve that here.
10176 auto *e = maybe_get<SPIRExpression>(id: base);
10177 bool old_transpose = e && e->need_transpose;
10178 if (e)
10179 e->need_transpose = false;
10180 expr = to_enclosed_expression(id: base, register_expression_read);
10181 if (e)
10182 e->need_transpose = old_transpose;
10183 }
10184
10185 // Start traversing type hierarchy at the proper non-pointer types,
10186 // but keep type_id referencing the original pointer for use below.
10187 uint32_t type_id = expression_type_id(id: base);
10188 const auto *type = &get_pointee_type(type_id);
10189
10190 if (!backend.native_pointers)
10191 {
10192 if (ptr_chain)
10193 SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
10194
10195 // Wrapped buffer reference pointer types will need to poke into the internal "value" member before
10196 // continuing the access chain.
10197 if (should_dereference(id: base))
10198 expr = dereference_expression(expr_type: get<SPIRType>(id: type_id), expr);
10199 }
10200 else if (should_dereference(id: base) && type->basetype != SPIRType::Struct && !ptr_chain)
10201 expr = join(ts: "(", ts: dereference_expression(expr_type: *type, expr), ts: ")");
10202
10203 bool access_chain_is_arrayed = expr.find_first_of(c: '[') != string::npos;
10204 bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(id: base);
10205 bool is_packed = has_extended_decoration(id: base, decoration: SPIRVCrossDecorationPhysicalTypePacked);
10206 uint32_t physical_type = get_extended_decoration(id: base, decoration: SPIRVCrossDecorationPhysicalTypeID);
10207 bool is_invariant = has_decoration(id: base, decoration: DecorationInvariant);
10208 bool relaxed_precision = has_decoration(id: base, decoration: DecorationRelaxedPrecision);
10209 bool pending_array_enclose = false;
10210 bool dimension_flatten = false;
10211 bool access_meshlet_position_y = false;
10212
10213 if (auto *base_expr = maybe_get<SPIRExpression>(id: base))
10214 {
10215 access_meshlet_position_y = base_expr->access_meshlet_position_y;
10216 }
10217
10218 // If we are translating access to a structured buffer, the first subscript '._m0' must be hidden.
10219 bool hide_first_subscript = count > 1 && is_user_type_structured(id: base);
10220
10221 const auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) {
10222 AccessChainFlags mod_flags = flags;
10223 if (!is_literal)
10224 mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
10225 if (!is_ptr_chain)
10226 mod_flags &= ~ACCESS_CHAIN_PTR_CHAIN_BIT;
10227 access_chain_internal_append_index(expr, base, type, flags: mod_flags, access_chain_is_arrayed, index);
10228 check_physical_type_cast(expr, type, physical_type);
10229 };
10230
10231 for (uint32_t i = 0; i < count; i++)
10232 {
10233 uint32_t index = indices[i];
10234
10235 bool is_literal = index_is_literal;
10236 if (is_literal && msb_is_id && (index >> 31u) != 0u)
10237 {
10238 is_literal = false;
10239 index &= 0x7fffffffu;
10240 }
10241
10242 bool ptr_chain_array_entry = ptr_chain && i == 0 && is_array(type: *type);
10243
10244 if (ptr_chain_array_entry)
10245 {
10246 // This is highly unusual code, since normally we'd use plain AccessChain, but it's still allowed.
10247 // We are considered to have a pointer to array and one element shifts by one array at a time.
10248 // If we use normal array indexing, we'll first decay to pointer, and lose the array-ness,
10249 // so we have to take pointer to array explicitly.
10250 if (!should_dereference(id: base))
10251 expr = enclose_expression(expr: address_of_expression(expr));
10252 }
10253
10254 if (ptr_chain && i == 0)
10255 {
10256 // Pointer chains
10257 // If we are flattening multidimensional arrays, only create opening bracket on first
10258 // array index.
10259 if (options.flatten_multidimensional_arrays)
10260 {
10261 dimension_flatten = type->array.size() >= 1;
10262 pending_array_enclose = dimension_flatten;
10263 if (pending_array_enclose)
10264 expr += "[";
10265 }
10266
10267 if (options.flatten_multidimensional_arrays && dimension_flatten)
10268 {
10269 // If we are flattening multidimensional arrays, do manual stride computation.
10270 if (is_literal)
10271 expr += convert_to_string(t: index);
10272 else
10273 expr += to_enclosed_expression(id: index, register_expression_read);
10274
10275 for (auto j = uint32_t(type->array.size()); j; j--)
10276 {
10277 expr += " * ";
10278 expr += enclose_expression(expr: to_array_size(type: *type, index: j - 1));
10279 }
10280
10281 if (type->array.empty())
10282 pending_array_enclose = false;
10283 else
10284 expr += " + ";
10285
10286 if (!pending_array_enclose)
10287 expr += "]";
10288 }
10289 else
10290 {
10291 append_index(index, is_literal, true);
10292 }
10293
10294 if (type->basetype == SPIRType::ControlPointArray)
10295 {
10296 type_id = type->parent_type;
10297 type = &get<SPIRType>(id: type_id);
10298 }
10299
10300 access_chain_is_arrayed = true;
10301
10302 // Explicitly enclose the expression if this is one of the weird pointer-to-array cases.
10303 // We don't want any future indexing to add to this array dereference.
10304 // Enclosing the expression blocks that and avoids any shenanigans with operand priority.
10305 if (ptr_chain_array_entry)
10306 expr = join(ts: "(", ts&: expr, ts: ")");
10307 }
10308 // Arrays
10309 else if (!type->array.empty())
10310 {
10311 // If we are flattening multidimensional arrays, only create opening bracket on first
10312 // array index.
10313 if (options.flatten_multidimensional_arrays && !pending_array_enclose)
10314 {
10315 dimension_flatten = type->array.size() > 1;
10316 pending_array_enclose = dimension_flatten;
10317 if (pending_array_enclose)
10318 expr += "[";
10319 }
10320
10321 assert(type->parent_type);
10322
10323 auto *var = maybe_get<SPIRVariable>(id: base);
10324 if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(var: *var) &&
10325 !has_decoration(id: type->self, decoration: DecorationBlock))
10326 {
10327 // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
10328 // Normally, these variables live in blocks when compiled from GLSL,
10329 // but HLSL seems to just emit straight arrays here.
10330 // We must pretend this access goes through gl_in/gl_out arrays
10331 // to be able to access certain builtins as arrays.
10332 // Similar concerns apply for mesh shaders where we have to redirect to gl_MeshVerticesEXT or MeshPrimitivesEXT.
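// e.g. (illustrative) a tessellation-control access to gl_Position with index 2 is
// redirected to "gl_in[2].gl_Position" / "gl_out[2].gl_Position", and in a mesh shader
// to "gl_MeshVerticesEXT[2].gl_Position".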
10333 auto builtin = ir.meta[base].decoration.builtin_type;
10334 bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT;
10335
10336 switch (builtin)
10337 {
10338 case BuiltInCullDistance:
10339 case BuiltInClipDistance:
10340 if (type->array.size() == 1) // Red herring. Only consider block IO for two-dimensional arrays here.
10341 {
10342 append_index(index, is_literal);
10343 break;
10344 }
10345 // fallthrough
10346 case BuiltInPosition:
10347 case BuiltInPointSize:
10348 if (mesh_shader)
10349 expr = join(ts: "gl_MeshVerticesEXT[", ts: to_expression(id: index, register_expression_read), ts: "].", ts&: expr);
10350 else if (var->storage == StorageClassInput)
10351 expr = join(ts: "gl_in[", ts: to_expression(id: index, register_expression_read), ts: "].", ts&: expr);
10352 else if (var->storage == StorageClassOutput)
10353 expr = join(ts: "gl_out[", ts: to_expression(id: index, register_expression_read), ts: "].", ts&: expr);
10354 else
10355 append_index(index, is_literal);
10356 break;
10357
10358 case BuiltInPrimitiveId:
10359 case BuiltInLayer:
10360 case BuiltInViewportIndex:
10361 case BuiltInCullPrimitiveEXT:
10362 case BuiltInPrimitiveShadingRateKHR:
10363 if (mesh_shader)
10364 expr = join(ts: "gl_MeshPrimitivesEXT[", ts: to_expression(id: index, register_expression_read), ts: "].", ts&: expr);
10365 else
10366 append_index(index, is_literal);
10367 break;
10368
10369 default:
10370 append_index(index, is_literal);
10371 break;
10372 }
10373 }
10374 else if (backend.force_merged_mesh_block && i == 0 && var &&
10375 !is_builtin_variable(var: *var) && var->storage == StorageClassOutput)
10376 {
10377 if (is_per_primitive_variable(var: *var))
10378 expr = join(ts: "gl_MeshPrimitivesEXT[", ts: to_expression(id: index, register_expression_read), ts: "].", ts&: expr);
10379 else
10380 expr = join(ts: "gl_MeshVerticesEXT[", ts: to_expression(id: index, register_expression_read), ts: "].", ts&: expr);
10381 }
10382 else if (options.flatten_multidimensional_arrays && dimension_flatten)
10383 {
10384 // If we are flattening multidimensional arrays, do manual stride computation.
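// e.g. (sketch) float a[4][3] accessed as a[i][j] flattens to roughly "a[i * 3 + j]",
// with the closing "]" deferred until the innermost index has been emitted.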
10385 auto &parent_type = get<SPIRType>(id: type->parent_type);
10386
10387 if (is_literal)
10388 expr += convert_to_string(t: index);
10389 else
10390 expr += to_enclosed_expression(id: index, register_expression_read);
10391
10392 for (auto j = uint32_t(parent_type.array.size()); j; j--)
10393 {
10394 expr += " * ";
10395 expr += enclose_expression(expr: to_array_size(type: parent_type, index: j - 1));
10396 }
10397
10398 if (parent_type.array.empty())
10399 pending_array_enclose = false;
10400 else
10401 expr += " + ";
10402
10403 if (!pending_array_enclose)
10404 expr += "]";
10405 }
10406 else if (index_is_literal || !builtin_translates_to_nonarray(builtin: BuiltIn(get_decoration(id: base, decoration: DecorationBuiltIn))))
10407 {
10408 // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
10409 // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
10410 // For literal indices we are working on composites, so we ignore this since we have already converted to a proper array.
10411 append_index(index, is_literal);
10412 }
10413
10414 if (var && has_decoration(id: var->self, decoration: DecorationBuiltIn) &&
10415 get_decoration(id: var->self, decoration: DecorationBuiltIn) == BuiltInPosition &&
10416 get_execution_model() == ExecutionModelMeshEXT)
10417 {
10418 access_meshlet_position_y = true;
10419 }
10420
10421 type_id = type->parent_type;
10422 type = &get<SPIRType>(id: type_id);
10423
10424 // If the physical type has an unnatural vecsize,
10425 // we must assume it's a faked struct where the .data member
10426 // is used for the real payload.
10427 if (physical_type && (is_vector(type: *type) || is_scalar(type: *type)))
10428 {
10429 auto &phys = get<SPIRType>(id: physical_type);
10430 if (phys.vecsize > 4)
10431 expr += ".data";
10432 }
10433
10434 access_chain_is_arrayed = true;
10435 }
10436 // For structs, the index refers to a constant, which indexes into the members, possibly through a redirection mapping.
10437 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
10438 else if (type->basetype == SPIRType::Struct)
10439 {
10440 if (!is_literal)
10441 index = evaluate_constant_u32(id: index);
10442
10443 if (index < uint32_t(type->member_type_index_redirection.size()))
10444 index = type->member_type_index_redirection[index];
10445
10446 if (index >= type->member_types.size())
10447 SPIRV_CROSS_THROW("Member index is out of bounds!");
10448
10449 if (hide_first_subscript)
10450 {
10451 // First "._m0" subscript has been hidden, subsequent fields must be emitted even for structured buffers
10452 hide_first_subscript = false;
10453 }
10454 else
10455 {
10456 BuiltIn builtin = BuiltInMax;
10457 if (is_member_builtin(type: *type, index, builtin: &builtin) && access_chain_needs_stage_io_builtin_translation(base))
10458 {
10459 if (access_chain_is_arrayed)
10460 {
10461 expr += ".";
10462 expr += builtin_to_glsl(builtin, storage: type->storage);
10463 }
10464 else
10465 expr = builtin_to_glsl(builtin, storage: type->storage);
10466
10467 if (builtin == BuiltInPosition && get_execution_model() == ExecutionModelMeshEXT)
10468 {
10469 access_meshlet_position_y = true;
10470 }
10471 }
10472 else
10473 {
10474 // If the member has a qualified name, use it as the entire chain
10475 string qual_mbr_name = get_member_qualified_name(type_id, index);
10476 if (!qual_mbr_name.empty())
10477 expr = qual_mbr_name;
10478 else if (flatten_member_reference)
10479 expr += join(ts: "_", ts: to_member_name(type: *type, index));
10480 else
10481 {
					// Any pointer de-references for values are handled in the first access chain.
					// For pointer chains, the pointer-ness is resolved through an array access.
					// The only time this is not true is when accessing an array of SSBO/UBO.
					// This case is explicitly handled.
10486 expr += to_member_reference(base, type: *type, index, ptr_chain_is_resolved: ptr_chain || i != 0);
10487 }
10488 }
10489 }
10490
10491 if (has_member_decoration(id: type->self, index, decoration: DecorationInvariant))
10492 is_invariant = true;
10493 if (has_member_decoration(id: type->self, index, decoration: DecorationRelaxedPrecision))
10494 relaxed_precision = true;
10495
10496 is_packed = member_is_packed_physical_type(type: *type, index);
10497 if (member_is_remapped_physical_type(type: *type, index))
10498 physical_type = get_extended_member_decoration(type: type->self, index, decoration: SPIRVCrossDecorationPhysicalTypeID);
10499 else
10500 physical_type = 0;
10501
10502 row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(type: *type, index);
10503 type = &get<SPIRType>(id: type->member_types[index]);
10504 }
10505 // Matrix -> Vector
10506 else if (type->columns > 1)
10507 {
10508 // If we have a row-major matrix here, we need to defer any transpose in case this access chain
10509 // is used to store a column. We can resolve it right here and now if we access a scalar directly,
10510 // by flipping indexing order of the matrix.
10511
10512 expr += "[";
10513 if (is_literal)
10514 expr += convert_to_string(t: index);
10515 else
10516 expr += to_unpacked_expression(id: index, register_expression_read);
10517 expr += "]";
10518
10519 // If the physical type has an unnatural vecsize,
10520 // we must assume it's a faked struct where the .data member
10521 // is used for the real payload.
10522 if (physical_type)
10523 {
10524 auto &phys = get<SPIRType>(id: physical_type);
10525 if (phys.vecsize > 4 || phys.columns > 4)
10526 expr += ".data";
10527 }
10528
10529 type_id = type->parent_type;
10530 type = &get<SPIRType>(id: type_id);
10531 }
10532 // Vector -> Scalar
10533 else if (type->vecsize > 1)
10534 {
10535 string deferred_index;
10536 if (row_major_matrix_needs_conversion)
10537 {
10538 // Flip indexing order.
10539 auto column_index = expr.find_last_of(c: '[');
10540 if (column_index != string::npos)
10541 {
10542 deferred_index = expr.substr(pos: column_index);
10543
10544 auto end_deferred_index = deferred_index.find_last_of(c: ']');
10545 if (end_deferred_index != string::npos && end_deferred_index + 1 != deferred_index.size())
10546 {
10547 // If we have any data member fixups, it must be transposed so that it refers to this index.
10548 // E.g. [0].data followed by [1] would be shuffled to [1][0].data which is wrong,
10549 // and needs to be [1].data[0] instead.
10550 end_deferred_index++;
10551 deferred_index = deferred_index.substr(pos: end_deferred_index) +
10552 deferred_index.substr(pos: 0, n: end_deferred_index);
10553 }
10554
10555 expr.resize(n: column_index);
10556 }
10557 }
10558
10559 // Internally, access chain implementation can also be used on composites,
10560 // ignore scalar access workarounds in this case.
10561 StorageClass effective_storage = StorageClassGeneric;
10562 bool ignore_potential_sliced_writes = false;
10563 if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
10564 {
10565 if (expression_type(id: base).pointer)
10566 effective_storage = get_expression_effective_storage_class(ptr: base);
10567
10568 // Special consideration for control points.
10569 // Control points can only be written by InvocationID, so there is no need
10570 // to consider scalar access chains here.
10571 // Cleans up some cases where it's very painful to determine the accurate storage class
10572 // since blocks can be partially masked ...
10573 auto *var = maybe_get_backing_variable(chain: base);
10574 if (var && var->storage == StorageClassOutput &&
10575 get_execution_model() == ExecutionModelTessellationControl &&
10576 !has_decoration(id: var->self, decoration: DecorationPatch))
10577 {
10578 ignore_potential_sliced_writes = true;
10579 }
10580 }
10581 else
10582 ignore_potential_sliced_writes = true;
10583
10584 if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
10585 {
10586 // On some backends, we might not be able to safely access individual scalars in a vector.
10587 // To work around this, we might have to cast the access chain reference to something which can,
10588 // like a pointer to scalar, which we can then index into.
10589 prepare_access_chain_for_scalar_access(expr, type: get<SPIRType>(id: type->parent_type), storage: effective_storage,
10590 is_packed);
10591 }
10592
10593 if (is_literal)
10594 {
10595 bool out_of_bounds = (index >= type->vecsize);
10596
10597 if (!is_packed && !row_major_matrix_needs_conversion)
10598 {
10599 expr += ".";
10600 expr += index_to_swizzle(index: out_of_bounds ? 0 : index);
10601 }
10602 else
10603 {
10604 // For packed vectors, we can only access them as an array, not by swizzle.
10605 expr += join(ts: "[", ts: out_of_bounds ? 0 : index, ts: "]");
10606 }
10607 }
10608 else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
10609 {
10610 auto &c = get<SPIRConstant>(id: index);
10611 bool out_of_bounds = (c.scalar() >= type->vecsize);
10612
10613 if (c.specialization)
10614 {
10615 // If the index is a spec constant, we cannot turn extract into a swizzle.
10616 expr += join(ts: "[", ts: out_of_bounds ? "0" : to_expression(id: index), ts: "]");
10617 }
10618 else
10619 {
10620 expr += ".";
10621 expr += index_to_swizzle(index: out_of_bounds ? 0 : c.scalar());
10622 }
10623 }
10624 else
10625 {
10626 expr += "[";
10627 expr += to_unpacked_expression(id: index, register_expression_read);
10628 expr += "]";
10629 }
10630
10631 if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
10632 {
10633 if (prepare_access_chain_for_scalar_access(expr, type: get<SPIRType>(id: type->parent_type), storage: effective_storage,
10634 is_packed))
10635 {
10636 // We're in a pointer context now, so just remove any member dereference.
10637 auto first_index = deferred_index.find_first_of(c: '[');
10638 if (first_index != string::npos && first_index != 0)
10639 deferred_index = deferred_index.substr(pos: first_index);
10640 }
10641 }
10642
10643 if (access_meshlet_position_y)
10644 {
10645 if (is_literal)
10646 {
10647 access_meshlet_position_y = index == 1;
10648 }
10649 else
10650 {
10651 const auto *c = maybe_get<SPIRConstant>(id: index);
10652 if (c)
10653 access_meshlet_position_y = c->scalar() == 1;
10654 else
10655 {
10656 // We don't know, but we have to assume no.
10657 // Flip Y in mesh shaders is an opt-in horrible hack, so we'll have to assume shaders try to behave.
10658 access_meshlet_position_y = false;
10659 }
10660 }
10661 }
10662
10663 expr += deferred_index;
10664 row_major_matrix_needs_conversion = false;
10665
10666 is_packed = false;
10667 physical_type = 0;
10668 type_id = type->parent_type;
10669 type = &get<SPIRType>(id: type_id);
10670 }
10671 else if (!backend.allow_truncated_access_chain)
10672 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
10673 }
10674
10675 if (pending_array_enclose)
10676 {
		SPIRV_CROSS_THROW("Flattening of multidimensional arrays was enabled, "
		                  "but the access chain was terminated in the middle of a multidimensional array. "
		                  "This is not supported.");
10680 }
10681
10682 if (meta)
10683 {
10684 meta->need_transpose = row_major_matrix_needs_conversion;
10685 meta->storage_is_packed = is_packed;
10686 meta->storage_is_invariant = is_invariant;
10687 meta->storage_physical_type = physical_type;
10688 meta->relaxed_precision = relaxed_precision;
10689 meta->access_meshlet_position_y = access_meshlet_position_y;
10690 }
10691
10692 return expr;
10693}
10694
10695void CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t)
10696{
10697}
10698
10699bool CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
10700{
10701 return false;
10702}
10703
10704string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
10705{
10706 auto ret = join(ts: basename, ts: "_", ts: to_member_name(type, index));
10707 ParsedIR::sanitize_underscores(str&: ret);
10708 return ret;
10709}
10710
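// Wrapper around access_chain_internal() which deals with bases this compiler has flattened:
//  - Buffer blocks flattened to a plain uniform array are lowered to byte-offset emulation
//    through flattened_access_chain().
//  - Flattened IO structs are addressed by gluing member names onto the base name
//    (roughly "v_member" instead of "v.member" for some flattened variable v).
// Everything else goes through a regular access chain expression.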
10711string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
10712 AccessChainMeta *meta, bool ptr_chain)
10713{
10714 if (flattened_buffer_blocks.count(x: base))
10715 {
10716 uint32_t matrix_stride = 0;
10717 uint32_t array_stride = 0;
10718 bool need_transpose = false;
10719 flattened_access_chain_offset(basetype: expression_type(id: base), indices, count, offset: 0, word_stride: 16, need_transpose: &need_transpose, matrix_stride: &matrix_stride,
10720 array_stride: &array_stride, ptr_chain);
10721
10722 if (meta)
10723 {
10724 meta->need_transpose = target_type.columns > 1 && need_transpose;
10725 meta->storage_is_packed = false;
10726 }
10727
10728 return flattened_access_chain(base, indices, count, target_type, offset: 0, matrix_stride, array_stride,
10729 need_transpose);
10730 }
10731 else if (flattened_structs.count(x: base) && count > 0)
10732 {
10733 AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
10734 if (ptr_chain)
10735 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
10736
10737 if (flattened_structs[base])
10738 {
10739 flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
10740 if (meta)
10741 meta->flattened_struct = target_type.basetype == SPIRType::Struct;
10742 }
10743
10744 auto chain = access_chain_internal(base, indices, count, flags, meta: nullptr).substr(pos: 1);
10745 if (meta)
10746 {
10747 meta->need_transpose = false;
10748 meta->storage_is_packed = false;
10749 }
10750
10751 auto basename = to_flattened_access_chain_expression(id: base);
10752 auto ret = join(ts&: basename, ts: "_", ts&: chain);
10753 ParsedIR::sanitize_underscores(str&: ret);
10754 return ret;
10755 }
10756 else
10757 {
10758 AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
10759 if (ptr_chain)
10760 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
10761 return access_chain_internal(base, indices, count, flags, meta);
10762 }
10763}
10764
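// Reassembles a flattened struct into a single value by emitting the struct constructor with
// one argument per flattened member, roughly Foo(v_a, v_b, ...) for a struct type Foo whose
// flattened base name is v. Nested struct members recurse with the accumulated prefix.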
10765string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
10766{
10767 auto expr = type_to_glsl_constructor(type);
10768 expr += '(';
10769
10770 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
10771 {
10772 if (i)
10773 expr += ", ";
10774
10775 auto &member_type = get<SPIRType>(id: type.member_types[i]);
10776 if (member_type.basetype == SPIRType::Struct)
10777 expr += load_flattened_struct(basename: to_flattened_struct_member(basename, type, index: i), type: member_type);
10778 else
10779 expr += to_flattened_struct_member(basename, type, index: i);
10780 }
10781 expr += ')';
10782 return expr;
10783}
10784
10785std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
10786{
10787 // Do not use to_expression as that will unflatten access chains.
10788 string basename;
10789 if (const auto *var = maybe_get<SPIRVariable>(id))
10790 basename = to_name(id: var->self);
10791 else if (const auto *expr = maybe_get<SPIRExpression>(id))
10792 basename = expr->expression;
10793 else
10794 basename = to_expression(id);
10795
10796 return basename;
10797}
10798
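// Stores into a flattened struct one member at a time: every non-struct member becomes a
// statement of the form "basename_member = rhs.member;", and nested structs recurse with the
// extended name and index chain.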
10799void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
10800 const SmallVector<uint32_t> &indices)
10801{
10802 SmallVector<uint32_t> sub_indices = indices;
10803 sub_indices.push_back(t: 0);
10804
10805 auto *member_type = &type;
10806 for (auto &index : indices)
10807 member_type = &get<SPIRType>(id: member_type->member_types[index]);
10808
10809 for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
10810 {
10811 sub_indices.back() = i;
10812 auto lhs = join(ts: basename, ts: "_", ts: to_member_name(type: *member_type, index: i));
10813 ParsedIR::sanitize_underscores(str&: lhs);
10814
10815 if (get<SPIRType>(id: member_type->member_types[i]).basetype == SPIRType::Struct)
10816 {
10817 store_flattened_struct(basename: lhs, rhs_id, type, indices: sub_indices);
10818 }
10819 else
10820 {
10821 auto rhs = to_expression(id: rhs_id) + to_multi_member_reference(type, indices: sub_indices);
10822 statement(ts&: lhs, ts: " = ", ts&: rhs, ts: ";");
10823 }
10824 }
10825}
10826
10827void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
10828{
10829 auto &type = expression_type(id: lhs_id);
10830 auto basename = to_flattened_access_chain_expression(id: lhs_id);
10831 store_flattened_struct(basename, rhs_id: value, type, indices: {});
10832}
10833
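// Emits a load of the given target type from a flattened buffer block.
// Arrays cannot be flattened; structs are rebuilt member by member, matrices column by column,
// and vectors/scalars read straight from the flattened backing array.
// Offsets and strides here are all in bytes.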
10834std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
10835 const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
10836 uint32_t /* array_stride */, bool need_transpose)
10837{
10838 if (!target_type.array.empty())
10839 SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
10840 else if (target_type.basetype == SPIRType::Struct)
10841 return flattened_access_chain_struct(base, indices, count, target_type, offset);
10842 else if (target_type.columns > 1)
10843 return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
10844 else
10845 return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
10846}
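// Rebuilds a struct value from a flattened buffer block by loading every member at its declared
// offset and wrapping the results in a constructor, or in a braced initializer list for backends
// which cannot declare structs inline. Row-major matrix members are transposed on the spot since
// the transposition cannot be forwarded further.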
10847
10848std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
10849 const SPIRType &target_type, uint32_t offset)
10850{
10851 std::string expr;
10852
10853 if (backend.can_declare_struct_inline)
10854 {
10855 expr += type_to_glsl_constructor(type: target_type);
10856 expr += "(";
10857 }
10858 else
10859 expr += "{";
10860
10861 for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
10862 {
10863 if (i != 0)
10864 expr += ", ";
10865
10866 const SPIRType &member_type = get<SPIRType>(id: target_type.member_types[i]);
10867 uint32_t member_offset = type_struct_member_offset(type: target_type, index: i);
10868
10869 // The access chain terminates at the struct, so we need to find matrix strides and row-major information
10870 // ahead of time.
10871 bool need_transpose = false;
10872 bool relaxed = false;
10873 uint32_t matrix_stride = 0;
10874 if (member_type.columns > 1)
10875 {
10876 auto decorations = combined_decoration_for_member(type: target_type, index: i);
10877 need_transpose = decorations.get(bit: DecorationRowMajor);
10878 relaxed = decorations.get(bit: DecorationRelaxedPrecision);
10879 matrix_stride = type_struct_member_matrix_stride(type: target_type, index: i);
10880 }
10881
10882 auto tmp = flattened_access_chain(base, indices, count, target_type: member_type, offset: offset + member_offset, matrix_stride,
10883 0 /* array_stride */, need_transpose);
10884
10885 // Cannot forward transpositions, so resolve them here.
10886 if (need_transpose)
10887 expr += convert_row_major_matrix(exp_str: tmp, exp_type: member_type, physical_type_id: 0, is_packed: false, relaxed);
10888 else
10889 expr += tmp;
10890 }
10891
10892 expr += backend.can_declare_struct_inline ? ")" : "}";
10893
10894 return expr;
10895}
10896
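// Rebuilds a matrix from a flattened buffer block by loading one column at a time with
// flattened_access_chain_vector(), stepping matrix_stride bytes per column. For row-major
// (need_transpose) matrices, vecsize and columns are swapped first, so the matrix is read in
// storage order and the caller resolves the transpose.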
10897std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
10898 const SPIRType &target_type, uint32_t offset,
10899 uint32_t matrix_stride, bool need_transpose)
10900{
10901 assert(matrix_stride);
10902 SPIRType tmp_type = target_type;
10903 if (need_transpose)
10904 swap(a&: tmp_type.vecsize, b&: tmp_type.columns);
10905
10906 std::string expr;
10907
10908 expr += type_to_glsl_constructor(type: tmp_type);
10909 expr += "(";
10910
10911 for (uint32_t i = 0; i < tmp_type.columns; i++)
10912 {
10913 if (i != 0)
10914 expr += ", ";
10915
10916 expr += flattened_access_chain_vector(base, indices, count, target_type: tmp_type, offset: offset + i * matrix_stride, matrix_stride,
10917 /* need_transpose= */ false);
10918 }
10919
10920 expr += ")";
10921
10922 return expr;
10923}
10924
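// Loads a vector or scalar from the flattened backing array. The byte offset is turned into an
// element index plus swizzle, assuming the block has been flattened into an array of 16-byte
// (vec4-sized) words; e.g. a vec3 of floats at byte offset 16 in a flattened block named UBO
// comes out roughly as "UBO[1].xyz". With need_transpose, each component of a row-major matrix
// column is fetched separately, matrix_stride bytes apart, and recombined with a constructor.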
10925std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
10926 const SPIRType &target_type, uint32_t offset,
10927 uint32_t matrix_stride, bool need_transpose)
10928{
10929 auto result = flattened_access_chain_offset(basetype: expression_type(id: base), indices, count, offset, word_stride: 16);
10930
10931 auto buffer_name = to_name(id: expression_type(id: base).self);
10932
10933 if (need_transpose)
10934 {
10935 std::string expr;
10936
10937 if (target_type.vecsize > 1)
10938 {
10939 expr += type_to_glsl_constructor(type: target_type);
10940 expr += "(";
10941 }
10942
10943 for (uint32_t i = 0; i < target_type.vecsize; ++i)
10944 {
10945 if (i != 0)
10946 expr += ", ";
10947
10948 uint32_t component_offset = result.second + i * matrix_stride;
10949
10950 assert(component_offset % (target_type.width / 8) == 0);
10951 uint32_t index = component_offset / (target_type.width / 8);
10952
10953 expr += buffer_name;
10954 expr += "[";
10955 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
10956 expr += convert_to_string(t: index / 4);
10957 expr += "]";
10958
10959 expr += vector_swizzle(vecsize: 1, index: index % 4);
10960 }
10961
10962 if (target_type.vecsize > 1)
10963 {
10964 expr += ")";
10965 }
10966
10967 return expr;
10968 }
10969 else
10970 {
10971 assert(result.second % (target_type.width / 8) == 0);
10972 uint32_t index = result.second / (target_type.width / 8);
10973
10974 std::string expr;
10975
10976 expr += buffer_name;
10977 expr += "[";
10978 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
10979 expr += convert_to_string(t: index / 4);
10980 expr += "]";
10981
10982 expr += vector_swizzle(vecsize: target_type.vecsize, index: index % 4);
10983
10984 return expr;
10985 }
10986}
10987
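// Walks an access chain on a flattened buffer block and returns a pair of:
//  - a string holding any dynamic index arithmetic, scaled to word_stride units
//    (either empty or ending in " + " so a constant element index can be appended), and
//  - the accumulated constant byte offset from struct members and array/matrix/vector indexing.
// Matrix stride, array stride and row-major information are reported through the out parameters
// when requested.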
10988std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
10989 const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
10990 bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
10991{
10992 // Start traversing type hierarchy at the proper non-pointer types.
10993 const auto *type = &get_pointee_type(type: basetype);
10994
10995 std::string expr;
10996
10997 // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
10998 bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
10999 uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
11000 uint32_t array_stride = out_array_stride ? *out_array_stride : 0;
11001
11002 for (uint32_t i = 0; i < count; i++)
11003 {
11004 uint32_t index = indices[i];
11005
11006 // Pointers
11007 if (ptr_chain && i == 0)
11008 {
11009 // Here, the pointer type will be decorated with an array stride.
11010 array_stride = get_decoration(id: basetype.self, decoration: DecorationArrayStride);
11011 if (!array_stride)
11012 SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
11013
11014 auto *constant = maybe_get<SPIRConstant>(id: index);
11015 if (constant)
11016 {
11017 // Constant array access.
11018 offset += constant->scalar() * array_stride;
11019 }
11020 else
11021 {
11022 // Dynamic array access.
11023 if (array_stride % word_stride)
11024 {
11025 SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
11026 "of a 4-component vector. "
11027 "Likely culprit here is a float or vec2 array inside a push "
11028 "constant block which is std430. "
11029 "This cannot be flattened. Try using std140 layout instead.");
11030 }
11031
11032 expr += to_enclosed_expression(id: index);
11033 expr += " * ";
11034 expr += convert_to_string(t: array_stride / word_stride);
11035 expr += " + ";
11036 }
11037 }
11038 // Arrays
11039 else if (!type->array.empty())
11040 {
11041 auto *constant = maybe_get<SPIRConstant>(id: index);
11042 if (constant)
11043 {
11044 // Constant array access.
11045 offset += constant->scalar() * array_stride;
11046 }
11047 else
11048 {
11049 // Dynamic array access.
11050 if (array_stride % word_stride)
11051 {
11052 SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
11053 "of a 4-component vector. "
11054 "Likely culprit here is a float or vec2 array inside a push "
11055 "constant block which is std430. "
11056 "This cannot be flattened. Try using std140 layout instead.");
11057 }
11058
11059 expr += to_enclosed_expression(id: index, register_expression_read: false);
11060 expr += " * ";
11061 expr += convert_to_string(t: array_stride / word_stride);
11062 expr += " + ";
11063 }
11064
11065 uint32_t parent_type = type->parent_type;
11066 type = &get<SPIRType>(id: parent_type);
11067
11068 if (!type->array.empty())
11069 array_stride = get_decoration(id: parent_type, decoration: DecorationArrayStride);
11070 }
11071 // For structs, the index refers to a constant, which indexes into the members.
11072 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
11073 else if (type->basetype == SPIRType::Struct)
11074 {
11075 index = evaluate_constant_u32(id: index);
11076
11077 if (index >= type->member_types.size())
11078 SPIRV_CROSS_THROW("Member index is out of bounds!");
11079
11080 offset += type_struct_member_offset(type: *type, index);
11081
11082 auto &struct_type = *type;
11083 type = &get<SPIRType>(id: type->member_types[index]);
11084
11085 if (type->columns > 1)
11086 {
11087 matrix_stride = type_struct_member_matrix_stride(type: struct_type, index);
11088 row_major_matrix_needs_conversion =
11089 combined_decoration_for_member(type: struct_type, index).get(bit: DecorationRowMajor);
11090 }
11091 else
11092 row_major_matrix_needs_conversion = false;
11093
11094 if (!type->array.empty())
11095 array_stride = type_struct_member_array_stride(type: struct_type, index);
11096 }
11097 // Matrix -> Vector
11098 else if (type->columns > 1)
11099 {
11100 auto *constant = maybe_get<SPIRConstant>(id: index);
11101 if (constant)
11102 {
11103 index = evaluate_constant_u32(id: index);
11104 offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
11105 }
11106 else
11107 {
11108 uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
11109 // Dynamic array access.
11110 if (indexing_stride % word_stride)
11111 {
11112 SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
11113 "4-component vector. "
11114 "Likely culprit here is a row-major matrix being accessed dynamically. "
11115 "This cannot be flattened. Try using std140 layout instead.");
11116 }
11117
11118 expr += to_enclosed_expression(id: index, register_expression_read: false);
11119 expr += " * ";
11120 expr += convert_to_string(t: indexing_stride / word_stride);
11121 expr += " + ";
11122 }
11123
11124 type = &get<SPIRType>(id: type->parent_type);
11125 }
11126 // Vector -> Scalar
11127 else if (type->vecsize > 1)
11128 {
11129 auto *constant = maybe_get<SPIRConstant>(id: index);
11130 if (constant)
11131 {
11132 index = evaluate_constant_u32(id: index);
11133 offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
11134 }
11135 else
11136 {
11137 uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
11138
11139 // Dynamic array access.
11140 if (indexing_stride % word_stride)
11141 {
11142 SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
11143 "size of a 4-component vector. "
11144 "This cannot be flattened in legacy targets.");
11145 }
11146
11147 expr += to_enclosed_expression(id: index, register_expression_read: false);
11148 expr += " * ";
11149 expr += convert_to_string(t: indexing_stride / word_stride);
11150 expr += " + ";
11151 }
11152
11153 type = &get<SPIRType>(id: type->parent_type);
11154 }
11155 else
11156 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
11157 }
11158
11159 if (need_transpose)
11160 *need_transpose = row_major_matrix_needs_conversion;
11161 if (out_matrix_stride)
11162 *out_matrix_stride = matrix_stride;
11163 if (out_array_stride)
11164 *out_array_stride = array_stride;
11165
11166 return std::make_pair(x&: expr, y&: offset);
11167}
11168
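// Decides whether a pointer expression must be dereferenced explicitly when consumed.
// Plain variables and access chains already behave as l-values, so only phi variables and
// genuine pointer-valued expressions (e.g. variable pointers that are not just forwarded
// copies of a variable) report true here.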
11169bool CompilerGLSL::should_dereference(uint32_t id)
11170{
11171 const auto &type = expression_type(id);
11172 // Non-pointer expressions don't need to be dereferenced.
11173 if (!type.pointer)
11174 return false;
11175
11176 // Handles shouldn't be dereferenced either.
11177 if (!expression_is_lvalue(id))
11178 return false;
11179
11180 // If id is a variable but not a phi variable, we should not dereference it.
11181 if (auto *var = maybe_get<SPIRVariable>(id))
11182 return var->phi_variable;
11183
11184 if (auto *expr = maybe_get<SPIRExpression>(id))
11185 {
11186 // If id is an access chain, we should not dereference it.
11187 if (expr->access_chain)
11188 return false;
11189
11190 // If id is a forwarded copy of a variable pointer, we should not dereference it.
11191 SPIRVariable *var = nullptr;
11192 while (expr->loaded_from && expression_is_forwarded(id: expr->self))
11193 {
11194 auto &src_type = expression_type(id: expr->loaded_from);
11195 // To be a copy, the pointer and its source expression must be the
11196 // same type. Can't check type.self, because for some reason that's
11197 // usually the base type with pointers stripped off. This check is
11198 // complex enough that I've hoisted it out of the while condition.
11199 if (src_type.pointer != type.pointer || src_type.pointer_depth != type.pointer_depth ||
11200 src_type.parent_type != type.parent_type)
11201 break;
11202 if ((var = maybe_get<SPIRVariable>(id: expr->loaded_from)))
11203 break;
11204 if (!(expr = maybe_get<SPIRExpression>(id: expr->loaded_from)))
11205 break;
11206 }
11207
11208 return !var || var->phi_variable;
11209 }
11210
11211 // Otherwise, we should dereference this pointer expression.
11212 return true;
11213}
11214
11215bool CompilerGLSL::should_forward(uint32_t id) const
11216{
	// If id is a variable, we will try to forward it regardless of the force_temporary check below.
	// This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL.
11219
11220 auto *var = maybe_get<SPIRVariable>(id);
11221 if (var)
11222 {
11223 // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation.
11224 return !(has_decoration(id, decoration: DecorationBuiltIn) && has_decoration(id, decoration: DecorationVolatile));
11225 }
11226
11227 // For debugging emit temporary variables for all expressions
11228 if (options.force_temporary)
11229 return false;
11230
11231 // If an expression carries enough dependencies we need to stop forwarding at some point,
11232 // or we explode compilers. There are usually limits to how much we can nest expressions.
11233 auto *expr = maybe_get<SPIRExpression>(id);
11234 const uint32_t max_expression_dependencies = 64;
11235 if (expr && expr->expression_dependencies.size() >= max_expression_dependencies)
11236 return false;
11237
11238 if (expr && expr->loaded_from
11239 && has_decoration(id: expr->loaded_from, decoration: DecorationBuiltIn)
11240 && has_decoration(id: expr->loaded_from, decoration: DecorationVolatile))
11241 {
11242 // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation.
11243 return false;
11244 }
11245
11246 // Immutable expression can always be forwarded.
11247 if (is_immutable(id))
11248 return true;
11249
11250 return false;
11251}
11252
11253bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
11254{
11255 // Used only by opcodes which don't do any real "work", they just swizzle data in some fashion.
11256 return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
11257}
11258
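// Counts reads of forwarded temporaries, including reads implied through access chains and
// dependent expressions. Once a forwarded expression is read more than once (or read inside a
// loop after being created outside it), it is forced into a real temporary and a recompile is
// triggered so the complex expression is only evaluated once.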
11259void CompilerGLSL::track_expression_read(uint32_t id)
11260{
11261 switch (ir.ids[id].get_type())
11262 {
11263 case TypeExpression:
11264 {
11265 auto &e = get<SPIRExpression>(id);
11266 for (auto implied_read : e.implied_read_expressions)
11267 track_expression_read(id: implied_read);
11268 break;
11269 }
11270
11271 case TypeAccessChain:
11272 {
11273 auto &e = get<SPIRAccessChain>(id);
11274 for (auto implied_read : e.implied_read_expressions)
11275 track_expression_read(id: implied_read);
11276 break;
11277 }
11278
11279 default:
11280 break;
11281 }
11282
11283 // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
11284 // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
11285 if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
11286 {
11287 auto &v = expression_usage_counts[id];
11288 v++;
11289
11290 // If we create an expression outside a loop,
11291 // but access it inside a loop, we're implicitly reading it multiple times.
11292 // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
11293 // working inside the backend compiler.
11294 if (expression_read_implies_multiple_reads(id))
11295 v++;
11296
11297 if (v >= 2)
11298 {
11299 //if (v == 2)
11300 // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
11301
11302 // Force a recompile after this pass to avoid forwarding this variable.
11303 force_temporary_and_recompile(id);
11304 }
11305 }
11306}
11307
11308bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
11309{
11310 if (forced_temporaries.find(x: id) != end(cont&: forced_temporaries))
11311 return false;
11312
11313 for (uint32_t i = 0; i < num_args; i++)
11314 if (!should_forward(id: args[i]))
11315 return false;
11316
11317 // We need to forward globals as well.
11318 if (!pure)
11319 {
11320 for (auto global : global_variables)
11321 if (!should_forward(id: global))
11322 return false;
11323 for (auto aliased : aliased_variables)
11324 if (!should_forward(id: aliased))
11325 return false;
11326 }
11327
11328 return true;
11329}
11330
11331void CompilerGLSL::register_impure_function_call()
11332{
11333 // Impure functions can modify globals and aliased variables, so invalidate them as well.
11334 for (auto global : global_variables)
11335 flush_dependees(var&: get<SPIRVariable>(id: global));
11336 for (auto aliased : aliased_variables)
11337 flush_dependees(var&: get<SPIRVariable>(id: aliased));
11338}
11339
11340void CompilerGLSL::register_call_out_argument(uint32_t id)
11341{
11342 register_write(chain: id);
11343
11344 auto *var = maybe_get<SPIRVariable>(id);
11345 if (var)
11346 flush_variable_declaration(id: var->self);
11347}
11348
11349string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
11350{
11351 // These variables are always function local,
11352 // so make sure we emit the variable without storage qualifiers.
11353 // Some backends will inject custom variables locally in a function
11354 // with a storage qualifier which is not function-local.
11355 auto old_storage = var.storage;
11356 var.storage = StorageClassFunction;
11357 auto expr = variable_decl(variable: var);
11358 var.storage = old_storage;
11359 return expr;
11360}
11361
11362void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
11363{
11364 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
11365 if (var.allocate_temporary_copy && !flushed_phi_variables.count(x: var.self))
11366 {
11367 auto &type = get<SPIRType>(id: var.basetype);
11368 auto &flags = get_decoration_bitset(id: var.self);
11369 statement(ts: flags_to_qualifiers_glsl(type, flags), ts: variable_decl(type, name: join(ts: "_", ts: var.self, ts: "_copy")), ts: ";");
11370 flushed_phi_variables.insert(x: var.self);
11371 }
11372}
11373
11374void CompilerGLSL::flush_variable_declaration(uint32_t id)
11375{
11376 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
11377 auto *var = maybe_get<SPIRVariable>(id);
11378 if (var && var->deferred_declaration)
11379 {
11380 string initializer;
11381 if (options.force_zero_initialized_variables &&
11382 (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
11383 var->storage == StorageClassPrivate) &&
11384 !var->initializer && type_can_zero_initialize(type: get_variable_data_type(var: *var)))
11385 {
11386 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var: *var)));
11387 }
11388
11389 statement(ts: variable_decl_function_local(var&: *var), ts&: initializer, ts: ";");
11390 var->deferred_declaration = false;
11391 }
11392 if (var)
11393 {
11394 emit_variable_temporary_copies(var: *var);
11395 }
11396}
11397
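// Collapses redundant back-to-back swizzles produced by the composite combiner,
// e.g. "foobar.wyx.xy" turns into "foobar.wy". Returning true tells the caller that the
// trailing swizzle has been dealt with, so remove_unity_swizzle() is not attempted on top.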
11398bool CompilerGLSL::remove_duplicate_swizzle(string &op)
11399{
11400 auto pos = op.find_last_of(c: '.');
11401 if (pos == string::npos || pos == 0)
11402 return false;
11403
11404 string final_swiz = op.substr(pos: pos + 1, n: string::npos);
11405
11406 if (backend.swizzle_is_function)
11407 {
11408 if (final_swiz.size() < 2)
11409 return false;
11410
11411 if (final_swiz.substr(pos: final_swiz.size() - 2, n: string::npos) == "()")
11412 final_swiz.erase(pos: final_swiz.size() - 2, n: string::npos);
11413 else
11414 return false;
11415 }
11416
11417 // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
11418 // If so, and previous swizzle is of same length,
11419 // we can drop the final swizzle altogether.
11420 for (uint32_t i = 0; i < final_swiz.size(); i++)
11421 {
11422 static const char expected[] = { 'x', 'y', 'z', 'w' };
11423 if (i >= 4 || final_swiz[i] != expected[i])
11424 return false;
11425 }
11426
11427 auto prevpos = op.find_last_of(c: '.', pos: pos - 1);
11428 if (prevpos == string::npos)
11429 return false;
11430
11431 prevpos++;
11432
11433 // Make sure there are only swizzles here ...
11434 for (auto i = prevpos; i < pos; i++)
11435 {
11436 if (op[i] < 'w' || op[i] > 'z')
11437 {
11438 // If swizzles are foo.xyz() like in C++ backend for example, check for that.
11439 if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
11440 break;
11441 return false;
11442 }
11443 }
11444
11445 // If original swizzle is large enough, just carve out the components we need.
11446 // E.g. foobar.wyx.xy will turn into foobar.wy.
11447 if (pos - prevpos >= final_swiz.size())
11448 {
11449 op.erase(pos: prevpos + final_swiz.size(), n: string::npos);
11450
11451 // Add back the function call ...
11452 if (backend.swizzle_is_function)
11453 op += "()";
11454 }
11455 return true;
11456}
11457
// Optimizes away vector swizzles where we have something like
// vec3 foo;
// foo.xyz <-- swizzle expression does nothing.
// This is a very common pattern after OpCompositeConstruct.
11462bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
11463{
11464 auto pos = op.find_last_of(c: '.');
11465 if (pos == string::npos || pos == 0)
11466 return false;
11467
11468 string final_swiz = op.substr(pos: pos + 1, n: string::npos);
11469
11470 if (backend.swizzle_is_function)
11471 {
11472 if (final_swiz.size() < 2)
11473 return false;
11474
11475 if (final_swiz.substr(pos: final_swiz.size() - 2, n: string::npos) == "()")
11476 final_swiz.erase(pos: final_swiz.size() - 2, n: string::npos);
11477 else
11478 return false;
11479 }
11480
11481 // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
11482 // If so, and previous swizzle is of same length,
11483 // we can drop the final swizzle altogether.
11484 for (uint32_t i = 0; i < final_swiz.size(); i++)
11485 {
11486 static const char expected[] = { 'x', 'y', 'z', 'w' };
11487 if (i >= 4 || final_swiz[i] != expected[i])
11488 return false;
11489 }
11490
11491 auto &type = expression_type(id: base);
11492
11493 // Sanity checking ...
11494 assert(type.columns == 1 && type.array.empty());
11495
11496 if (type.vecsize == final_swiz.size())
11497 op.erase(pos: pos, n: string::npos);
11498 return true;
11499}
11500
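// Builds the argument list for a composite construction. Scalars extracted from the same base
// vector are merged back into a single swizzle where possible, so e.g. foo.x, foo.y, foo.z
// contributes "foo.xyz" (or just "foo" when the swizzle covers the whole vector) instead of
// three separate operands.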
11501string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
11502{
11503 ID base = 0;
11504 string op;
11505 string subop;
11506
11507 // Can only merge swizzles for vectors.
11508 auto &type = get<SPIRType>(id: return_type);
11509 bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
11510 bool swizzle_optimization = false;
11511
11512 for (uint32_t i = 0; i < length; i++)
11513 {
11514 auto *e = maybe_get<SPIRExpression>(id: elems[i]);
11515
11516 // If we're merging another scalar which belongs to the same base
11517 // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible!
11518 if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
11519 {
11520 // Only supposed to be used for vector swizzle -> scalar.
11521 assert(!e->expression.empty() && e->expression.front() == '.');
11522 subop += e->expression.substr(pos: 1, n: string::npos);
11523 swizzle_optimization = true;
11524 }
11525 else
11526 {
11527 // We'll likely end up with duplicated swizzles, e.g.
11528 // foobar.xyz.xyz from patterns like
11529 // OpVectorShuffle
11530 // OpCompositeExtract x 3
11531 // OpCompositeConstruct 3x + other scalar.
11532 // Just modify op in-place.
11533 if (swizzle_optimization)
11534 {
11535 if (backend.swizzle_is_function)
11536 subop += "()";
11537
11538 // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
11539 // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
11540 // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
11541 // Essentially, we can only remove one set of swizzles, since that's what we have control over ...
11542 // Case 1:
11543 // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
11544 // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
11545 // Case 2:
11546 // foo.xyz: Duplicate swizzle won't kick in.
11547 // If foo is vec3, we can remove xyz, giving just foo.
11548 if (!remove_duplicate_swizzle(op&: subop))
11549 remove_unity_swizzle(base, op&: subop);
11550
11551 // Strips away redundant parens if we created them during component extraction.
11552 strip_enclosed_expression(expr&: subop);
11553 swizzle_optimization = false;
11554 op += subop;
11555 }
11556 else
11557 op += subop;
11558
11559 if (i)
11560 op += ", ";
11561
11562 bool uses_buffer_offset =
11563 type.basetype == SPIRType::Struct && has_member_decoration(id: type.self, index: i, decoration: DecorationOffset);
11564 subop = to_composite_constructor_expression(parent_type: type, id: elems[i], block_like_type: uses_buffer_offset);
11565 }
11566
11567 base = e ? e->base_expression : ID(0);
11568 }
11569
11570 if (swizzle_optimization)
11571 {
11572 if (backend.swizzle_is_function)
11573 subop += "()";
11574
11575 if (!remove_duplicate_swizzle(op&: subop))
11576 remove_unity_swizzle(base, op&: subop);
11577 // Strips away redundant parens if we created them during component extraction.
11578 strip_enclosed_expression(expr&: subop);
11579 }
11580
11581 op += subop;
11582 return op;
11583}
11584
11585bool CompilerGLSL::skip_argument(uint32_t id) const
11586{
11587 if (!combined_image_samplers.empty() || !options.vulkan_semantics)
11588 {
11589 auto &type = expression_type(id);
11590 if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
11591 return true;
11592 }
11593 return false;
11594}
11595
11596bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
11597{
11598 // Do this with strings because we have a very clear pattern we can check for and it avoids
11599 // adding lots of special cases to the code emission.
11600 if (rhs.size() < lhs.size() + 3)
11601 return false;
11602
	// Do not optimize matrices. They are a bit awkward to reason about in general
	// (in which order do the operations happen?), and it does not work in MSL anyway.
11605 if (type.vecsize > 1 && type.columns > 1)
11606 return false;
11607
11608 auto index = rhs.find(str: lhs);
11609 if (index != 0)
11610 return false;
11611
11612 // TODO: Shift operators, but it's not important for now.
11613 auto op = rhs.find_first_of(s: "+-/*%|&^", pos: lhs.size() + 1);
11614 if (op != lhs.size() + 1)
11615 return false;
11616
11617 // Check that the op is followed by space. This excludes && and ||.
11618 if (rhs[op + 1] != ' ')
11619 return false;
11620
11621 char bop = rhs[op];
11622 auto expr = rhs.substr(pos: lhs.size() + 3);
11623
11624 // Avoids false positives where we get a = a * b + c.
11625 // Normally, these expressions are always enclosed, but unexpected code paths may end up hitting this.
11626 if (needs_enclose_expression(expr))
11627 return false;
11628
11629 // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code.
11630 // Find some common patterns which are equivalent.
11631 if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
11632 statement(ts: lhs, ts&: bop, ts&: bop, ts: ";");
11633 else
11634 statement(ts: lhs, ts: " ", ts&: bop, ts: "= ", ts&: expr, ts: ";");
11635 return true;
11636}
11637
11638void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
11639{
11640 if (forwarded_temporaries.find(x: expr) == end(cont&: forwarded_temporaries))
11641 return;
11642
11643 assert(current_emitting_block);
11644 current_emitting_block->invalidate_expressions.push_back(t: expr);
11645}
11646
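// Emits every instruction in a block. When relaxed-precision analysis is required, each
// instruction is first run through handle_instruction_precision(), and any mirrored-precision
// temporary it reports is materialized with an extra OpCopyObject right after the instruction.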
11647void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
11648{
11649 current_emitting_block = &block;
11650
11651 if (backend.requires_relaxed_precision_analysis)
11652 {
11653 // If PHI variables are consumed in unexpected precision contexts, copy them here.
11654 for (size_t i = 0, n = block.phi_variables.size(); i < n; i++)
11655 {
11656 auto &phi = block.phi_variables[i];
11657
11658 // Ensure we only copy once. We know a-priori that this array will lay out
11659 // the same function variables together.
11660 if (i && block.phi_variables[i - 1].function_variable == phi.function_variable)
11661 continue;
11662
11663 auto itr = temporary_to_mirror_precision_alias.find(x: phi.function_variable);
11664 if (itr != temporary_to_mirror_precision_alias.end())
11665 {
11666 // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
11667 // so it helps to have handle_instruction_precision() on the outside of emit_instruction().
11668 EmbeddedInstruction inst;
11669 inst.op = OpCopyObject;
11670 inst.length = 3;
11671 inst.ops.push_back(t: expression_type_id(id: itr->first));
11672 inst.ops.push_back(t: itr->second);
11673 inst.ops.push_back(t: itr->first);
11674 emit_instruction(instr: inst);
11675 }
11676 }
11677 }
11678
11679 for (auto &op : block.ops)
11680 {
11681 auto temporary_copy = handle_instruction_precision(instr: op);
11682 emit_instruction(instr: op);
11683 if (temporary_copy.dst_id)
11684 {
11685 // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
11686 // so it helps to have handle_instruction_precision() on the outside of emit_instruction().
11687 EmbeddedInstruction inst;
11688 inst.op = OpCopyObject;
11689 inst.length = 3;
11690 inst.ops.push_back(t: expression_type_id(id: temporary_copy.src_id));
11691 inst.ops.push_back(t: temporary_copy.dst_id);
11692 inst.ops.push_back(t: temporary_copy.src_id);
11693
11694 // Never attempt to hoist mirrored temporaries.
11695 // They are hoisted in lock-step with their parents.
11696 block_temporary_hoisting = true;
11697 emit_instruction(instr: inst);
11698 block_temporary_hoisting = false;
11699 }
11700 }
11701
11702 current_emitting_block = nullptr;
11703}
11704
11705void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
11706{
11707 // Allow trivially forwarded expressions like OpLoad or trivial shuffles,
11708 // these will be marked as having suppressed usage tracking.
11709 // Our only concern is to make sure arithmetic operations are done in similar ways.
11710 if (expression_is_forwarded(id: expr.self) && !expression_suppresses_usage_tracking(id: expr.self) &&
11711 forced_invariant_temporaries.count(x: expr.self) == 0)
11712 {
11713 force_temporary_and_recompile(id: expr.self);
11714 forced_invariant_temporaries.insert(x: expr.self);
11715
11716 for (auto &dependent : expr.expression_dependencies)
11717 disallow_forwarding_in_expression_chain(expr: get<SPIRExpression>(id: dependent));
11718 }
11719}
11720
11721void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
11722{
11723 // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
11724 // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
11725 // in one translation unit, but not another, e.g. due to multiple use of an expression.
11726 // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
11727 // expressions to be temporaries.
11728 // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
11729 // for all reasonable uses of invariant.
11730 if (!has_decoration(id: store_id, decoration: DecorationInvariant))
11731 return;
11732
11733 auto *expr = maybe_get<SPIRExpression>(id: value_id);
11734 if (!expr)
11735 return;
11736
11737 disallow_forwarding_in_expression_chain(expr: *expr);
11738}
11739
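// Emits "lhs = rhs;" for an OpStore, with the usual special cases: stores to invariant outputs
// force their dependency chains into temporaries, some array stores are unrolled, and trivial
// read-modify-write patterns are rewritten as "lhs op= expr" (or ++/--) to stay friendly to
// legacy ESSL loop-index rules.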
11740void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
11741{
11742 auto rhs = to_pointer_expression(id: rhs_expression);
11743
11744 // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
11745 if (!rhs.empty())
11746 {
11747 handle_store_to_invariant_variable(store_id: lhs_expression, value_id: rhs_expression);
11748
11749 if (!unroll_array_to_complex_store(target_id: lhs_expression, source_id: rhs_expression))
11750 {
11751 auto lhs = to_dereferenced_expression(id: lhs_expression);
11752 if (has_decoration(id: lhs_expression, decoration: DecorationNonUniform))
11753 convert_non_uniform_expression(expr&: lhs, ptr_id: lhs_expression);
11754
11755 // We might need to cast in order to store to a builtin.
11756 cast_to_variable_store(target_id: lhs_expression, expr&: rhs, expr_type: expression_type(id: rhs_expression));
11757
11758 // Tries to optimize assignments like "<lhs> = <lhs> op expr".
11759 // While this is purely cosmetic, this is important for legacy ESSL where loop
11760 // variable increments must be in either i++ or i += const-expr.
11761 // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
11762 if (!optimize_read_modify_write(type: expression_type(id: rhs_expression), lhs, rhs))
11763 statement(ts&: lhs, ts: " = ", ts&: rhs, ts: ";");
11764 }
11765 register_write(chain: lhs_expression);
11766 }
11767}
11768
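// Determines the integer bit width an instruction operates on so implicit bitcasts can be
// emitted with matching types: comparisons and conversions look at their first operand,
// extended multiplies look at the result struct's members, and everything else falls back to
// the result type (or 32 if that is not an integer type).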
11769uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
11770{
11771 if (instr.length < 3)
11772 return 32;
11773
11774 auto *ops = stream(instr);
11775
11776 switch (instr.op)
11777 {
11778 case OpSConvert:
11779 case OpConvertSToF:
11780 case OpUConvert:
11781 case OpConvertUToF:
11782 case OpIEqual:
11783 case OpINotEqual:
11784 case OpSLessThan:
11785 case OpSLessThanEqual:
11786 case OpSGreaterThan:
11787 case OpSGreaterThanEqual:
11788 case OpULessThan:
11789 case OpULessThanEqual:
11790 case OpUGreaterThan:
11791 case OpUGreaterThanEqual:
11792 return expression_type(id: ops[2]).width;
11793
11794 case OpSMulExtended:
11795 case OpUMulExtended:
11796 return get<SPIRType>(id: get<SPIRType>(id: ops[0]).member_types[0]).width;
11797
11798 default:
11799 {
11800 // We can look at result type which is more robust.
11801 auto *type = maybe_get<SPIRType>(id: ops[0]);
11802 if (type && type_is_integral(type: *type))
11803 return type->width;
11804 else
11805 return 32;
11806 }
11807 }
11808}
11809
11810uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
11811{
11812 if (length < 1)
11813 return 32;
11814
11815 switch (op)
11816 {
11817 case GLSLstd450SAbs:
11818 case GLSLstd450SSign:
11819 case GLSLstd450UMin:
11820 case GLSLstd450SMin:
11821 case GLSLstd450UMax:
11822 case GLSLstd450SMax:
11823 case GLSLstd450UClamp:
11824 case GLSLstd450SClamp:
11825 case GLSLstd450FindSMsb:
11826 case GLSLstd450FindUMsb:
11827 return expression_type(id: ops[0]).width;
11828
11829 default:
11830 {
11831 // We don't need to care about other opcodes, just return 32.
11832 return 32;
11833 }
11834 }
11835}
11836
11837void CompilerGLSL::forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length)
11838{
11839 // Only GLSL supports RelaxedPrecision directly.
11840 // We cannot implement this in HLSL or MSL because it is tied to the type system.
11841 // In SPIR-V, everything must masquerade as 32-bit.
11842 if (!backend.requires_relaxed_precision_analysis)
11843 return;
11844
11845 auto input_precision = analyze_expression_precision(args, length);
11846
11847 // For expressions which are loaded or directly forwarded, we inherit mediump implicitly.
11848 // For dst_id to be analyzed properly, it must inherit any relaxed precision decoration from src_id.
11849 if (input_precision == Options::Mediump)
11850 set_decoration(id: dst_id, decoration: DecorationRelaxedPrecision);
11851}
11852
11853CompilerGLSL::Options::Precision CompilerGLSL::analyze_expression_precision(const uint32_t *args, uint32_t length) const
11854{
11855 // Now, analyze the precision at which the arguments would run.
11856 // GLSL rules are such that the precision used to evaluate an expression is equal to the highest precision
11857 // for the inputs. Constants do not have inherent precision and do not contribute to this decision.
11858 // If all inputs are constants, they inherit precision from outer expressions, including an l-value.
11859 // In this case, we'll have to force a temporary for dst_id so that we can bind the constant expression with
11860 // correct precision.
11861 bool expression_has_highp = false;
11862 bool expression_has_mediump = false;
11863
11864 for (uint32_t i = 0; i < length; i++)
11865 {
11866 uint32_t arg = args[i];
11867
11868 auto handle_type = ir.ids[arg].get_type();
11869 if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef)
11870 continue;
11871
11872 if (has_decoration(id: arg, decoration: DecorationRelaxedPrecision))
11873 expression_has_mediump = true;
11874 else
11875 expression_has_highp = true;
11876 }
11877
11878 if (expression_has_highp)
11879 return Options::Highp;
11880 else if (expression_has_mediump)
11881 return Options::Mediump;
11882 else
11883 return Options::DontCare;
11884}
11885
11886void CompilerGLSL::analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length)
11887{
11888 if (!backend.requires_relaxed_precision_analysis)
11889 return;
11890
11891 auto &type = get<SPIRType>(id: type_id);
11892
11893 // RelaxedPrecision only applies to 32-bit values.
11894 if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt)
11895 return;
11896
11897 bool operation_is_highp = !has_decoration(id: dst_id, decoration: DecorationRelaxedPrecision);
11898
11899 auto input_precision = analyze_expression_precision(args, length);
11900 if (input_precision == Options::DontCare)
11901 {
11902 consume_temporary_in_precision_context(type_id, id: dst_id, precision: input_precision);
11903 return;
11904 }
11905
11906 // In SPIR-V and GLSL, the semantics are flipped for how relaxed precision is determined.
11907 // In SPIR-V, the operation itself marks RelaxedPrecision, meaning that inputs can be truncated to 16-bit.
11908 // However, if the expression is not, inputs must be expanded to 32-bit first,
11909 // since the operation must run at high precision.
11910 // This is the awkward part, because if we have mediump inputs, or expressions which derived from mediump,
11911 // we might have to forcefully bind the source IDs to highp temporaries. This is done by clearing decorations
11912 // and forcing temporaries. Similarly for mediump operations. We bind highp expressions to mediump variables.
11913 if ((operation_is_highp && input_precision == Options::Mediump) ||
11914 (!operation_is_highp && input_precision == Options::Highp))
11915 {
11916 auto precision = operation_is_highp ? Options::Highp : Options::Mediump;
11917 for (uint32_t i = 0; i < length; i++)
11918 {
			// Rewrites the opcode so that we consume an ID in the correct precision context.
			// This is pretty hacky, but it's the most straightforward way of implementing this without adding
			// lots of extra passes to rewrite all code blocks.
11922 args[i] = consume_temporary_in_precision_context(type_id: expression_type_id(id: args[i]), id: args[i], precision);
11923 }
11924 }
11925}
11926
11927// This is probably not exhaustive ...
11928static bool opcode_is_precision_sensitive_operation(Op op)
11929{
11930 switch (op)
11931 {
11932 case OpFAdd:
11933 case OpFSub:
11934 case OpFMul:
11935 case OpFNegate:
11936 case OpIAdd:
11937 case OpISub:
11938 case OpIMul:
11939 case OpSNegate:
11940 case OpFMod:
11941 case OpFDiv:
11942 case OpFRem:
11943 case OpSMod:
11944 case OpSDiv:
11945 case OpSRem:
11946 case OpUMod:
11947 case OpUDiv:
11948 case OpVectorTimesMatrix:
11949 case OpMatrixTimesVector:
11950 case OpMatrixTimesMatrix:
11951 case OpDPdx:
11952 case OpDPdy:
11953 case OpDPdxCoarse:
11954 case OpDPdyCoarse:
11955 case OpDPdxFine:
11956 case OpDPdyFine:
11957 case OpFwidth:
11958 case OpFwidthCoarse:
11959 case OpFwidthFine:
11960 case OpVectorTimesScalar:
11961 case OpMatrixTimesScalar:
11962 case OpOuterProduct:
11963 case OpFConvert:
11964 case OpSConvert:
11965 case OpUConvert:
11966 case OpConvertSToF:
11967 case OpConvertUToF:
11968 case OpConvertFToU:
11969 case OpConvertFToS:
11970 return true;
11971
11972 default:
11973 return false;
11974 }
11975}
11976
// Instructions which just load data but don't do any arithmetic operation should simply inherit the decoration.
// SPIR-V doesn't require this, but it's somewhat implied it has to work this way: relaxed precision is only
// relevant when operating on the IDs, not when shuffling things around.
11980static bool opcode_is_precision_forwarding_instruction(Op op, uint32_t &arg_count)
11981{
11982 switch (op)
11983 {
11984 case OpLoad:
11985 case OpAccessChain:
11986 case OpInBoundsAccessChain:
11987 case OpCompositeExtract:
11988 case OpVectorExtractDynamic:
11989 case OpSampledImage:
11990 case OpImage:
11991 case OpCopyObject:
11992
11993 case OpImageRead:
11994 case OpImageFetch:
11995 case OpImageSampleImplicitLod:
11996 case OpImageSampleProjImplicitLod:
11997 case OpImageSampleDrefImplicitLod:
11998 case OpImageSampleProjDrefImplicitLod:
11999 case OpImageSampleExplicitLod:
12000 case OpImageSampleProjExplicitLod:
12001 case OpImageSampleDrefExplicitLod:
12002 case OpImageSampleProjDrefExplicitLod:
12003 case OpImageGather:
12004 case OpImageDrefGather:
12005 case OpImageSparseRead:
12006 case OpImageSparseFetch:
12007 case OpImageSparseSampleImplicitLod:
12008 case OpImageSparseSampleProjImplicitLod:
12009 case OpImageSparseSampleDrefImplicitLod:
12010 case OpImageSparseSampleProjDrefImplicitLod:
12011 case OpImageSparseSampleExplicitLod:
12012 case OpImageSparseSampleProjExplicitLod:
12013 case OpImageSparseSampleDrefExplicitLod:
12014 case OpImageSparseSampleProjDrefExplicitLod:
12015 case OpImageSparseGather:
12016 case OpImageSparseDrefGather:
12017 arg_count = 1;
12018 return true;
12019
12020 case OpVectorShuffle:
12021 arg_count = 2;
12022 return true;
12023
12024 case OpCompositeConstruct:
12025 return true;
12026
12027 default:
12028 break;
12029 }
12030
12031 return false;
12032}
12033
12034CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Instruction &instruction)
12035{
12036 auto ops = stream_mutable(instr: instruction);
12037 auto opcode = static_cast<Op>(instruction.op);
12038 uint32_t length = instruction.length;
12039
12040 if (backend.requires_relaxed_precision_analysis)
12041 {
12042 if (length > 2)
12043 {
12044 uint32_t forwarding_length = length - 2;
12045
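			// ops[0] is the result type and ops[1] the result ID, so the actual arguments start at ops[2].
			// For OpExtInst, the extension set ID and the literal opcode occupy ops[2] and ops[3],
			// which is why its arguments start at ops[4] with two fewer operands to forward.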
12046 if (opcode_is_precision_sensitive_operation(op: opcode))
12047 analyze_precision_requirements(type_id: ops[0], dst_id: ops[1], args: &ops[2], length: forwarding_length);
12048 else if (opcode == OpExtInst && length >= 5 && get<SPIRExtension>(id: ops[2]).ext == SPIRExtension::GLSL)
12049 analyze_precision_requirements(type_id: ops[0], dst_id: ops[1], args: &ops[4], length: forwarding_length - 2);
12050 else if (opcode_is_precision_forwarding_instruction(op: opcode, arg_count&: forwarding_length))
12051 forward_relaxed_precision(dst_id: ops[1], args: &ops[2], length: forwarding_length);
12052 }
12053
12054 uint32_t result_type = 0, result_id = 0;
12055 if (instruction_to_result_type(result_type, result_id, op: opcode, args: ops, length))
12056 {
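			// If this temporary has a mirror alias in the other precision, report the pair so the caller
			// can emit a copy into the alias right after the instruction itself.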
12057 auto itr = temporary_to_mirror_precision_alias.find(x: ops[1]);
12058 if (itr != temporary_to_mirror_precision_alias.end())
12059 return { .dst_id: itr->second, .src_id: itr->first };
12060 }
12061 }
12062
12063 return {};
12064}
12065
12066void CompilerGLSL::emit_instruction(const Instruction &instruction)
12067{
12068 auto ops = stream(instr: instruction);
12069 auto opcode = static_cast<Op>(instruction.op);
12070 uint32_t length = instruction.length;
12071
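// Shorthand macros for the common emit_*_op helpers. They all assume the standard SPIR-V operand layout:
// ops[0] is the result type, ops[1] the result ID, and the remaining ops are the operands.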
12072#define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
12073#define GLSL_BOP_CAST(op, type) \
12074 emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, \
12075 opcode_is_sign_invariant(opcode), implicit_integer_promotion)
12076#define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
12077#define GLSL_UOP_CAST(op) emit_unary_op_cast(ops[0], ops[1], ops[2], #op)
12078#define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
12079#define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
12080#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
12081#define GLSL_BFOP_CAST(op, type) \
12082 emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
12084#define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
12085
12086 // If we need to do implicit bitcasts, make sure we do it with the correct type.
12087 uint32_t integer_width = get_integer_width_for_instruction(instr: instruction);
12088 auto int_type = to_signed_basetype(width: integer_width);
12089 auto uint_type = to_unsigned_basetype(width: integer_width);
12090
12091 // Handle C implicit integer promotion rules.
	// If we get implicit promotion to int, we need to make sure the result is cast by value to the intended
	// return type; otherwise, future sign-dependent operations and bitcasts will break.
12094 bool implicit_integer_promotion = integer_width < 32 && backend.implicit_c_integer_promotion_rules &&
12095 opcode_can_promote_integer_implicitly(opcode) &&
12096 get<SPIRType>(id: ops[0]).vecsize == 1;
12097
12098 opcode = get_remapped_spirv_op(op: opcode);
12099
12100 switch (opcode)
12101 {
12102 // Dealing with memory
12103 case OpLoad:
12104 {
12105 uint32_t result_type = ops[0];
12106 uint32_t id = ops[1];
12107 uint32_t ptr = ops[2];
12108
12109 flush_variable_declaration(id: ptr);
12110
12111 // If we're loading from memory that cannot be changed by the shader,
12112 // just forward the expression directly to avoid needless temporaries.
12113 // If an expression is mutable and forwardable, we speculate that it is immutable.
12114 bool forward = should_forward(id: ptr) && forced_temporaries.find(x: id) == end(cont&: forced_temporaries);
12115
12116 // If loading a non-native row-major matrix, mark the expression as need_transpose.
12117 bool need_transpose = false;
12118 bool old_need_transpose = false;
12119
12120 auto *ptr_expression = maybe_get<SPIRExpression>(id: ptr);
12121
12122 if (forward)
12123 {
12124 // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
12125 // taking the expression.
12126 if (ptr_expression && ptr_expression->need_transpose)
12127 {
12128 old_need_transpose = true;
12129 ptr_expression->need_transpose = false;
12130 need_transpose = true;
12131 }
12132 else if (is_non_native_row_major_matrix(id: ptr))
12133 need_transpose = true;
12134 }
12135
12136 // If we are forwarding this load,
12137 // don't register the read to access chain here, defer that to when we actually use the expression,
12138 // using the add_implied_read_expression mechanism.
12139 string expr;
12140
12141 bool is_packed = has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypePacked);
12142 bool is_remapped = has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID);
12143 if (forward || (!is_packed && !is_remapped))
12144 {
12145 // For the simple case, we do not need to deal with repacking.
12146 expr = to_dereferenced_expression(id: ptr, register_expression_read: false);
12147 }
12148 else
12149 {
12150 // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
12151 // storing the expression to a temporary.
12152 expr = to_unpacked_expression(id: ptr);
12153 }
12154
12155 auto &type = get<SPIRType>(id: result_type);
12156 auto &expr_type = expression_type(id: ptr);
12157
12158 // If the expression has more vector components than the result type, insert
12159 // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
12160 // happen with e.g. the MSL backend replacing the type of an input variable.
12161 if (expr_type.vecsize > type.vecsize)
12162 expr = enclose_expression(expr: expr + vector_swizzle(vecsize: type.vecsize, index: 0));
12163
12164 if (forward && ptr_expression)
12165 ptr_expression->need_transpose = old_need_transpose;
12166
12167 // We might need to cast in order to load from a builtin.
12168 cast_from_variable_load(source_id: ptr, expr, expr_type: type);
12169
12170 if (forward && ptr_expression)
12171 ptr_expression->need_transpose = false;
12172
12173 // We might be trying to load a gl_Position[N], where we should be
12174 // doing float4[](gl_in[i].gl_Position, ...) instead.
12175 // Similar workarounds are required for input arrays in tessellation.
12176 // Also, loading from gl_SampleMask array needs special unroll.
12177 unroll_array_from_complex_load(target_id: id, source_id: ptr, expr);
12178
12179 if (!type_is_opaque_value(type) && has_decoration(id: ptr, decoration: DecorationNonUniform))
12180 {
12181 // If we're loading something non-opaque, we need to handle non-uniform descriptor access.
12182 convert_non_uniform_expression(expr, ptr_id: ptr);
12183 }
12184
12185 if (forward && ptr_expression)
12186 ptr_expression->need_transpose = old_need_transpose;
12187
12188 bool flattened = ptr_expression && flattened_buffer_blocks.count(x: ptr_expression->loaded_from) != 0;
12189
12190 if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(id: ptr) && !flattened)
12191 rewrite_load_for_wrapped_row_major(expr, loaded_type: result_type, ptr);
12192
12193 // By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
12194 // However, if we try to load a complex, composite object from a flattened buffer,
12195 // we should avoid emitting the same code over and over and lower the result to a temporary.
12196 bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
12197
12198 SPIRExpression *e = nullptr;
12199 if (!forward && expression_is_non_value_type_array(ptr))
12200 {
12201 // Complicated load case where we need to make a copy of ptr, but we cannot, because
12202 // it is an array, and our backend does not support arrays as value types.
12203 // Emit the temporary, and copy it explicitly.
12204 e = &emit_uninitialized_temporary_expression(type: result_type, id);
12205 emit_array_copy(expr: nullptr, lhs_id: id, rhs_id: ptr, lhs_storage: StorageClassFunction, rhs_storage: get_expression_effective_storage_class(ptr));
12206 }
12207 else
12208 e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: forward, suppress_usage_tracking: !usage_tracking);
12209
12210 e->need_transpose = need_transpose;
12211 register_read(expr: id, chain: ptr, forwarded: forward);
12212
12213 if (forward)
12214 {
12215 // Pass through whether the result is of a packed type and the physical type ID.
12216 if (has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypePacked))
12217 set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
12218 if (has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID))
12219 {
12220 set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID,
12221 value: get_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID));
12222 }
12223 }
12224 else
12225 {
12226 // This might have been set on an earlier compilation iteration, force it to be unset.
12227 unset_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
12228 unset_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID);
12229 }
12230
12231 inherit_expression_dependencies(dst: id, source: ptr);
12232 if (forward)
12233 add_implied_read_expression(e&: *e, source: ptr);
12234 break;
12235 }
12236
12237 case OpInBoundsAccessChain:
12238 case OpAccessChain:
12239 case OpPtrAccessChain:
12240 {
12241 auto *var = maybe_get<SPIRVariable>(id: ops[2]);
12242 if (var)
12243 flush_variable_declaration(id: var->self);
12244
12245 // If the base is immutable, the access chain pointer must also be.
12246 // If an expression is mutable and forwardable, we speculate that it is immutable.
12247 AccessChainMeta meta;
12248 bool ptr_chain = opcode == OpPtrAccessChain;
12249 auto &target_type = get<SPIRType>(id: ops[0]);
12250 auto e = access_chain(base: ops[2], indices: &ops[3], count: length - 3, target_type, meta: &meta, ptr_chain);
12251
12252 // If the base is flattened UBO of struct type, the expression has to be a composite.
12253 // In that case, backends which do not support inline syntax need it to be bound to a temporary.
12254 // Otherwise, invalid expressions like ({UBO[0].xyz, UBO[0].w, UBO[1]}).member are emitted.
12255 bool requires_temporary = false;
12256 if (flattened_buffer_blocks.count(x: ops[2]) && target_type.basetype == SPIRType::Struct)
12257 requires_temporary = !backend.can_declare_struct_inline;
12258
12259 auto &expr = requires_temporary ?
12260 emit_op(result_type: ops[0], result_id: ops[1], rhs: std::move(e), forwarding: false) :
12261 set<SPIRExpression>(id: ops[1], args: std::move(e), args: ops[0], args: should_forward(id: ops[2]));
12262
12263 auto *backing_variable = maybe_get_backing_variable(chain: ops[2]);
12264 expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
12265 expr.need_transpose = meta.need_transpose;
12266 expr.access_chain = true;
12267 expr.access_meshlet_position_y = meta.access_meshlet_position_y;
12268
		// Mark the result as being packed. Some backends handle packed vectors differently from non-packed ones.
12270 if (meta.storage_is_packed)
12271 set_extended_decoration(id: ops[1], decoration: SPIRVCrossDecorationPhysicalTypePacked);
12272 if (meta.storage_physical_type != 0)
12273 set_extended_decoration(id: ops[1], decoration: SPIRVCrossDecorationPhysicalTypeID, value: meta.storage_physical_type);
12274 if (meta.storage_is_invariant)
12275 set_decoration(id: ops[1], decoration: DecorationInvariant);
12276 if (meta.flattened_struct)
12277 flattened_structs[ops[1]] = true;
12278 if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
12279 set_decoration(id: ops[1], decoration: DecorationRelaxedPrecision);
12280
12281 // If we have some expression dependencies in our access chain, this access chain is technically a forwarded
12282 // temporary which could be subject to invalidation.
		// Need to assume we're forwarded while calling inherit_expression_dependencies.
12284 forwarded_temporaries.insert(x: ops[1]);
12285 // The access chain itself is never forced to a temporary, but its dependencies might.
12286 suppressed_usage_tracking.insert(x: ops[1]);
12287
12288 for (uint32_t i = 2; i < length; i++)
12289 {
12290 inherit_expression_dependencies(dst: ops[1], source: ops[i]);
12291 add_implied_read_expression(e&: expr, source: ops[i]);
12292 }
12293
12294 // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
12295 // we're not forwarded after all.
12296 if (expr.expression_dependencies.empty())
12297 forwarded_temporaries.erase(x: ops[1]);
12298
12299 break;
12300 }
12301
12302 case OpStore:
12303 {
12304 auto *var = maybe_get<SPIRVariable>(id: ops[0]);
12305
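		// Statically assigned variables are only ever written once, so the store can be elided and the RHS
		// simply remembered as the variable's static expression. The same applies to loop variables which
		// have not been enabled as actual loop variables yet.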
12306 if (var && var->statically_assigned)
12307 var->static_expression = ops[1];
12308 else if (var && var->loop_variable && !var->loop_variable_enable)
12309 var->static_expression = ops[1];
12310 else if (var && var->remapped_variable && var->static_expression)
12311 {
12312 // Skip the write.
12313 }
12314 else if (flattened_structs.count(x: ops[0]))
12315 {
12316 store_flattened_struct(lhs_id: ops[0], value: ops[1]);
12317 register_write(chain: ops[0]);
12318 }
12319 else
12320 {
12321 emit_store_statement(lhs_expression: ops[0], rhs_expression: ops[1]);
12322 }
12323
12324 // Storing a pointer results in a variable pointer, so we must conservatively assume
12325 // we can write through it.
12326 if (expression_type(id: ops[1]).pointer)
12327 register_write(chain: ops[1]);
12328 break;
12329 }
12330
12331 case OpArrayLength:
12332 {
12333 uint32_t result_type = ops[0];
12334 uint32_t id = ops[1];
12335 auto e = access_chain_internal(base: ops[2], indices: &ops[3], count: length - 3, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr);
12336 if (has_decoration(id: ops[2], decoration: DecorationNonUniform))
12337 convert_non_uniform_expression(expr&: e, ptr_id: ops[2]);
12338 set<SPIRExpression>(id, args: join(ts: type_to_glsl(type: get<SPIRType>(id: result_type)), ts: "(", ts&: e, ts: ".length())"), args&: result_type,
12339 args: true);
12340 break;
12341 }
12342
12343 // Function calls
12344 case OpFunctionCall:
12345 {
12346 uint32_t result_type = ops[0];
12347 uint32_t id = ops[1];
12348 uint32_t func = ops[2];
12349 const auto *arg = &ops[3];
12350 length -= 3;
12351
12352 auto &callee = get<SPIRFunction>(id: func);
12353 auto &return_type = get<SPIRType>(id: callee.return_type);
12354 bool pure = function_is_pure(func: callee);
12355 bool control_dependent = function_is_control_dependent(func: callee);
12356
12357 bool callee_has_out_variables = false;
12358 bool emit_return_value_as_argument = false;
12359
12360 // Invalidate out variables passed to functions since they can be OpStore'd to.
12361 for (uint32_t i = 0; i < length; i++)
12362 {
12363 if (callee.arguments[i].write_count)
12364 {
12365 register_call_out_argument(id: arg[i]);
12366 callee_has_out_variables = true;
12367 }
12368
12369 flush_variable_declaration(id: arg[i]);
12370 }
12371
12372 if (!return_type.array.empty() && !backend.can_return_array)
12373 {
12374 callee_has_out_variables = true;
12375 emit_return_value_as_argument = true;
12376 }
12377
12378 if (!pure)
12379 register_impure_function_call();
12380
12381 string funexpr;
12382 SmallVector<string> arglist;
12383 funexpr += to_name(id: func) + "(";
12384
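		// If the backend cannot return arrays by value, declare the result up front and pass it
		// as an extra out argument to the callee instead of using a return value.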
12385 if (emit_return_value_as_argument)
12386 {
12387 statement(ts: type_to_glsl(type: return_type), ts: " ", ts: to_name(id), ts: type_to_array_glsl(type: return_type, variable_id: 0), ts: ";");
12388 arglist.push_back(t: to_name(id));
12389 }
12390
12391 for (uint32_t i = 0; i < length; i++)
12392 {
12393 // Do not pass in separate images or samplers if we're remapping
12394 // to combined image samplers.
12395 if (skip_argument(id: arg[i]))
12396 continue;
12397
12398 arglist.push_back(t: to_func_call_arg(callee.arguments[i], id: arg[i]));
12399 }
12400
12401 for (auto &combined : callee.combined_parameters)
12402 {
12403 auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
12404 auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
12405 arglist.push_back(t: to_combined_image_sampler(image_id, samp_id: sampler_id));
12406 }
12407
12408 append_global_func_args(func: callee, index: length, arglist);
12409
12410 funexpr += merge(list: arglist);
12411 funexpr += ")";
12412
12413 // Check for function call constraints.
12414 check_function_call_constraints(args: arg, length);
12415
12416 if (return_type.basetype != SPIRType::Void)
12417 {
12418 // If the function actually writes to an out variable,
12419 // take the conservative route and do not forward.
12420 // The problem is that we might not read the function
12421 // result (and emit the function) before an out variable
			// is read (a common case when the return value is ignored!).
			// To avoid having to start tracking invalid variables,
			// just avoid the forwarding problem altogether.
12425 bool forward = args_will_forward(id, args: arg, num_args: length, pure) && !callee_has_out_variables && pure &&
12426 (forced_temporaries.find(x: id) == end(cont&: forced_temporaries));
12427
12428 if (emit_return_value_as_argument)
12429 {
12430 statement(ts&: funexpr, ts: ";");
12431 set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true);
12432 }
12433 else
12434 emit_op(result_type, result_id: id, rhs: funexpr, forwarding: forward);
12435
12436 // Function calls are implicit loads from all variables in question.
12437 // Set dependencies for them.
12438 for (uint32_t i = 0; i < length; i++)
12439 register_read(expr: id, chain: arg[i], forwarded: forward);
12440
12441 // If we're going to forward the temporary result,
12442 // put dependencies on every variable that must not change.
12443 if (forward)
12444 register_global_read_dependencies(func: callee, id);
12445 }
12446 else
12447 statement(ts&: funexpr, ts: ";");
12448
12449 if (control_dependent)
12450 register_control_dependent_expression(expr: id);
12451
12452 break;
12453 }
12454
12455 // Composite munging
12456 case OpCompositeConstruct:
12457 {
12458 uint32_t result_type = ops[0];
12459 uint32_t id = ops[1];
12460 const auto *const elems = &ops[2];
12461 length -= 2;
12462
12463 bool forward = true;
12464 for (uint32_t i = 0; i < length; i++)
12465 forward = forward && should_forward(id: elems[i]);
12466
12467 auto &out_type = get<SPIRType>(id: result_type);
12468 auto *in_type = length > 0 ? &expression_type(id: elems[0]) : nullptr;
12469
12470 // Only splat if we have vector constructors.
12471 // Arrays and structs must be initialized properly in full.
12472 bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
12473
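		// "splat" collapses a construct from N identical scalars into a single-argument constructor
		// (e.g. vec4(x)), while "swizzle_splat" expands the scalar via swizzling on backends which
		// can swizzle scalars.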
12474 bool splat = false;
12475 bool swizzle_splat = false;
12476
12477 if (in_type)
12478 {
12479 splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
12480 swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
12481
12482 if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(type: *in_type))
12483 {
12484 // Cannot swizzle literal integers as a special case.
12485 swizzle_splat = false;
12486 }
12487 }
12488
12489 if (splat || swizzle_splat)
12490 {
12491 uint32_t input = elems[0];
12492 for (uint32_t i = 0; i < length; i++)
12493 {
12494 if (input != elems[i])
12495 {
12496 splat = false;
12497 swizzle_splat = false;
12498 }
12499 }
12500 }
12501
12502 if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
12503 forward = false;
12504 if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
12505 forward = false;
12506 if (type_is_empty(type: out_type) && !backend.supports_empty_struct)
12507 forward = false;
12508
12509 string constructor_op;
12510 if (backend.use_initializer_list && composite)
12511 {
			bool needs_trailing_bracket = false;
12513 // Only use this path if we are building composites.
12514 // This path cannot be used for arithmetic.
12515 if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
12516 constructor_op += type_to_glsl_constructor(type: get<SPIRType>(id: result_type));
12517 else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
12518 {
12519 // MSL path. Array constructor is baked into type here, do not use _constructor variant.
12520 constructor_op += type_to_glsl_constructor(type: get<SPIRType>(id: result_type)) + "(";
				needs_trailing_bracket = true;
12522 }
12523 constructor_op += "{ ";
12524
12525 if (type_is_empty(type: out_type) && !backend.supports_empty_struct)
12526 constructor_op += "0";
12527 else if (splat)
12528 constructor_op += to_unpacked_expression(id: elems[0]);
12529 else
12530 constructor_op += build_composite_combiner(return_type: result_type, elems, length);
12531 constructor_op += " }";
			if (needs_trailing_bracket)
12533 constructor_op += ")";
12534 }
12535 else if (swizzle_splat && !composite)
12536 {
12537 constructor_op = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 1, expr: to_unpacked_expression(id: elems[0]));
12538 }
12539 else
12540 {
12541 constructor_op = type_to_glsl_constructor(type: get<SPIRType>(id: result_type)) + "(";
12542 if (type_is_empty(type: out_type) && !backend.supports_empty_struct)
12543 constructor_op += "0";
12544 else if (splat)
12545 constructor_op += to_unpacked_expression(id: elems[0]);
12546 else
12547 constructor_op += build_composite_combiner(return_type: result_type, elems, length);
12548 constructor_op += ")";
12549 }
12550
12551 if (!constructor_op.empty())
12552 {
12553 emit_op(result_type, result_id: id, rhs: constructor_op, forwarding: forward);
12554 for (uint32_t i = 0; i < length; i++)
12555 inherit_expression_dependencies(dst: id, source: elems[i]);
12556 }
12557 break;
12558 }
12559
12560 case OpVectorInsertDynamic:
12561 {
12562 uint32_t result_type = ops[0];
12563 uint32_t id = ops[1];
12564 uint32_t vec = ops[2];
12565 uint32_t comp = ops[3];
12566 uint32_t index = ops[4];
12567
12568 flush_variable_declaration(id: vec);
12569
12570 // Make a copy, then use access chain to store the variable.
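		// The emitted code is roughly: "<type> <tmp> = <vec>; <tmp>[<index>] = <comp>;".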
12571 statement(ts: declare_temporary(result_type, result_id: id), ts: to_expression(id: vec), ts: ";");
12572 set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true);
12573 auto chain = access_chain_internal(base: id, indices: &index, count: 1, flags: 0, meta: nullptr);
12574 statement(ts&: chain, ts: " = ", ts: to_unpacked_expression(id: comp), ts: ";");
12575 break;
12576 }
12577
12578 case OpVectorExtractDynamic:
12579 {
12580 uint32_t result_type = ops[0];
12581 uint32_t id = ops[1];
12582
12583 auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: 1, flags: 0, meta: nullptr);
12584 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: ops[2]));
12585 inherit_expression_dependencies(dst: id, source: ops[2]);
12586 inherit_expression_dependencies(dst: id, source: ops[3]);
12587 break;
12588 }
12589
12590 case OpCompositeExtract:
12591 {
12592 uint32_t result_type = ops[0];
12593 uint32_t id = ops[1];
12594 length -= 3;
12595
12596 auto &type = get<SPIRType>(id: result_type);
12597
12598 // We can only split the expression here if our expression is forwarded as a temporary.
12599 bool allow_base_expression = forced_temporaries.find(x: id) == end(cont&: forced_temporaries);
12600
		// Do not allow a base expression for structs or arrays. We risk doing bogus "swizzle" optimizations in that case.
12602 auto &composite_type = expression_type(id: ops[2]);
12603 bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty();
12604 if (composite_type_is_complex)
12605 allow_base_expression = false;
12606
12607 // Packed expressions or physical ID mapped expressions cannot be split up.
12608 if (has_extended_decoration(id: ops[2], decoration: SPIRVCrossDecorationPhysicalTypePacked) ||
12609 has_extended_decoration(id: ops[2], decoration: SPIRVCrossDecorationPhysicalTypeID))
12610 allow_base_expression = false;
12611
12612 // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
12613 // into the base expression.
12614 if (is_non_native_row_major_matrix(id: ops[2]))
12615 allow_base_expression = false;
12616
12617 AccessChainMeta meta;
12618 SPIRExpression *e = nullptr;
12619 auto *c = maybe_get<SPIRConstant>(id: ops[2]);
12620
12621 if (c && !c->specialization && !composite_type_is_complex)
12622 {
12623 auto expr = to_extract_constant_composite_expression(result_type, c: *c, chain: ops + 3, length);
12624 e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: true, suppress_usage_tracking: true);
12625 }
12626 else if (allow_base_expression && should_forward(id: ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
12627 {
12628 // Only apply this optimization if result is scalar.
12629
12630 // We want to split the access chain from the base.
12631 // This is so we can later combine different CompositeExtract results
12632 // with CompositeConstruct without emitting code like
12633 //
12634 // vec3 temp = texture(...).xyz
12635 // vec4(temp.x, temp.y, temp.z, 1.0).
12636 //
12637 // when we actually wanted to emit this
12638 // vec4(texture(...).xyz, 1.0).
12639 //
12640 // Including the base will prevent this and would trigger multiple reads
12641 // from expression causing it to be forced to an actual temporary in GLSL.
12642 auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: length,
12643 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT |
12644 ACCESS_CHAIN_FORCE_COMPOSITE_BIT, meta: &meta);
12645 e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: true, suppress_usage_tracking: should_suppress_usage_tracking(id: ops[2]));
12646 inherit_expression_dependencies(dst: id, source: ops[2]);
12647 e->base_expression = ops[2];
12648
12649 if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
12650 set_decoration(id: ops[1], decoration: DecorationRelaxedPrecision);
12651 }
12652 else
12653 {
12654 auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: length,
12655 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, meta: &meta);
12656 e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: ops[2]), suppress_usage_tracking: should_suppress_usage_tracking(id: ops[2]));
12657 inherit_expression_dependencies(dst: id, source: ops[2]);
12658 }
12659
12660 // Pass through some meta information to the loaded expression.
12661 // We can still end up loading a buffer type to a variable, then CompositeExtract from it
12662 // instead of loading everything through an access chain.
12663 e->need_transpose = meta.need_transpose;
12664 if (meta.storage_is_packed)
12665 set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
12666 if (meta.storage_physical_type != 0)
12667 set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID, value: meta.storage_physical_type);
12668 if (meta.storage_is_invariant)
12669 set_decoration(id, decoration: DecorationInvariant);
12670
12671 break;
12672 }
12673
12674 case OpCompositeInsert:
12675 {
12676 uint32_t result_type = ops[0];
12677 uint32_t id = ops[1];
12678 uint32_t obj = ops[2];
12679 uint32_t composite = ops[3];
12680 const auto *elems = &ops[4];
12681 length -= 4;
12682
12683 flush_variable_declaration(id: composite);
12684
12685 // CompositeInsert requires a copy + modification, but this is very awkward code in HLL.
12686 // Speculate that the input composite is no longer used, and we can modify it in-place.
12687 // There are various scenarios where this is not possible to satisfy.
12688 bool can_modify_in_place = true;
12689 forced_temporaries.insert(x: id);
12690
12691 // Cannot safely RMW PHI variables since they have no way to be invalidated,
12692 // forcing temporaries is not going to help.
12693 // This is similar for Constant and Undef inputs.
12694 // The only safe thing to RMW is SPIRExpression.
12695 // If the expression has already been used (i.e. used in a continue block), we have to keep using
12696 // that loop variable, since we won't be able to override the expression after the fact.
12697 // If the composite is hoisted, we might never be able to properly invalidate any usage
12698 // of that composite in a subsequent loop iteration.
12699 if (invalid_expressions.count(x: composite) ||
12700 block_composite_insert_overwrite.count(x: composite) ||
12701 hoisted_temporaries.count(x: id) || hoisted_temporaries.count(x: composite) ||
12702 maybe_get<SPIRExpression>(id: composite) == nullptr)
12703 {
12704 can_modify_in_place = false;
12705 }
12706 else if (backend.requires_relaxed_precision_analysis &&
12707 has_decoration(id: composite, decoration: DecorationRelaxedPrecision) !=
12708 has_decoration(id, decoration: DecorationRelaxedPrecision) &&
12709 get<SPIRType>(id: result_type).basetype != SPIRType::Struct)
12710 {
12711 // Similarly, if precision does not match for input and output,
12712 // we cannot alias them. If we write a composite into a relaxed precision
12713 // ID, we might get a false truncation.
12714 can_modify_in_place = false;
12715 }
12716
12717 if (can_modify_in_place)
12718 {
12719 // Have to make sure the modified SSA value is bound to a temporary so we can modify it in-place.
12720 if (!forced_temporaries.count(x: composite))
12721 force_temporary_and_recompile(id: composite);
12722
12723 auto chain = access_chain_internal(base: composite, indices: elems, count: length, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr);
12724 statement(ts&: chain, ts: " = ", ts: to_unpacked_expression(id: obj), ts: ";");
12725 set<SPIRExpression>(id, args: to_expression(id: composite), args&: result_type, args: true);
12726 invalid_expressions.insert(x: composite);
12727 composite_insert_overwritten.insert(x: composite);
12728 }
12729 else
12730 {
12731 if (maybe_get<SPIRUndef>(id: composite) != nullptr)
12732 {
12733 emit_uninitialized_temporary_expression(type: result_type, id);
12734 }
12735 else
12736 {
12737 // Make a copy, then use access chain to store the variable.
12738 statement(ts: declare_temporary(result_type, result_id: id), ts: to_expression(id: composite), ts: ";");
12739 set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true);
12740 }
12741
12742 auto chain = access_chain_internal(base: id, indices: elems, count: length, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr);
12743 statement(ts&: chain, ts: " = ", ts: to_unpacked_expression(id: obj), ts: ";");
12744 }
12745
12746 break;
12747 }
12748
12749 case OpCopyMemory:
12750 {
12751 uint32_t lhs = ops[0];
12752 uint32_t rhs = ops[1];
12753 if (lhs != rhs)
12754 {
12755 uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET];
12756 if (!tmp_id)
12757 tmp_id = ir.increase_bound_by(count: 1);
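			// The temporary holds the pointee value, so peel off one level of pointer indirection
			// from the RHS pointer type.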
12758 uint32_t tmp_type_id = expression_type(id: rhs).parent_type;
12759
12760 EmbeddedInstruction fake_load, fake_store;
12761 fake_load.op = OpLoad;
12762 fake_load.length = 3;
12763 fake_load.ops.push_back(t: tmp_type_id);
12764 fake_load.ops.push_back(t: tmp_id);
12765 fake_load.ops.push_back(t: rhs);
12766
12767 fake_store.op = OpStore;
12768 fake_store.length = 2;
12769 fake_store.ops.push_back(t: lhs);
12770 fake_store.ops.push_back(t: tmp_id);
12771
12772 // Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible.
12773 // Synthesize a fake Load and Store pair for CopyMemory.
12774 emit_instruction(instruction: fake_load);
12775 emit_instruction(instruction: fake_store);
12776 }
12777 break;
12778 }
12779
12780 case OpCopyLogical:
12781 {
12782 // This is used for copying object of different types, arrays and structs.
12783 // We need to unroll the copy, element-by-element.
12784 uint32_t result_type = ops[0];
12785 uint32_t id = ops[1];
12786 uint32_t rhs = ops[2];
12787
12788 emit_uninitialized_temporary_expression(type: result_type, id);
12789 emit_copy_logical_type(lhs_id: id, lhs_type_id: result_type, rhs_id: rhs, rhs_type_id: expression_type_id(id: rhs), chain: {});
12790 break;
12791 }
12792
12793 case OpCopyObject:
12794 {
12795 uint32_t result_type = ops[0];
12796 uint32_t id = ops[1];
12797 uint32_t rhs = ops[2];
12798 bool pointer = get<SPIRType>(id: result_type).pointer;
12799
12800 auto *chain = maybe_get<SPIRAccessChain>(id: rhs);
12801 auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(id: rhs);
12802 if (chain)
12803 {
12804 // Cannot lower to a SPIRExpression, just copy the object.
12805 auto &e = set<SPIRAccessChain>(id, args&: *chain);
12806 e.self = id;
12807 }
12808 else if (imgsamp)
12809 {
12810 // Cannot lower to a SPIRExpression, just copy the object.
12811 // GLSL does not currently use this type and will never get here, but MSL does.
12812 // Handled here instead of CompilerMSL for better integration and general handling,
12813 // and in case GLSL or other subclasses require it in the future.
12814 auto &e = set<SPIRCombinedImageSampler>(id, args&: *imgsamp);
12815 e.self = id;
12816 }
12817 else if (expression_is_lvalue(id: rhs) && !pointer)
12818 {
			// Need a copy of the value.
			// Pointer types take the else branch below, where the pointer itself is forwarded instead.
12821 emit_op(result_type, result_id: id, rhs: to_unpacked_expression(id: rhs), forwarding: false);
12822 }
12823 else
12824 {
12825 // RHS expression is immutable, so just forward it.
			// Copying these things really makes no sense, but
			// it seems to be allowed anyway.
12828 auto &e = emit_op(result_type, result_id: id, rhs: to_expression(id: rhs), forwarding: true, suppress_usage_tracking: true);
12829 if (pointer)
12830 {
12831 auto *var = maybe_get_backing_variable(chain: rhs);
12832 e.loaded_from = var ? var->self : ID(0);
12833 }
12834
12835 // If we're copying an access chain, need to inherit the read expressions.
12836 auto *rhs_expr = maybe_get<SPIRExpression>(id: rhs);
12837 if (rhs_expr)
12838 {
12839 e.implied_read_expressions = rhs_expr->implied_read_expressions;
12840 e.expression_dependencies = rhs_expr->expression_dependencies;
12841 }
12842 }
12843 break;
12844 }
12845
12846 case OpVectorShuffle:
12847 {
12848 uint32_t result_type = ops[0];
12849 uint32_t id = ops[1];
12850 uint32_t vec0 = ops[2];
12851 uint32_t vec1 = ops[3];
12852 const auto *elems = &ops[4];
12853 length -= 4;
12854
12855 auto &type0 = expression_type(id: vec0);
12856
12857 // If we have the undefined swizzle index -1, we need to swizzle in undefined data,
12858 // or in our case, T(0).
12859 bool shuffle = false;
12860 for (uint32_t i = 0; i < length; i++)
12861 if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
12862 shuffle = true;
12863
12864 // Cannot use swizzles with packed expressions, force shuffle path.
12865 if (!shuffle && has_extended_decoration(id: vec0, decoration: SPIRVCrossDecorationPhysicalTypePacked))
12866 shuffle = true;
12867
12868 string expr;
12869 bool should_fwd, trivial_forward;
12870
12871 if (shuffle)
12872 {
12873 should_fwd = should_forward(id: vec0) && should_forward(id: vec1);
12874 trivial_forward = should_suppress_usage_tracking(id: vec0) && should_suppress_usage_tracking(id: vec1);
12875
12876 // Constructor style and shuffling from two different vectors.
12877 SmallVector<string> args;
12878 for (uint32_t i = 0; i < length; i++)
12879 {
12880 if (elems[i] == 0xffffffffu)
12881 {
12882 // Use a constant 0 here.
12883 // We could use the first component or similar, but then we risk propagating
12884 // a value we might not need, and bog down codegen.
12885 SPIRConstant c;
12886 c.constant_type = type0.parent_type;
12887 assert(type0.parent_type != ID(0));
12888 args.push_back(t: constant_expression(c));
12889 }
12890 else if (elems[i] >= type0.vecsize)
12891 args.push_back(t: to_extract_component_expression(id: vec1, index: elems[i] - type0.vecsize));
12892 else
12893 args.push_back(t: to_extract_component_expression(id: vec0, index: elems[i]));
12894 }
12895 expr += join(ts: type_to_glsl_constructor(type: get<SPIRType>(id: result_type)), ts: "(", ts: merge(list: args), ts: ")");
12896 }
12897 else
12898 {
12899 should_fwd = should_forward(id: vec0);
12900 trivial_forward = should_suppress_usage_tracking(id: vec0);
12901
12902 // We only source from first vector, so can use swizzle.
12903 // If the vector is packed, unpack it before applying a swizzle (needed for MSL)
12904 expr += to_enclosed_unpacked_expression(id: vec0);
12905 expr += ".";
12906 for (uint32_t i = 0; i < length; i++)
12907 {
12908 assert(elems[i] != 0xffffffffu);
12909 expr += index_to_swizzle(index: elems[i]);
12910 }
12911
12912 if (backend.swizzle_is_function && length > 1)
12913 expr += "()";
12914 }
12915
12916 // A shuffle is trivial in that it doesn't actually *do* anything.
12917 // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
12918
12919 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_fwd, suppress_usage_tracking: trivial_forward);
12920
12921 inherit_expression_dependencies(dst: id, source: vec0);
12922 if (vec0 != vec1)
12923 inherit_expression_dependencies(dst: id, source: vec1);
12924 break;
12925 }
12926
12927 // ALU
12928 case OpIsNan:
12929 if (!is_legacy())
12930 GLSL_UFOP(isnan);
12931 else
12932 {
12933 // Check if the number doesn't equal itself
12934 auto &type = get<SPIRType>(id: ops[0]);
12935 if (type.vecsize > 1)
12936 emit_binary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[2], op: "notEqual");
12937 else
12938 emit_binary_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[2], op: "!=");
12939 }
12940 break;
12941
12942 case OpIsInf:
12943 if (!is_legacy())
12944 GLSL_UFOP(isinf);
12945 else
12946 {
			// inf * 2 == inf by IEEE 754 rules; note that 0.0 * 2 == 0.0 as well, hence the explicit != 0.0 check.
			// This is more reliable than checking whether the product with zero is NaN.
12949 uint32_t result_type = ops[0];
12950 uint32_t result_id = ops[1];
12951 uint32_t operand = ops[2];
12952
12953 auto &type = get<SPIRType>(id: result_type);
12954 std::string expr;
12955 if (type.vecsize > 1)
12956 {
12957 expr = type_to_glsl_constructor(type);
12958 expr += '(';
12959 for (uint32_t i = 0; i < type.vecsize; i++)
12960 {
12961 auto comp = to_extract_component_expression(id: operand, index: i);
12962 expr += join(ts&: comp, ts: " != 0.0 && 2.0 * ", ts&: comp, ts: " == ", ts&: comp);
12963
12964 if (i + 1 < type.vecsize)
12965 expr += ", ";
12966 }
12967 expr += ')';
12968 }
12969 else
12970 {
12971 // Register an extra read to force writing out a temporary
12972 auto oper = to_enclosed_expression(id: operand);
12973 track_expression_read(id: operand);
12974 expr += join(ts&: oper, ts: " != 0.0 && 2.0 * ", ts&: oper, ts: " == ", ts&: oper);
12975 }
12976 emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: operand));
12977
12978 inherit_expression_dependencies(dst: result_id, source: operand);
12979 }
12980 break;
12981
12982 case OpSNegate:
12983 if (implicit_integer_promotion || expression_type_id(id: ops[2]) != ops[0])
12984 GLSL_UOP_CAST(-);
12985 else
12986 GLSL_UOP(-);
12987 break;
12988
12989 case OpFNegate:
12990 GLSL_UOP(-);
12991 break;
12992
12993 case OpIAdd:
12994 {
12995 // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
12996 auto type = get<SPIRType>(id: ops[0]).basetype;
12997 GLSL_BOP_CAST(+, type);
12998 break;
12999 }
13000
13001 case OpFAdd:
13002 GLSL_BOP(+);
13003 break;
13004
13005 case OpISub:
13006 {
13007 auto type = get<SPIRType>(id: ops[0]).basetype;
13008 GLSL_BOP_CAST(-, type);
13009 break;
13010 }
13011
13012 case OpFSub:
13013 GLSL_BOP(-);
13014 break;
13015
13016 case OpIMul:
13017 {
13018 auto type = get<SPIRType>(id: ops[0]).basetype;
13019 GLSL_BOP_CAST(*, type);
13020 break;
13021 }
13022
13023 case OpVectorTimesMatrix:
13024 case OpMatrixTimesVector:
13025 {
13026 // If the matrix needs transpose, just flip the multiply order.
13027 auto *e = maybe_get<SPIRExpression>(id: ops[opcode == OpMatrixTimesVector ? 2 : 3]);
13028 if (e && e->need_transpose)
13029 {
13030 e->need_transpose = false;
13031 string expr;
13032
13033 if (opcode == OpMatrixTimesVector)
13034 expr = join(ts: to_enclosed_unpacked_expression(id: ops[3]), ts: " * ",
13035 ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[2])));
13036 else
13037 expr = join(ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[3])), ts: " * ",
13038 ts: to_enclosed_unpacked_expression(id: ops[2]));
13039
13040 bool forward = should_forward(id: ops[2]) && should_forward(id: ops[3]);
13041 emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: forward);
13042 e->need_transpose = true;
13043 inherit_expression_dependencies(dst: ops[1], source: ops[2]);
13044 inherit_expression_dependencies(dst: ops[1], source: ops[3]);
13045 }
13046 else
13047 GLSL_BOP(*);
13048 break;
13049 }
13050
13051 case OpMatrixTimesMatrix:
13052 {
13053 auto *a = maybe_get<SPIRExpression>(id: ops[2]);
13054 auto *b = maybe_get<SPIRExpression>(id: ops[3]);
13055
13056 // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
13057 // a^T * b^T = (b * a)^T.
13058 if (a && b && a->need_transpose && b->need_transpose)
13059 {
13060 a->need_transpose = false;
13061 b->need_transpose = false;
13062 auto expr = join(ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[3])), ts: " * ",
13063 ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[2])));
13064 bool forward = should_forward(id: ops[2]) && should_forward(id: ops[3]);
13065 auto &e = emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: forward);
13066 e.need_transpose = true;
13067 a->need_transpose = true;
13068 b->need_transpose = true;
13069 inherit_expression_dependencies(dst: ops[1], source: ops[2]);
13070 inherit_expression_dependencies(dst: ops[1], source: ops[3]);
13071 }
13072 else
13073 GLSL_BOP(*);
13074
13075 break;
13076 }
13077
13078 case OpMatrixTimesScalar:
13079 {
13080 auto *a = maybe_get<SPIRExpression>(id: ops[2]);
13081
		// If the matrix needs a transpose, just mark the result as needing one as well.
13083 if (a && a->need_transpose)
13084 {
13085 a->need_transpose = false;
13086 auto expr = join(ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[2])), ts: " * ",
13087 ts: to_enclosed_unpacked_expression(id: ops[3]));
13088 bool forward = should_forward(id: ops[2]) && should_forward(id: ops[3]);
13089 auto &e = emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: forward);
13090 e.need_transpose = true;
13091 a->need_transpose = true;
13092 inherit_expression_dependencies(dst: ops[1], source: ops[2]);
13093 inherit_expression_dependencies(dst: ops[1], source: ops[3]);
13094 }
13095 else
13096 GLSL_BOP(*);
13097 break;
13098 }
13099
13100 case OpFMul:
13101 case OpVectorTimesScalar:
13102 GLSL_BOP(*);
13103 break;
13104
13105 case OpOuterProduct:
13106 if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
13107 {
13108 uint32_t result_type = ops[0];
13109 uint32_t id = ops[1];
13110 uint32_t a = ops[2];
13111 uint32_t b = ops[3];
13112
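		// outerProduct() is unavailable here, so build the matrix column by column:
		// each column is the left vector scaled by one component of the right vector.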
13113 auto &type = get<SPIRType>(id: result_type);
13114 string expr = type_to_glsl_constructor(type);
13115 expr += "(";
13116 for (uint32_t col = 0; col < type.columns; col++)
13117 {
13118 expr += to_enclosed_expression(id: a);
13119 expr += " * ";
13120 expr += to_extract_component_expression(id: b, index: col);
13121 if (col + 1 < type.columns)
13122 expr += ", ";
13123 }
13124 expr += ")";
13125 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: a) && should_forward(id: b));
13126 inherit_expression_dependencies(dst: id, source: a);
13127 inherit_expression_dependencies(dst: id, source: b);
13128 }
13129 else
13130 GLSL_BFOP(outerProduct);
13131 break;
13132
13133 case OpDot:
13134 GLSL_BFOP(dot);
13135 break;
13136
13137 case OpTranspose:
13138 if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
13139 {
13140 // transpose() is not available, so instead, flip need_transpose,
13141 // which can later be turned into an emulated transpose op by
13142 // convert_row_major_matrix(), if necessary.
13143 uint32_t result_type = ops[0];
13144 uint32_t result_id = ops[1];
13145 uint32_t input = ops[2];
13146
13147 // Force need_transpose to false temporarily to prevent
13148 // to_expression() from doing the transpose.
13149 bool need_transpose = false;
13150 auto *input_e = maybe_get<SPIRExpression>(id: input);
13151 if (input_e)
13152 swap(a&: need_transpose, b&: input_e->need_transpose);
13153
13154 bool forward = should_forward(id: input);
13155 auto &e = emit_op(result_type, result_id, rhs: to_expression(id: input), forwarding: forward);
13156 e.need_transpose = !need_transpose;
13157
13158 // Restore the old need_transpose flag.
13159 if (input_e)
13160 input_e->need_transpose = need_transpose;
13161 }
13162 else
13163 GLSL_UFOP(transpose);
13164 break;
13165
13166 case OpSRem:
13167 {
13168 uint32_t result_type = ops[0];
13169 uint32_t result_id = ops[1];
13170 uint32_t op0 = ops[2];
13171 uint32_t op1 = ops[3];
13172
		// Needs special handling: OpSRem takes its sign from the first operand, which GLSL's % operator
		// does not guarantee, so expand to a - b * (a / b) explicitly.
13174 bool forward = should_forward(id: op0) && should_forward(id: op1);
13175 auto expr = join(ts: to_enclosed_expression(id: op0), ts: " - ", ts: to_enclosed_expression(id: op1), ts: " * ", ts: "(",
13176 ts: to_enclosed_expression(id: op0), ts: " / ", ts: to_enclosed_expression(id: op1), ts: ")");
13177
13178 if (implicit_integer_promotion)
13179 expr = join(ts: type_to_glsl(type: get<SPIRType>(id: result_type)), ts: '(', ts&: expr, ts: ')');
13180
13181 emit_op(result_type, result_id, rhs: expr, forwarding: forward);
13182 inherit_expression_dependencies(dst: result_id, source: op0);
13183 inherit_expression_dependencies(dst: result_id, source: op1);
13184 break;
13185 }
13186
13187 case OpSDiv:
13188 GLSL_BOP_CAST(/, int_type);
13189 break;
13190
13191 case OpUDiv:
13192 GLSL_BOP_CAST(/, uint_type);
13193 break;
13194
13195 case OpIAddCarry:
13196 case OpISubBorrow:
13197 {
13198 if (options.es && options.version < 310)
13199 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
13200 else if (!options.es && options.version < 400)
13201 SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
13202
13203 uint32_t result_type = ops[0];
13204 uint32_t result_id = ops[1];
13205 uint32_t op0 = ops[2];
13206 uint32_t op1 = ops[3];
13207 auto &type = get<SPIRType>(id: result_type);
13208 emit_uninitialized_temporary_expression(type: result_type, id: result_id);
13209 const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
13210
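		// The SPIR-V result is a two-member struct: member 0 receives the sum/difference and
		// member 1 receives the carry/borrow via the built-in's out parameter.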
13211 statement(ts: to_expression(id: result_id), ts: ".", ts: to_member_name(type, index: 0), ts: " = ", ts&: op, ts: "(", ts: to_expression(id: op0), ts: ", ",
13212 ts: to_expression(id: op1), ts: ", ", ts: to_expression(id: result_id), ts: ".", ts: to_member_name(type, index: 1), ts: ");");
13213 break;
13214 }
13215
13216 case OpUMulExtended:
13217 case OpSMulExtended:
13218 {
13219 if (options.es && options.version < 310)
13220 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
13221 else if (!options.es && options.version < 400)
			SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
13223
13224 uint32_t result_type = ops[0];
13225 uint32_t result_id = ops[1];
13226 uint32_t op0 = ops[2];
13227 uint32_t op1 = ops[3];
13228 auto &type = get<SPIRType>(id: result_type);
13229 emit_uninitialized_temporary_expression(type: result_type, id: result_id);
13230 const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
13231
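		// umulExtended()/imulExtended() take their msb out-parameter before the lsb one, while the SPIR-V
		// result struct is { lsb, msb }, hence the swapped member order below.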
13232 statement(ts&: op, ts: "(", ts: to_expression(id: op0), ts: ", ", ts: to_expression(id: op1), ts: ", ", ts: to_expression(id: result_id), ts: ".",
13233 ts: to_member_name(type, index: 1), ts: ", ", ts: to_expression(id: result_id), ts: ".", ts: to_member_name(type, index: 0), ts: ");");
13234 break;
13235 }
13236
13237 case OpFDiv:
13238 GLSL_BOP(/);
13239 break;
13240
13241 case OpShiftRightLogical:
13242 GLSL_BOP_CAST(>>, uint_type);
13243 break;
13244
13245 case OpShiftRightArithmetic:
13246 GLSL_BOP_CAST(>>, int_type);
13247 break;
13248
13249 case OpShiftLeftLogical:
13250 {
13251 auto type = get<SPIRType>(id: ops[0]).basetype;
13252 GLSL_BOP_CAST(<<, type);
13253 break;
13254 }
13255
13256 case OpBitwiseOr:
13257 {
13258 auto type = get<SPIRType>(id: ops[0]).basetype;
13259 GLSL_BOP_CAST(|, type);
13260 break;
13261 }
13262
13263 case OpBitwiseXor:
13264 {
13265 auto type = get<SPIRType>(id: ops[0]).basetype;
13266 GLSL_BOP_CAST(^, type);
13267 break;
13268 }
13269
13270 case OpBitwiseAnd:
13271 {
13272 auto type = get<SPIRType>(id: ops[0]).basetype;
13273 GLSL_BOP_CAST(&, type);
13274 break;
13275 }
13276
13277 case OpNot:
13278 if (implicit_integer_promotion || expression_type_id(id: ops[2]) != ops[0])
13279 GLSL_UOP_CAST(~);
13280 else
13281 GLSL_UOP(~);
13282 break;
13283
13284 case OpUMod:
13285 GLSL_BOP_CAST(%, uint_type);
13286 break;
13287
13288 case OpSMod:
13289 GLSL_BOP_CAST(%, int_type);
13290 break;
13291
13292 case OpFMod:
13293 GLSL_BFOP(mod);
13294 break;
13295
13296 case OpFRem:
13297 {
13298 uint32_t result_type = ops[0];
13299 uint32_t result_id = ops[1];
13300 uint32_t op0 = ops[2];
13301 uint32_t op1 = ops[3];
13302
		// Needs special handling: OpFRem is based on truncated division (sign follows the first operand),
		// while GLSL mod() uses floored division, so expand it manually.
13304 bool forward = should_forward(id: op0) && should_forward(id: op1);
13305 std::string expr;
13306 if (!is_legacy())
13307 {
13308 expr = join(ts: to_enclosed_expression(id: op0), ts: " - ", ts: to_enclosed_expression(id: op1), ts: " * ", ts: "trunc(",
13309 ts: to_enclosed_expression(id: op0), ts: " / ", ts: to_enclosed_expression(id: op1), ts: ")");
13310 }
13311 else
13312 {
			// Legacy GLSL has no trunc(), so emulate it by casting to int and back.
13314 auto &op0_type = expression_type(id: op0);
13315 auto via_type = op0_type;
13316 via_type.basetype = SPIRType::Int;
13317 expr = join(ts: to_enclosed_expression(id: op0), ts: " - ", ts: to_enclosed_expression(id: op1), ts: " * ",
13318 ts: type_to_glsl(type: op0_type), ts: "(", ts: type_to_glsl(type: via_type), ts: "(",
13319 ts: to_enclosed_expression(id: op0), ts: " / ", ts: to_enclosed_expression(id: op1), ts: "))");
13320 }
13321
13322 emit_op(result_type, result_id, rhs: expr, forwarding: forward);
13323 inherit_expression_dependencies(dst: result_id, source: op0);
13324 inherit_expression_dependencies(dst: result_id, source: op1);
13325 break;
13326 }
13327
13328 // Relational
13329 case OpAny:
13330 GLSL_UFOP(any);
13331 break;
13332
13333 case OpAll:
13334 GLSL_UFOP(all);
13335 break;
13336
13337 case OpSelect:
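		// OpSelect returns Object 1 (ops[3]) when the condition is true, while mix() selects its second
		// argument for a true boolean, so the true/false operands are passed in swapped order here.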
13338 emit_mix_op(result_type: ops[0], id: ops[1], left: ops[4], right: ops[3], lerp: ops[2]);
13339 break;
13340
13341 case OpLogicalOr:
13342 {
13343 // No vector variant in GLSL for logical OR.
13344 auto result_type = ops[0];
13345 auto id = ops[1];
13346 auto &type = get<SPIRType>(id: result_type);
13347
13348 if (type.vecsize > 1)
13349 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "||", negate: false, expected_type: SPIRType::Unknown);
13350 else
13351 GLSL_BOP(||);
13352 break;
13353 }
13354
13355 case OpLogicalAnd:
13356 {
13357 // No vector variant in GLSL for logical AND.
13358 auto result_type = ops[0];
13359 auto id = ops[1];
13360 auto &type = get<SPIRType>(id: result_type);
13361
13362 if (type.vecsize > 1)
13363 emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "&&", negate: false, expected_type: SPIRType::Unknown);
13364 else
13365 GLSL_BOP(&&);
13366 break;
13367 }
13368
13369 case OpLogicalNot:
13370 {
13371 auto &type = get<SPIRType>(id: ops[0]);
13372 if (type.vecsize > 1)
13373 GLSL_UFOP(not );
13374 else
13375 GLSL_UOP(!);
13376 break;
13377 }
13378
13379 case OpIEqual:
13380 {
13381 if (expression_type(id: ops[2]).vecsize > 1)
13382 GLSL_BFOP_CAST(equal, int_type);
13383 else
13384 GLSL_BOP_CAST(==, int_type);
13385 break;
13386 }
13387
13388 case OpLogicalEqual:
13389 case OpFOrdEqual:
13390 {
13391 if (expression_type(id: ops[2]).vecsize > 1)
13392 GLSL_BFOP(equal);
13393 else
13394 GLSL_BOP(==);
13395 break;
13396 }
13397
13398 case OpINotEqual:
13399 {
13400 if (expression_type(id: ops[2]).vecsize > 1)
13401 GLSL_BFOP_CAST(notEqual, int_type);
13402 else
13403 GLSL_BOP_CAST(!=, int_type);
13404 break;
13405 }
13406
13407 case OpLogicalNotEqual:
13408 case OpFOrdNotEqual:
13409 case OpFUnordNotEqual:
13410 {
13411 // GLSL is fuzzy on what to do with ordered vs unordered not equal.
13412 // glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE,
13413 // but this means we have no easy way of implementing ordered not equal.
13414 if (expression_type(id: ops[2]).vecsize > 1)
13415 GLSL_BFOP(notEqual);
13416 else
13417 GLSL_BOP(!=);
13418 break;
13419 }
13420
13421 case OpUGreaterThan:
13422 case OpSGreaterThan:
13423 {
13424 auto type = opcode == OpUGreaterThan ? uint_type : int_type;
13425 if (expression_type(id: ops[2]).vecsize > 1)
13426 GLSL_BFOP_CAST(greaterThan, type);
13427 else
13428 GLSL_BOP_CAST(>, type);
13429 break;
13430 }
13431
13432 case OpFOrdGreaterThan:
13433 {
13434 if (expression_type(id: ops[2]).vecsize > 1)
13435 GLSL_BFOP(greaterThan);
13436 else
13437 GLSL_BOP(>);
13438 break;
13439 }
13440
13441 case OpUGreaterThanEqual:
13442 case OpSGreaterThanEqual:
13443 {
13444 auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
13445 if (expression_type(id: ops[2]).vecsize > 1)
13446 GLSL_BFOP_CAST(greaterThanEqual, type);
13447 else
13448 GLSL_BOP_CAST(>=, type);
13449 break;
13450 }
13451
13452 case OpFOrdGreaterThanEqual:
13453 {
13454 if (expression_type(id: ops[2]).vecsize > 1)
13455 GLSL_BFOP(greaterThanEqual);
13456 else
13457 GLSL_BOP(>=);
13458 break;
13459 }
13460
13461 case OpULessThan:
13462 case OpSLessThan:
13463 {
13464 auto type = opcode == OpULessThan ? uint_type : int_type;
13465 if (expression_type(id: ops[2]).vecsize > 1)
13466 GLSL_BFOP_CAST(lessThan, type);
13467 else
13468 GLSL_BOP_CAST(<, type);
13469 break;
13470 }
13471
13472 case OpFOrdLessThan:
13473 {
13474 if (expression_type(id: ops[2]).vecsize > 1)
13475 GLSL_BFOP(lessThan);
13476 else
13477 GLSL_BOP(<);
13478 break;
13479 }
13480
13481 case OpULessThanEqual:
13482 case OpSLessThanEqual:
13483 {
13484 auto type = opcode == OpULessThanEqual ? uint_type : int_type;
13485 if (expression_type(id: ops[2]).vecsize > 1)
13486 GLSL_BFOP_CAST(lessThanEqual, type);
13487 else
13488 GLSL_BOP_CAST(<=, type);
13489 break;
13490 }
13491
13492 case OpFOrdLessThanEqual:
13493 {
13494 if (expression_type(id: ops[2]).vecsize > 1)
13495 GLSL_BFOP(lessThanEqual);
13496 else
13497 GLSL_BOP(<=);
13498 break;
13499 }
13500
13501 // Conversion
13502 case OpSConvert:
13503 case OpConvertSToF:
13504 case OpUConvert:
13505 case OpConvertUToF:
13506 {
13507 auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
13508 uint32_t result_type = ops[0];
13509 uint32_t id = ops[1];
13510
13511 auto &type = get<SPIRType>(id: result_type);
13512 auto &arg_type = expression_type(id: ops[2]);
13513 auto func = type_to_glsl_constructor(type);
13514
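		// The idea here (general note): the operand is first reinterpreted with the signedness implied by
		// the opcode (signed for OpSConvert/OpConvertSToF, unsigned for OpUConvert/OpConvertUToF) before
		// the widening or float constructor is applied, so sign- vs zero-extension follows SPIR-V semantics.
		// A same-width integer conversion needs no such cast and falls through to a plain constructor.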
13515 if (arg_type.width < type.width || type_is_floating_point(type))
13516 emit_unary_func_op_cast(result_type, result_id: id, op0: ops[2], op: func.c_str(), input_type, expected_result_type: type.basetype);
13517 else
13518 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: func.c_str());
13519 break;
13520 }
13521
13522 case OpConvertFToU:
13523 case OpConvertFToS:
13524 {
13525 // Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
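		// Illustrative note: an OpConvertFToU whose result type happens to be declared as a signed integer
		// is first converted through the unsigned constructor and then bitcast back, roughly "int(uint(f))"
		// (made-up operand name); when the result type already matches the arithmetic type, no extra
		// bitcast should be emitted.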
13526 uint32_t result_type = ops[0];
13527 uint32_t id = ops[1];
13528 auto &type = get<SPIRType>(id: result_type);
13529 auto expected_type = type;
13530 auto &float_type = expression_type(id: ops[2]);
13531 expected_type.basetype =
13532 opcode == OpConvertFToS ? to_signed_basetype(width: type.width) : to_unsigned_basetype(width: type.width);
13533
13534 auto func = type_to_glsl_constructor(type: expected_type);
13535 emit_unary_func_op_cast(result_type, result_id: id, op0: ops[2], op: func.c_str(), input_type: float_type.basetype, expected_result_type: expected_type.basetype);
13536 break;
13537 }
13538
13539 case OpFConvert:
13540 {
13541 uint32_t result_type = ops[0];
13542 uint32_t id = ops[1];
13543
13544 auto func = type_to_glsl_constructor(type: get<SPIRType>(id: result_type));
13545 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: func.c_str());
13546 break;
13547 }
13548
13549 case OpBitcast:
13550 {
13551 uint32_t result_type = ops[0];
13552 uint32_t id = ops[1];
13553 uint32_t arg = ops[2];
13554
13555 if (!emit_complex_bitcast(result_type, id, op0: arg))
13556 {
13557 auto op = bitcast_glsl_op(out_type: get<SPIRType>(id: result_type), in_type: expression_type(id: arg));
13558 emit_unary_func_op(result_type, result_id: id, op0: arg, op: op.c_str());
13559 }
13560 break;
13561 }
13562
13563 case OpQuantizeToF16:
13564 {
13565 uint32_t result_type = ops[0];
13566 uint32_t id = ops[1];
13567 uint32_t arg = ops[2];
13568
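		// GLSL has no direct float16 quantization op, so round-trip through packHalf2x16/unpackHalf2x16.
		// E.g. (illustrative name) a scalar input "v" becomes "unpackHalf2x16(packHalf2x16(vec2(v))).x".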
13569 string op;
13570 auto &type = get<SPIRType>(id: result_type);
13571
13572 switch (type.vecsize)
13573 {
13574 case 1:
13575 op = join(ts: "unpackHalf2x16(packHalf2x16(vec2(", ts: to_expression(id: arg), ts: "))).x");
13576 break;
13577 case 2:
13578 op = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: "))");
13579 break;
13580 case 3:
13581 {
13582 auto op0 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".xy))");
13583 auto op1 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".zz)).x");
13584 op = join(ts: "vec3(", ts&: op0, ts: ", ", ts&: op1, ts: ")");
13585 break;
13586 }
13587 case 4:
13588 {
13589 auto op0 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".xy))");
13590 auto op1 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".zw))");
13591 op = join(ts: "vec4(", ts&: op0, ts: ", ", ts&: op1, ts: ")");
13592 break;
13593 }
13594 default:
13595 SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
13596 }
13597
13598 emit_op(result_type, result_id: id, rhs: op, forwarding: should_forward(id: arg));
13599 inherit_expression_dependencies(dst: id, source: arg);
13600 break;
13601 }
13602
13603 // Derivatives
13604 case OpDPdx:
13605 GLSL_UFOP(dFdx);
13606 if (is_legacy_es())
13607 require_extension_internal(ext: "GL_OES_standard_derivatives");
13608 register_control_dependent_expression(expr: ops[1]);
13609 break;
13610
13611 case OpDPdy:
13612 GLSL_UFOP(dFdy);
13613 if (is_legacy_es())
13614 require_extension_internal(ext: "GL_OES_standard_derivatives");
13615 register_control_dependent_expression(expr: ops[1]);
13616 break;
13617
13618 case OpDPdxFine:
13619 GLSL_UFOP(dFdxFine);
13620 if (options.es)
13621 {
13622 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
13623 }
13624 if (options.version < 450)
13625 require_extension_internal(ext: "GL_ARB_derivative_control");
13626 register_control_dependent_expression(expr: ops[1]);
13627 break;
13628
13629 case OpDPdyFine:
13630 GLSL_UFOP(dFdyFine);
13631 if (options.es)
13632 {
13633 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
13634 }
13635 if (options.version < 450)
13636 require_extension_internal(ext: "GL_ARB_derivative_control");
13637 register_control_dependent_expression(expr: ops[1]);
13638 break;
13639
13640 case OpDPdxCoarse:
13641 if (options.es)
13642 {
13643 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
13644 }
13645 GLSL_UFOP(dFdxCoarse);
13646 if (options.version < 450)
13647 require_extension_internal(ext: "GL_ARB_derivative_control");
13648 register_control_dependent_expression(expr: ops[1]);
13649 break;
13650
13651 case OpDPdyCoarse:
13652 GLSL_UFOP(dFdyCoarse);
13653 if (options.es)
13654 {
13655 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
13656 }
13657 if (options.version < 450)
13658 require_extension_internal(ext: "GL_ARB_derivative_control");
13659 register_control_dependent_expression(expr: ops[1]);
13660 break;
13661
13662 case OpFwidth:
13663 GLSL_UFOP(fwidth);
13664 if (is_legacy_es())
13665 require_extension_internal(ext: "GL_OES_standard_derivatives");
13666 register_control_dependent_expression(expr: ops[1]);
13667 break;
13668
13669 case OpFwidthCoarse:
13670 GLSL_UFOP(fwidthCoarse);
13671 if (options.es)
13672 {
13673 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
13674 }
13675 if (options.version < 450)
13676 require_extension_internal(ext: "GL_ARB_derivative_control");
13677 register_control_dependent_expression(expr: ops[1]);
13678 break;
13679
13680 case OpFwidthFine:
13681 GLSL_UFOP(fwidthFine);
13682 if (options.es)
13683 {
13684 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
13685 }
13686 if (options.version < 450)
13687 require_extension_internal(ext: "GL_ARB_derivative_control");
13688 register_control_dependent_expression(expr: ops[1]);
13689 break;
13690
13691 // Bitfield
13692 case OpBitFieldInsert:
13693 {
13694 emit_bitfield_insert_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op3: ops[5], op: "bitfieldInsert", offset_count_type: SPIRType::Int);
13695 break;
13696 }
13697
13698 case OpBitFieldSExtract:
13699 {
13700 emit_trinary_func_op_bitextract(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op: "bitfieldExtract", expected_result_type: int_type, input_type0: int_type,
13701 input_type1: SPIRType::Int, input_type2: SPIRType::Int);
13702 break;
13703 }
13704
13705 case OpBitFieldUExtract:
13706 {
13707 emit_trinary_func_op_bitextract(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op: "bitfieldExtract", expected_result_type: uint_type, input_type0: uint_type,
13708 input_type1: SPIRType::Int, input_type2: SPIRType::Int);
13709 break;
13710 }
13711
13712 case OpBitReverse:
13713 // BitReverse does not have issues with sign since result type must match input type.
13714 GLSL_UFOP(bitfieldReverse);
13715 break;
13716
13717 case OpBitCount:
13718 {
13719 auto basetype = expression_type(id: ops[2]).basetype;
13720 emit_unary_func_op_cast(result_type: ops[0], result_id: ops[1], op0: ops[2], op: "bitCount", input_type: basetype, expected_result_type: int_type);
13721 break;
13722 }
13723
13724 // Atomics
13725 case OpAtomicExchange:
13726 {
13727 uint32_t result_type = ops[0];
13728 uint32_t id = ops[1];
13729 uint32_t ptr = ops[2];
13730 // Ignore semantics for now, probably only relevant to CL.
13731 uint32_t val = ops[5];
13732 const char *op = check_atomic_image(id: ptr) ? "imageAtomicExchange" : "atomicExchange";
13733
13734 emit_atomic_func_op(result_type, result_id: id, op0: ptr, op1: val, op);
13735 break;
13736 }
13737
13738 case OpAtomicCompareExchange:
13739 {
13740 uint32_t result_type = ops[0];
13741 uint32_t id = ops[1];
13742 uint32_t ptr = ops[2];
13743 uint32_t val = ops[6];
13744 uint32_t comp = ops[7];
13745 const char *op = check_atomic_image(id: ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
13746
13747 emit_atomic_func_op(result_type, result_id: id, op0: ptr, op1: comp, op2: val, op);
13748 break;
13749 }
13750
13751 case OpAtomicLoad:
13752 {
13753 		// In plain GLSL, we have no atomic loads, so emulate this by fetch-adding 0 and hoping the compiler figures it out.
13754 // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
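		// Rough illustration (made-up names): a load from a uint SSBO member "counter" comes out as
		// "atomicAdd(counter, 0u)", and a load through an image texel pointer as
		// "imageAtomicAdd(img, coord, 0u)".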
13755 auto &type = expression_type(id: ops[2]);
13756 forced_temporaries.insert(x: ops[1]);
13757 bool atomic_image = check_atomic_image(id: ops[2]);
13758 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
13759 (atomic_image && get<SPIRType>(id: type.image.type).basetype == SPIRType::UInt);
13760 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
13761 const char *increment = unsigned_type ? "0u" : "0";
13762 emit_op(result_type: ops[0], result_id: ops[1],
13763 rhs: join(ts&: op, ts: "(",
13764 ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: increment, ts: ")"), forwarding: false);
13765 flush_all_atomic_capable_variables();
13766 break;
13767 }
13768
13769 case OpAtomicStore:
13770 {
13771 // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
13772 // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
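		// Illustratively (made-up names), storing "v" to a uint SSBO member "counter" becomes
		// "atomicExchange(counter, v);" with the returned old value simply discarded.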
13773 uint32_t ptr = ops[0];
13774 // Ignore semantics for now, probably only relevant to CL.
13775 uint32_t val = ops[3];
13776 const char *op = check_atomic_image(id: ptr) ? "imageAtomicExchange" : "atomicExchange";
13777 statement(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: ptr), ts: ", ", ts: to_expression(id: val), ts: ");");
13778 flush_all_atomic_capable_variables();
13779 break;
13780 }
13781
13782 case OpAtomicIIncrement:
13783 case OpAtomicIDecrement:
13784 {
13785 forced_temporaries.insert(x: ops[1]);
13786 auto &type = expression_type(id: ops[2]);
13787 if (type.storage == StorageClassAtomicCounter)
13788 {
13789 // Legacy GLSL stuff, not sure if this is relevant to support.
13790 if (opcode == OpAtomicIIncrement)
13791 GLSL_UFOP(atomicCounterIncrement);
13792 else
13793 GLSL_UFOP(atomicCounterDecrement);
13794 }
13795 else
13796 {
13797 bool atomic_image = check_atomic_image(id: ops[2]);
13798 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
13799 (atomic_image && get<SPIRType>(id: type.image.type).basetype == SPIRType::UInt);
13800 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
13801
13802 const char *increment = nullptr;
13803 if (opcode == OpAtomicIIncrement && unsigned_type)
13804 increment = "1u";
13805 else if (opcode == OpAtomicIIncrement)
13806 increment = "1";
13807 else if (unsigned_type)
13808 increment = "uint(-1)";
13809 else
13810 increment = "-1";
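			// E.g. (illustrative) an OpAtomicIDecrement of a uint counter is emitted as
			// "atomicAdd(counter, uint(-1))", relying on unsigned wrap-around to subtract one.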
13811
13812 emit_op(result_type: ops[0], result_id: ops[1],
13813 rhs: join(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: increment, ts: ")"), forwarding: false);
13814 }
13815
13816 flush_all_atomic_capable_variables();
13817 break;
13818 }
13819
13820 case OpAtomicIAdd:
13821 case OpAtomicFAddEXT:
13822 {
13823 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicAdd" : "atomicAdd";
13824 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
13825 break;
13826 }
13827
13828 case OpAtomicISub:
13829 {
13830 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicAdd" : "atomicAdd";
13831 forced_temporaries.insert(x: ops[1]);
13832 auto expr = join(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", -", ts: to_enclosed_expression(id: ops[5]), ts: ")");
13833 emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: should_forward(id: ops[2]) && should_forward(id: ops[5]));
13834 flush_all_atomic_capable_variables();
13835 break;
13836 }
13837
13838 case OpAtomicSMin:
13839 case OpAtomicUMin:
13840 {
13841 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicMin" : "atomicMin";
13842 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
13843 break;
13844 }
13845
13846 case OpAtomicSMax:
13847 case OpAtomicUMax:
13848 {
13849 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicMax" : "atomicMax";
13850 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
13851 break;
13852 }
13853
13854 case OpAtomicAnd:
13855 {
13856 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicAnd" : "atomicAnd";
13857 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
13858 break;
13859 }
13860
13861 case OpAtomicOr:
13862 {
13863 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicOr" : "atomicOr";
13864 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
13865 break;
13866 }
13867
13868 case OpAtomicXor:
13869 {
13870 const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicXor" : "atomicXor";
13871 emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op);
13872 break;
13873 }
13874
13875 // Geometry shaders
13876 case OpEmitVertex:
13877 statement(ts: "EmitVertex();");
13878 break;
13879
13880 case OpEndPrimitive:
13881 statement(ts: "EndPrimitive();");
13882 break;
13883
13884 case OpEmitStreamVertex:
13885 {
13886 if (options.es)
13887 SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
13888 else if (!options.es && options.version < 400)
13889 SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
13890
13891 auto stream_expr = to_expression(id: ops[0]);
13892 if (expression_type(id: ops[0]).basetype != SPIRType::Int)
13893 stream_expr = join(ts: "int(", ts&: stream_expr, ts: ")");
13894 statement(ts: "EmitStreamVertex(", ts&: stream_expr, ts: ");");
13895 break;
13896 }
13897
13898 case OpEndStreamPrimitive:
13899 {
13900 if (options.es)
13901 SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
13902 else if (!options.es && options.version < 400)
13903 SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
13904
13905 auto stream_expr = to_expression(id: ops[0]);
13906 if (expression_type(id: ops[0]).basetype != SPIRType::Int)
13907 stream_expr = join(ts: "int(", ts&: stream_expr, ts: ")");
13908 statement(ts: "EndStreamPrimitive(", ts&: stream_expr, ts: ");");
13909 break;
13910 }
13911
13912 // Textures
13913 case OpImageSampleExplicitLod:
13914 case OpImageSampleProjExplicitLod:
13915 case OpImageSampleDrefExplicitLod:
13916 case OpImageSampleProjDrefExplicitLod:
13917 case OpImageSampleImplicitLod:
13918 case OpImageSampleProjImplicitLod:
13919 case OpImageSampleDrefImplicitLod:
13920 case OpImageSampleProjDrefImplicitLod:
13921 case OpImageFetch:
13922 case OpImageGather:
13923 case OpImageDrefGather:
13924 	// Gets a bit hairy, so this is handled by a separate helper function.
13925 emit_texture_op(i: instruction, sparse: false);
13926 break;
13927
13928 case OpImageSparseSampleExplicitLod:
13929 case OpImageSparseSampleProjExplicitLod:
13930 case OpImageSparseSampleDrefExplicitLod:
13931 case OpImageSparseSampleProjDrefExplicitLod:
13932 case OpImageSparseSampleImplicitLod:
13933 case OpImageSparseSampleProjImplicitLod:
13934 case OpImageSparseSampleDrefImplicitLod:
13935 case OpImageSparseSampleProjDrefImplicitLod:
13936 case OpImageSparseFetch:
13937 case OpImageSparseGather:
13938 case OpImageSparseDrefGather:
13939 	// Gets a bit hairy, so this is handled by a separate helper function.
13940 emit_texture_op(i: instruction, sparse: true);
13941 break;
13942
13943 case OpImageSparseTexelsResident:
13944 if (options.es)
13945 SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL.");
13946 require_extension_internal(ext: "GL_ARB_sparse_texture2");
13947 emit_unary_func_op_cast(result_type: ops[0], result_id: ops[1], op0: ops[2], op: "sparseTexelsResidentARB", input_type: int_type, expected_result_type: SPIRType::Boolean);
13948 break;
13949
13950 case OpImage:
13951 {
13952 uint32_t result_type = ops[0];
13953 uint32_t id = ops[1];
13954
13955 // Suppress usage tracking.
13956 auto &e = emit_op(result_type, result_id: id, rhs: to_expression(id: ops[2]), forwarding: true, suppress_usage_tracking: true);
13957
13958 // When using the image, we need to know which variable it is actually loaded from.
13959 auto *var = maybe_get_backing_variable(chain: ops[2]);
13960 e.loaded_from = var ? var->self : ID(0);
13961 break;
13962 }
13963
13964 case OpImageQueryLod:
13965 {
13966 const char *op = nullptr;
13967 if (!options.es && options.version < 400)
13968 {
13969 require_extension_internal(ext: "GL_ARB_texture_query_lod");
13970 // For some reason, the ARB spec is all-caps.
13971 op = "textureQueryLOD";
13972 }
13973 else if (options.es)
13974 {
13975 if (options.version < 300)
13976 SPIRV_CROSS_THROW("textureQueryLod not supported in legacy ES");
13977 require_extension_internal(ext: "GL_EXT_texture_query_lod");
13978 op = "textureQueryLOD";
13979 }
13980 else
13981 op = "textureQueryLod";
13982
13983 auto sampler_expr = to_expression(id: ops[2]);
13984 if (has_decoration(id: ops[2], decoration: DecorationNonUniform))
13985 {
13986 if (maybe_get_backing_variable(chain: ops[2]))
13987 convert_non_uniform_expression(expr&: sampler_expr, ptr_id: ops[2]);
13988 else if (*backend.nonuniform_qualifier != '\0')
13989 sampler_expr = join(ts&: backend.nonuniform_qualifier, ts: "(", ts&: sampler_expr, ts: ")");
13990 }
13991
13992 bool forward = should_forward(id: ops[3]);
13993 emit_op(result_type: ops[0], result_id: ops[1],
13994 rhs: join(ts&: op, ts: "(", ts&: sampler_expr, ts: ", ", ts: to_unpacked_expression(id: ops[3]), ts: ")"),
13995 forwarding: forward);
13996 inherit_expression_dependencies(dst: ops[1], source: ops[2]);
13997 inherit_expression_dependencies(dst: ops[1], source: ops[3]);
13998 register_control_dependent_expression(expr: ops[1]);
13999 break;
14000 }
14001
14002 case OpImageQueryLevels:
14003 {
14004 uint32_t result_type = ops[0];
14005 uint32_t id = ops[1];
14006
14007 if (!options.es && options.version < 430)
14008 require_extension_internal(ext: "GL_ARB_texture_query_levels");
14009 if (options.es)
14010 SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
14011
14012 auto expr = join(ts: "textureQueryLevels(", ts: convert_separate_image_to_expression(id: ops[2]), ts: ")");
14013 auto &restype = get<SPIRType>(id: ops[0]);
14014 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr);
14015 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
14016 break;
14017 }
14018
14019 case OpImageQuerySamples:
14020 {
14021 auto &type = expression_type(id: ops[2]);
14022 uint32_t result_type = ops[0];
14023 uint32_t id = ops[1];
14024
14025 if (options.es)
14026 SPIRV_CROSS_THROW("textureSamples and imageSamples not supported in ES profile.");
14027 else if (options.version < 450)
14028 require_extension_internal(ext: "GL_ARB_texture_query_samples");
14029
14030 string expr;
14031 if (type.image.sampled == 2)
14032 expr = join(ts: "imageSamples(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ")");
14033 else
14034 expr = join(ts: "textureSamples(", ts: convert_separate_image_to_expression(id: ops[2]), ts: ")");
14035
14036 auto &restype = get<SPIRType>(id: ops[0]);
14037 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr);
14038 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
14039 break;
14040 }
14041
14042 case OpSampledImage:
14043 {
14044 uint32_t result_type = ops[0];
14045 uint32_t id = ops[1];
14046 emit_sampled_image_op(result_type, result_id: id, image_id: ops[2], samp_id: ops[3]);
14047 inherit_expression_dependencies(dst: id, source: ops[2]);
14048 inherit_expression_dependencies(dst: id, source: ops[3]);
14049 break;
14050 }
14051
14052 case OpImageQuerySizeLod:
14053 {
14054 uint32_t result_type = ops[0];
14055 uint32_t id = ops[1];
14056 uint32_t img = ops[2];
14057 auto &type = expression_type(id: img);
14058 auto &imgtype = get<SPIRType>(id: type.self);
14059
14060 std::string fname = "textureSize";
14061 if (is_legacy_desktop())
14062 {
14063 fname = legacy_tex_op(op: fname, imgtype, tex: img);
14064 }
14065 else if (is_legacy_es())
14066 SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");
14067
14068 auto expr = join(ts&: fname, ts: "(", ts: convert_separate_image_to_expression(id: img), ts: ", ",
14069 ts: bitcast_expression(target_type: SPIRType::Int, arg: ops[3]), ts: ")");
14070
14071 // ES needs to emulate 1D images as 2D.
14072 if (type.image.dim == Dim1D && options.es)
14073 expr = join(ts&: expr, ts: ".x");
14074
14075 auto &restype = get<SPIRType>(id: ops[0]);
14076 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr);
14077 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
14078 break;
14079 }
14080
14081 // Image load/store
14082 case OpImageRead:
14083 case OpImageSparseRead:
14084 {
14085 // We added Nonreadable speculatively to the OpImage variable due to glslangValidator
14086 // not adding the proper qualifiers.
14087 // If it turns out we need to read the image after all, remove the qualifier and recompile.
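		// In practice the first pass may have declared the image "writeonly"; once a read is seen here,
		// the speculative qualifier is dropped and the whole shader is generated again with correct
		// qualifiers (general note on the recompile loop, not specific to any shader).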
14088 auto *var = maybe_get_backing_variable(chain: ops[2]);
14089 if (var)
14090 {
14091 auto &flags = get_decoration_bitset(id: var->self);
14092 if (flags.get(bit: DecorationNonReadable))
14093 {
14094 unset_decoration(id: var->self, decoration: DecorationNonReadable);
14095 force_recompile();
14096 }
14097 }
14098
14099 uint32_t result_type = ops[0];
14100 uint32_t id = ops[1];
14101
14102 bool pure;
14103 string imgexpr;
14104 auto &type = expression_type(id: ops[2]);
14105
14106 if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
14107 {
14108 if (type.image.ms)
14109 SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
14110
14111 auto itr =
14112 find_if(first: begin(cont&: pls_inputs), last: end(cont&: pls_inputs), pred: [var](const PlsRemap &pls) { return pls.id == var->self; });
14113
14114 if (itr == end(cont&: pls_inputs))
14115 {
14116 // For non-PLS inputs, we rely on subpass type remapping information to get it right
14117 // since ImageRead always returns 4-component vectors and the backing type is opaque.
14118 if (!var->remapped_components)
14119 SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
14120 imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: var->remapped_components, expr: to_expression(id: ops[2]));
14121 }
14122 else
14123 {
14124 				// A PLS input may have a different number of components than the SPIR-V expects, so swizzle to
14125 				// the appropriate vector size.
14126 uint32_t components = pls_format_to_components(format: itr->format);
14127 imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: components, expr: to_expression(id: ops[2]));
14128 }
14129 pure = true;
14130 }
14131 else if (type.image.dim == DimSubpassData)
14132 {
14133 if (var && subpass_input_is_framebuffer_fetch(id: var->self))
14134 {
14135 imgexpr = to_expression(id: var->self);
14136 }
14137 else if (options.vulkan_semantics)
14138 {
14139 // With Vulkan semantics, use the proper Vulkan GLSL construct.
14140 if (type.image.ms)
14141 {
14142 uint32_t operands = ops[4];
14143 if (operands != ImageOperandsSampleMask || length != 6)
14144 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
14145 "operand mask was used.");
14146
14147 uint32_t samples = ops[5];
14148 imgexpr = join(ts: "subpassLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts: to_expression(id: samples), ts: ")");
14149 }
14150 else
14151 imgexpr = join(ts: "subpassLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ")");
14152 }
14153 else
14154 {
14155 if (type.image.ms)
14156 {
14157 uint32_t operands = ops[4];
14158 if (operands != ImageOperandsSampleMask || length != 6)
14159 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
14160 "operand mask was used.");
14161
14162 uint32_t samples = ops[5];
14163 imgexpr = join(ts: "texelFetch(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ivec2(gl_FragCoord.xy), ",
14164 ts: to_expression(id: samples), ts: ")");
14165 }
14166 else
14167 {
14168 // Implement subpass loads via texture barrier style sampling.
14169 imgexpr = join(ts: "texelFetch(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ivec2(gl_FragCoord.xy), 0)");
14170 }
14171 }
14172 imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 4, expr: imgexpr);
14173 pure = true;
14174 }
14175 else
14176 {
14177 bool sparse = opcode == OpImageSparseRead;
14178 uint32_t sparse_code_id = 0;
14179 uint32_t sparse_texel_id = 0;
14180 if (sparse)
14181 emit_sparse_feedback_temporaries(result_type_id: ops[0], id: ops[1], feedback_id&: sparse_code_id, texel_id&: sparse_texel_id);
14182
14183 // imageLoad only accepts int coords, not uint.
14184 auto coord_expr = to_expression(id: ops[3]);
14185 auto target_coord_type = expression_type(id: ops[3]);
14186 target_coord_type.basetype = SPIRType::Int;
14187 coord_expr = bitcast_expression(target_type: target_coord_type, expr_type: expression_type(id: ops[3]).basetype, expr: coord_expr);
14188
14189 // ES needs to emulate 1D images as 2D.
14190 if (type.image.dim == Dim1D && options.es)
14191 coord_expr = join(ts: "ivec2(", ts&: coord_expr, ts: ", 0)");
14192
14193 // Plain image load/store.
14194 if (sparse)
14195 {
14196 if (type.image.ms)
14197 {
14198 uint32_t operands = ops[4];
14199 if (operands != ImageOperandsSampleMask || length != 6)
14200 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
14201 "operand mask was used.");
14202
14203 uint32_t samples = ops[5];
14204 statement(ts: to_expression(id: sparse_code_id), ts: " = sparseImageLoadARB(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ",
14205 ts&: coord_expr, ts: ", ", ts: to_expression(id: samples), ts: ", ", ts: to_expression(id: sparse_texel_id), ts: ");");
14206 }
14207 else
14208 {
14209 statement(ts: to_expression(id: sparse_code_id), ts: " = sparseImageLoadARB(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ",
14210 ts&: coord_expr, ts: ", ", ts: to_expression(id: sparse_texel_id), ts: ");");
14211 }
14212 imgexpr = join(ts: type_to_glsl(type: get<SPIRType>(id: result_type)), ts: "(", ts: to_expression(id: sparse_code_id), ts: ", ",
14213 ts: to_expression(id: sparse_texel_id), ts: ")");
14214 }
14215 else
14216 {
14217 if (type.image.ms)
14218 {
14219 uint32_t operands = ops[4];
14220 if (operands != ImageOperandsSampleMask || length != 6)
14221 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
14222 "operand mask was used.");
14223
14224 uint32_t samples = ops[5];
14225 imgexpr =
14226 join(ts: "imageLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: coord_expr, ts: ", ", ts: to_expression(id: samples), ts: ")");
14227 }
14228 else
14229 imgexpr = join(ts: "imageLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: coord_expr, ts: ")");
14230 }
14231
14232 if (!sparse)
14233 imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 4, expr: imgexpr);
14234 pure = false;
14235 }
14236
14237 if (var)
14238 {
14239 bool forward = forced_temporaries.find(x: id) == end(cont&: forced_temporaries);
14240 auto &e = emit_op(result_type, result_id: id, rhs: imgexpr, forwarding: forward);
14241
14242 // We only need to track dependencies if we're reading from image load/store.
14243 if (!pure)
14244 {
14245 e.loaded_from = var->self;
14246 if (forward)
14247 var->dependees.push_back(t: id);
14248 }
14249 }
14250 else
14251 emit_op(result_type, result_id: id, rhs: imgexpr, forwarding: false);
14252
14253 inherit_expression_dependencies(dst: id, source: ops[2]);
14254 if (type.image.ms)
14255 inherit_expression_dependencies(dst: id, source: ops[5]);
14256 break;
14257 }
14258
14259 case OpImageTexelPointer:
14260 {
14261 uint32_t result_type = ops[0];
14262 uint32_t id = ops[1];
14263
14264 auto coord_expr = to_expression(id: ops[3]);
14265 auto target_coord_type = expression_type(id: ops[3]);
14266 target_coord_type.basetype = SPIRType::Int;
14267 coord_expr = bitcast_expression(target_type: target_coord_type, expr_type: expression_type(id: ops[3]).basetype, expr: coord_expr);
14268
14269 auto expr = join(ts: to_expression(id: ops[2]), ts: ", ", ts&: coord_expr);
14270 auto &e = set<SPIRExpression>(id, args&: expr, args&: result_type, args: true);
14271
14272 // When using the pointer, we need to know which variable it is actually loaded from.
14273 auto *var = maybe_get_backing_variable(chain: ops[2]);
14274 e.loaded_from = var ? var->self : ID(0);
14275 inherit_expression_dependencies(dst: id, source: ops[3]);
14276 break;
14277 }
14278
14279 case OpImageWrite:
14280 {
14281 // We added Nonwritable speculatively to the OpImage variable due to glslangValidator
14282 // not adding the proper qualifiers.
14283 // If it turns out we need to write to the image after all, remove the qualifier and recompile.
14284 auto *var = maybe_get_backing_variable(chain: ops[0]);
14285 if (var)
14286 {
14287 if (has_decoration(id: var->self, decoration: DecorationNonWritable))
14288 {
14289 unset_decoration(id: var->self, decoration: DecorationNonWritable);
14290 force_recompile();
14291 }
14292 }
14293
14294 auto &type = expression_type(id: ops[0]);
14295 auto &value_type = expression_type(id: ops[2]);
14296 auto store_type = value_type;
14297 store_type.vecsize = 4;
14298
14299 // imageStore only accepts int coords, not uint.
14300 auto coord_expr = to_expression(id: ops[1]);
14301 auto target_coord_type = expression_type(id: ops[1]);
14302 target_coord_type.basetype = SPIRType::Int;
14303 coord_expr = bitcast_expression(target_type: target_coord_type, expr_type: expression_type(id: ops[1]).basetype, expr: coord_expr);
14304
14305 // ES needs to emulate 1D images as 2D.
14306 if (type.image.dim == Dim1D && options.es)
14307 coord_expr = join(ts: "ivec2(", ts&: coord_expr, ts: ", 0)");
14308
14309 if (type.image.ms)
14310 {
14311 uint32_t operands = ops[3];
14312 if (operands != ImageOperandsSampleMask || length != 5)
14313 SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
14314 uint32_t samples = ops[4];
14315 statement(ts: "imageStore(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts&: coord_expr, ts: ", ", ts: to_expression(id: samples), ts: ", ",
14316 ts: remap_swizzle(out_type: store_type, input_components: value_type.vecsize, expr: to_expression(id: ops[2])), ts: ");");
14317 }
14318 else
14319 statement(ts: "imageStore(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts&: coord_expr, ts: ", ",
14320 ts: remap_swizzle(out_type: store_type, input_components: value_type.vecsize, expr: to_expression(id: ops[2])), ts: ");");
14321
14322 if (var && variable_storage_is_aliased(var: *var))
14323 flush_all_aliased_variables();
14324 break;
14325 }
14326
14327 case OpImageQuerySize:
14328 {
14329 auto &type = expression_type(id: ops[2]);
14330 uint32_t result_type = ops[0];
14331 uint32_t id = ops[1];
14332
14333 if (type.basetype == SPIRType::Image)
14334 {
14335 string expr;
14336 if (type.image.sampled == 2)
14337 {
14338 if (!options.es && options.version < 430)
14339 require_extension_internal(ext: "GL_ARB_shader_image_size");
14340 else if (options.es && options.version < 310)
14341 SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
14342
14343 // The size of an image is always constant.
14344 expr = join(ts: "imageSize(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ")");
14345 }
14346 else
14347 {
14348 // This path is hit for samplerBuffers and multisampled images which do not have LOD.
14349 std::string fname = "textureSize";
14350 if (is_legacy())
14351 {
14352 auto &imgtype = get<SPIRType>(id: type.self);
14353 fname = legacy_tex_op(op: fname, imgtype, tex: ops[2]);
14354 }
14355 expr = join(ts&: fname, ts: "(", ts: convert_separate_image_to_expression(id: ops[2]), ts: ")");
14356 }
14357
14358 auto &restype = get<SPIRType>(id: ops[0]);
14359 expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr);
14360 emit_op(result_type, result_id: id, rhs: expr, forwarding: true);
14361 }
14362 else
14363 SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
14364 break;
14365 }
14366
14367 case OpImageSampleWeightedQCOM:
14368 case OpImageBoxFilterQCOM:
14369 case OpImageBlockMatchSSDQCOM:
14370 case OpImageBlockMatchSADQCOM:
14371 {
14372 require_extension_internal(ext: "GL_QCOM_image_processing");
14373 uint32_t result_type_id = ops[0];
14374 uint32_t id = ops[1];
14375 string expr;
14376 switch (opcode)
14377 {
14378 case OpImageSampleWeightedQCOM:
14379 expr = "textureWeightedQCOM";
14380 break;
14381 case OpImageBoxFilterQCOM:
14382 expr = "textureBoxFilterQCOM";
14383 break;
14384 case OpImageBlockMatchSSDQCOM:
14385 expr = "textureBlockMatchSSDQCOM";
14386 break;
14387 case OpImageBlockMatchSADQCOM:
14388 expr = "textureBlockMatchSADQCOM";
14389 break;
14390 default:
14391 SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing.");
14392 }
14393 expr += "(";
14394
14395 bool forward = false;
14396 expr += to_expression(id: ops[2]);
14397 expr += ", " + to_expression(id: ops[3]);
14398
14399 switch (opcode)
14400 {
14401 case OpImageSampleWeightedQCOM:
14402 expr += ", " + to_non_uniform_aware_expression(id: ops[4]);
14403 break;
14404 case OpImageBoxFilterQCOM:
14405 expr += ", " + to_expression(id: ops[4]);
14406 break;
14407 case OpImageBlockMatchSSDQCOM:
14408 case OpImageBlockMatchSADQCOM:
14409 expr += ", " + to_non_uniform_aware_expression(id: ops[4]);
14410 expr += ", " + to_expression(id: ops[5]);
14411 expr += ", " + to_expression(id: ops[6]);
14412 break;
14413 default:
14414 SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing.");
14415 }
14416
14417 expr += ")";
14418 emit_op(result_type: result_type_id, result_id: id, rhs: expr, forwarding: forward);
14419
14420 inherit_expression_dependencies(dst: id, source: ops[3]);
14421 if (opcode == OpImageBlockMatchSSDQCOM || opcode == OpImageBlockMatchSADQCOM)
14422 inherit_expression_dependencies(dst: id, source: ops[5]);
14423
14424 break;
14425 }
14426
14427 // Compute
14428 case OpControlBarrier:
14429 case OpMemoryBarrier:
14430 {
14431 uint32_t execution_scope = 0;
14432 uint32_t memory;
14433 uint32_t semantics;
14434
14435 if (opcode == OpMemoryBarrier)
14436 {
14437 memory = evaluate_constant_u32(id: ops[0]);
14438 semantics = evaluate_constant_u32(id: ops[1]);
14439 }
14440 else
14441 {
14442 execution_scope = evaluate_constant_u32(id: ops[0]);
14443 memory = evaluate_constant_u32(id: ops[1]);
14444 semantics = evaluate_constant_u32(id: ops[2]);
14445 }
14446
14447 if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
14448 {
14449 // OpControlBarrier with ScopeSubgroup is subgroupBarrier()
14450 if (opcode != OpControlBarrier)
14451 {
14452 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMemBarrier);
14453 }
14454 else
14455 {
14456 request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBarrier);
14457 }
14458 }
14459
14460 if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
14461 {
14462 			// Tessellation control shaders only have barrier(), which also implies the needed memory barriers.
14463 if (opcode == OpControlBarrier)
14464 statement(ts: "barrier();");
14465 break;
14466 }
14467
14468 // We only care about these flags, acquire/release and friends are not relevant to GLSL.
14469 semantics = mask_relevant_memory_semantics(semantics);
14470
14471 if (opcode == OpMemoryBarrier)
14472 {
14473 			// If we are emitting a memory barrier and the next instruction is a control barrier, check whether
14474 			// that barrier's implied memory semantics already cover ours, so we can skip a redundant barrier.
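			// For example (illustrative, compute shader), OpMemoryBarrier(Workgroup, WorkgroupMemory)
			// immediately followed by OpControlBarrier(Workgroup, Workgroup, WorkgroupMemory) collapses
			// into a single "barrier();", since the control barrier already covers the requested semantics.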
14475 const Instruction *next = get_next_instruction_in_block(instr: instruction);
14476 if (next && next->op == OpControlBarrier)
14477 {
14478 auto *next_ops = stream(instr: *next);
14479 uint32_t next_memory = evaluate_constant_u32(id: next_ops[1]);
14480 uint32_t next_semantics = evaluate_constant_u32(id: next_ops[2]);
14481 next_semantics = mask_relevant_memory_semantics(semantics: next_semantics);
14482
14483 bool memory_scope_covered = false;
14484 if (next_memory == memory)
14485 memory_scope_covered = true;
14486 else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
14487 {
14488 // If we only care about workgroup memory, either Device or Workgroup scope is fine,
14489 // scope does not have to match.
14490 if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
14491 (memory == ScopeDevice || memory == ScopeWorkgroup))
14492 {
14493 memory_scope_covered = true;
14494 }
14495 }
14496 else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
14497 {
14498 // The control barrier has device scope, but the memory barrier just has workgroup scope.
14499 memory_scope_covered = true;
14500 }
14501
14502 // If we have the same memory scope, and all memory types are covered, we're good.
14503 if (memory_scope_covered && (semantics & next_semantics) == semantics)
14504 break;
14505 }
14506 }
14507
14508 // We are synchronizing some memory or syncing execution,
14509 // so we cannot forward any loads beyond the memory barrier.
14510 if (semantics || opcode == OpControlBarrier)
14511 {
14512 assert(current_emitting_block);
14513 flush_control_dependent_expressions(block: current_emitting_block->self);
14514 flush_all_active_variables();
14515 }
14516
14517 if (memory == ScopeWorkgroup) // Only need to consider memory within a group
14518 {
14519 if (semantics == MemorySemanticsWorkgroupMemoryMask)
14520 {
14521 // OpControlBarrier implies a memory barrier for shared memory as well.
14522 bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup;
14523 if (!implies_shared_barrier)
14524 statement(ts: "memoryBarrierShared();");
14525 }
14526 else if (semantics != 0)
14527 statement(ts: "groupMemoryBarrier();");
14528 }
14529 else if (memory == ScopeSubgroup)
14530 {
14531 const uint32_t all_barriers =
14532 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
14533
14534 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
14535 {
14536 				// These are not relevant for GLSL, but assume they map to subgroupMemoryBarrier().
14537 				// subgroupMemoryBarrier() covers everything, so no need to test anything else.
14538 statement(ts: "subgroupMemoryBarrier();");
14539 }
14540 else if ((semantics & all_barriers) == all_barriers)
14541 {
14542 // Short-hand instead of emitting 3 barriers.
14543 statement(ts: "subgroupMemoryBarrier();");
14544 }
14545 else
14546 {
14547 // Pick out individual barriers.
14548 if (semantics & MemorySemanticsWorkgroupMemoryMask)
14549 statement(ts: "subgroupMemoryBarrierShared();");
14550 if (semantics & MemorySemanticsUniformMemoryMask)
14551 statement(ts: "subgroupMemoryBarrierBuffer();");
14552 if (semantics & MemorySemanticsImageMemoryMask)
14553 statement(ts: "subgroupMemoryBarrierImage();");
14554 }
14555 }
14556 else
14557 {
14558 const uint32_t all_barriers =
14559 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
14560
14561 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
14562 {
14563 // These are not relevant for GLSL, but assume it means memoryBarrier().
14564 // memoryBarrier() does everything, so no need to test anything else.
14565 statement(ts: "memoryBarrier();");
14566 }
14567 else if ((semantics & all_barriers) == all_barriers)
14568 {
14569 				// Short-hand instead of emitting 3 barriers.
14570 statement(ts: "memoryBarrier();");
14571 }
14572 else
14573 {
14574 // Pick out individual barriers.
14575 if (semantics & MemorySemanticsWorkgroupMemoryMask)
14576 statement(ts: "memoryBarrierShared();");
14577 if (semantics & MemorySemanticsUniformMemoryMask)
14578 statement(ts: "memoryBarrierBuffer();");
14579 if (semantics & MemorySemanticsImageMemoryMask)
14580 statement(ts: "memoryBarrierImage();");
14581 }
14582 }
14583
14584 if (opcode == OpControlBarrier)
14585 {
14586 if (execution_scope == ScopeSubgroup)
14587 statement(ts: "subgroupBarrier();");
14588 else
14589 statement(ts: "barrier();");
14590 }
14591 break;
14592 }
14593
14594 case OpExtInst:
14595 {
14596 uint32_t extension_set = ops[2];
14597 auto ext = get<SPIRExtension>(id: extension_set).ext;
14598
14599 if (ext == SPIRExtension::GLSL)
14600 {
14601 emit_glsl_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length: length - 4);
14602 }
14603 else if (ext == SPIRExtension::SPV_AMD_shader_ballot)
14604 {
14605 			emit_spv_amd_shader_ballot_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length: length - 4);
14606 }
14607 else if (ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
14608 {
14609 			emit_spv_amd_shader_explicit_vertex_parameter_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length: length - 4);
14610 }
14611 else if (ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
14612 {
14613 			emit_spv_amd_shader_trinary_minmax_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length: length - 4);
14614 }
14615 else if (ext == SPIRExtension::SPV_AMD_gcn_shader)
14616 {
14617 			emit_spv_amd_gcn_shader_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length: length - 4);
14618 }
14619 else if (ext == SPIRExtension::SPV_debug_info ||
14620 ext == SPIRExtension::NonSemanticShaderDebugInfo ||
14621 ext == SPIRExtension::NonSemanticGeneric)
14622 {
14623 break; // Ignore SPIR-V debug information extended instructions.
14624 }
14625 else if (ext == SPIRExtension::NonSemanticDebugPrintf)
14626 {
14627 // Operation 1 is printf.
14628 if (ops[3] == 1)
14629 {
14630 if (!options.vulkan_semantics)
14631 					SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.");
14632 require_extension_internal(ext: "GL_EXT_debug_printf");
14633 auto &format_string = get<SPIRString>(id: ops[4]).str;
14634 string expr = join(ts: "debugPrintfEXT(\"", ts&: format_string, ts: "\"");
14635 for (uint32_t i = 5; i < length; i++)
14636 {
14637 expr += ", ";
14638 expr += to_expression(id: ops[i]);
14639 }
14640 statement(ts&: expr, ts: ");");
14641 }
14642 }
14643 else
14644 {
14645 statement(ts: "// unimplemented ext op ", ts: instruction.op);
14646 break;
14647 }
14648
14649 break;
14650 }
14651
14652 // Legacy sub-group stuff ...
14653 case OpSubgroupBallotKHR:
14654 {
14655 uint32_t result_type = ops[0];
14656 uint32_t id = ops[1];
14657 string expr;
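		// ballotARB() returns a uint64_t, while the SPIR-V result type is a uvec4 ballot,
		// hence the unpackUint2x32() and zero-padding of the two upper components.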
14658 expr = join(ts: "uvec4(unpackUint2x32(ballotARB(" + to_expression(id: ops[2]) + ")), 0u, 0u)");
14659 emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: ops[2]));
14660
14661 require_extension_internal(ext: "GL_ARB_shader_ballot");
14662 inherit_expression_dependencies(dst: id, source: ops[2]);
14663 register_control_dependent_expression(expr: ops[1]);
14664 break;
14665 }
14666
14667 case OpSubgroupFirstInvocationKHR:
14668 {
14669 uint32_t result_type = ops[0];
14670 uint32_t id = ops[1];
14671 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "readFirstInvocationARB");
14672
14673 require_extension_internal(ext: "GL_ARB_shader_ballot");
14674 register_control_dependent_expression(expr: ops[1]);
14675 break;
14676 }
14677
14678 case OpSubgroupReadInvocationKHR:
14679 {
14680 uint32_t result_type = ops[0];
14681 uint32_t id = ops[1];
14682 emit_binary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "readInvocationARB");
14683
14684 require_extension_internal(ext: "GL_ARB_shader_ballot");
14685 register_control_dependent_expression(expr: ops[1]);
14686 break;
14687 }
14688
14689 case OpSubgroupAllKHR:
14690 {
14691 uint32_t result_type = ops[0];
14692 uint32_t id = ops[1];
14693 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "allInvocationsARB");
14694
14695 require_extension_internal(ext: "GL_ARB_shader_group_vote");
14696 register_control_dependent_expression(expr: ops[1]);
14697 break;
14698 }
14699
14700 case OpSubgroupAnyKHR:
14701 {
14702 uint32_t result_type = ops[0];
14703 uint32_t id = ops[1];
14704 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "anyInvocationARB");
14705
14706 require_extension_internal(ext: "GL_ARB_shader_group_vote");
14707 register_control_dependent_expression(expr: ops[1]);
14708 break;
14709 }
14710
14711 case OpSubgroupAllEqualKHR:
14712 {
14713 uint32_t result_type = ops[0];
14714 uint32_t id = ops[1];
14715 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "allInvocationsEqualARB");
14716
14717 require_extension_internal(ext: "GL_ARB_shader_group_vote");
14718 register_control_dependent_expression(expr: ops[1]);
14719 break;
14720 }
14721
14722 case OpGroupIAddNonUniformAMD:
14723 case OpGroupFAddNonUniformAMD:
14724 {
14725 uint32_t result_type = ops[0];
14726 uint32_t id = ops[1];
14727 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "addInvocationsNonUniformAMD");
14728
14729 require_extension_internal(ext: "GL_AMD_shader_ballot");
14730 register_control_dependent_expression(expr: ops[1]);
14731 break;
14732 }
14733
14734 case OpGroupFMinNonUniformAMD:
14735 case OpGroupUMinNonUniformAMD:
14736 case OpGroupSMinNonUniformAMD:
14737 {
14738 uint32_t result_type = ops[0];
14739 uint32_t id = ops[1];
14740 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "minInvocationsNonUniformAMD");
14741
14742 require_extension_internal(ext: "GL_AMD_shader_ballot");
14743 register_control_dependent_expression(expr: ops[1]);
14744 break;
14745 }
14746
14747 case OpGroupFMaxNonUniformAMD:
14748 case OpGroupUMaxNonUniformAMD:
14749 case OpGroupSMaxNonUniformAMD:
14750 {
14751 uint32_t result_type = ops[0];
14752 uint32_t id = ops[1];
14753 emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "maxInvocationsNonUniformAMD");
14754
14755 require_extension_internal(ext: "GL_AMD_shader_ballot");
14756 register_control_dependent_expression(expr: ops[1]);
14757 break;
14758 }
14759
14760 case OpFragmentMaskFetchAMD:
14761 {
14762 auto &type = expression_type(id: ops[2]);
14763 uint32_t result_type = ops[0];
14764 uint32_t id = ops[1];
14765
14766 if (type.image.dim == spv::DimSubpassData)
14767 {
14768 emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "fragmentMaskFetchAMD");
14769 }
14770 else
14771 {
14772 emit_binary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "fragmentMaskFetchAMD");
14773 }
14774
14775 require_extension_internal(ext: "GL_AMD_shader_fragment_mask");
14776 break;
14777 }
14778
14779 case OpFragmentFetchAMD:
14780 {
14781 auto &type = expression_type(id: ops[2]);
14782 uint32_t result_type = ops[0];
14783 uint32_t id = ops[1];
14784
14785 if (type.image.dim == spv::DimSubpassData)
14786 {
14787 emit_binary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[4], op: "fragmentFetchAMD");
14788 }
14789 else
14790 {
14791 emit_trinary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op2: ops[4], op: "fragmentFetchAMD");
14792 }
14793
14794 require_extension_internal(ext: "GL_AMD_shader_fragment_mask");
14795 break;
14796 }
14797
14798 // Vulkan 1.1 sub-group stuff ...
14799 case OpGroupNonUniformElect:
14800 case OpGroupNonUniformBroadcast:
14801 case OpGroupNonUniformBroadcastFirst:
14802 case OpGroupNonUniformBallot:
14803 case OpGroupNonUniformInverseBallot:
14804 case OpGroupNonUniformBallotBitExtract:
14805 case OpGroupNonUniformBallotBitCount:
14806 case OpGroupNonUniformBallotFindLSB:
14807 case OpGroupNonUniformBallotFindMSB:
14808 case OpGroupNonUniformShuffle:
14809 case OpGroupNonUniformShuffleXor:
14810 case OpGroupNonUniformShuffleUp:
14811 case OpGroupNonUniformShuffleDown:
14812 case OpGroupNonUniformAll:
14813 case OpGroupNonUniformAny:
14814 case OpGroupNonUniformAllEqual:
14815 case OpGroupNonUniformFAdd:
14816 case OpGroupNonUniformIAdd:
14817 case OpGroupNonUniformFMul:
14818 case OpGroupNonUniformIMul:
14819 case OpGroupNonUniformFMin:
14820 case OpGroupNonUniformFMax:
14821 case OpGroupNonUniformSMin:
14822 case OpGroupNonUniformSMax:
14823 case OpGroupNonUniformUMin:
14824 case OpGroupNonUniformUMax:
14825 case OpGroupNonUniformBitwiseAnd:
14826 case OpGroupNonUniformBitwiseOr:
14827 case OpGroupNonUniformBitwiseXor:
14828 case OpGroupNonUniformLogicalAnd:
14829 case OpGroupNonUniformLogicalOr:
14830 case OpGroupNonUniformLogicalXor:
14831 case OpGroupNonUniformQuadSwap:
14832 case OpGroupNonUniformQuadBroadcast:
14833 emit_subgroup_op(i: instruction);
14834 break;
14835
14836 case OpFUnordEqual:
14837 case OpFUnordLessThan:
14838 case OpFUnordGreaterThan:
14839 case OpFUnordLessThanEqual:
14840 case OpFUnordGreaterThanEqual:
14841 {
14842 // GLSL doesn't specify if floating point comparisons are ordered or unordered,
14843 // but glslang always emits ordered floating point compares for GLSL.
14844 // To get unordered compares, we can test the opposite thing and invert the result.
14845 // This way, we force true when there is any NaN present.
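		// Illustratively, a scalar OpFUnordLessThan becomes "!(a >= b)" and a vector one becomes
		// "not(greaterThanEqual(a, b))" (operand names made up), so any NaN operand forces a true result.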
14846 uint32_t op0 = ops[2];
14847 uint32_t op1 = ops[3];
14848
14849 string expr;
14850 if (expression_type(id: op0).vecsize > 1)
14851 {
14852 const char *comp_op = nullptr;
14853 switch (opcode)
14854 {
14855 case OpFUnordEqual:
14856 comp_op = "notEqual";
14857 break;
14858
14859 case OpFUnordLessThan:
14860 comp_op = "greaterThanEqual";
14861 break;
14862
14863 case OpFUnordLessThanEqual:
14864 comp_op = "greaterThan";
14865 break;
14866
14867 case OpFUnordGreaterThan:
14868 comp_op = "lessThanEqual";
14869 break;
14870
14871 case OpFUnordGreaterThanEqual:
14872 comp_op = "lessThan";
14873 break;
14874
14875 default:
14876 assert(0);
14877 break;
14878 }
14879
14880 expr = join(ts: "not(", ts&: comp_op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ", ", ts: to_unpacked_expression(id: op1), ts: "))");
14881 }
14882 else
14883 {
14884 const char *comp_op = nullptr;
14885 switch (opcode)
14886 {
14887 case OpFUnordEqual:
14888 comp_op = " != ";
14889 break;
14890
14891 case OpFUnordLessThan:
14892 comp_op = " >= ";
14893 break;
14894
14895 case OpFUnordLessThanEqual:
14896 comp_op = " > ";
14897 break;
14898
14899 case OpFUnordGreaterThan:
14900 comp_op = " <= ";
14901 break;
14902
14903 case OpFUnordGreaterThanEqual:
14904 comp_op = " < ";
14905 break;
14906
14907 default:
14908 assert(0);
14909 break;
14910 }
14911
14912 expr = join(ts: "!(", ts: to_enclosed_unpacked_expression(id: op0), ts&: comp_op, ts: to_enclosed_unpacked_expression(id: op1), ts: ")");
14913 }
14914
14915 emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1));
14916 inherit_expression_dependencies(dst: ops[1], source: op0);
14917 inherit_expression_dependencies(dst: ops[1], source: op1);
14918 break;
14919 }
14920
14921 case OpReportIntersectionKHR:
14922 // NV is same opcode.
14923 forced_temporaries.insert(x: ops[1]);
14924 if (ray_tracing_is_khr)
14925 GLSL_BFOP(reportIntersectionEXT);
14926 else
14927 GLSL_BFOP(reportIntersectionNV);
14928 flush_control_dependent_expressions(block: current_emitting_block->self);
14929 break;
14930 case OpIgnoreIntersectionNV:
14931 // KHR variant is a terminator.
14932 statement(ts: "ignoreIntersectionNV();");
14933 flush_control_dependent_expressions(block: current_emitting_block->self);
14934 break;
14935 case OpTerminateRayNV:
14936 // KHR variant is a terminator.
14937 statement(ts: "terminateRayNV();");
14938 flush_control_dependent_expressions(block: current_emitting_block->self);
14939 break;
14940 case OpTraceNV:
14941 statement(ts: "traceNV(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ", ", ts: to_expression(id: ops[2]), ts: ", ",
14942 ts: to_expression(id: ops[3]), ts: ", ", ts: to_expression(id: ops[4]), ts: ", ", ts: to_expression(id: ops[5]), ts: ", ",
14943 ts: to_expression(id: ops[6]), ts: ", ", ts: to_expression(id: ops[7]), ts: ", ", ts: to_expression(id: ops[8]), ts: ", ",
14944 ts: to_expression(id: ops[9]), ts: ", ", ts: to_expression(id: ops[10]), ts: ");");
14945 flush_control_dependent_expressions(block: current_emitting_block->self);
14946 break;
14947 case OpTraceRayKHR:
14948 if (!has_decoration(id: ops[10], decoration: DecorationLocation))
14949 SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
14950 statement(ts: "traceRayEXT(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ", ", ts: to_expression(id: ops[2]), ts: ", ",
14951 ts: to_expression(id: ops[3]), ts: ", ", ts: to_expression(id: ops[4]), ts: ", ", ts: to_expression(id: ops[5]), ts: ", ",
14952 ts: to_expression(id: ops[6]), ts: ", ", ts: to_expression(id: ops[7]), ts: ", ", ts: to_expression(id: ops[8]), ts: ", ",
14953 ts: to_expression(id: ops[9]), ts: ", ", ts: get_decoration(id: ops[10], decoration: DecorationLocation), ts: ");");
14954 flush_control_dependent_expressions(block: current_emitting_block->self);
14955 break;
14956 case OpExecuteCallableNV:
14957 statement(ts: "executeCallableNV(", ts: to_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ");");
14958 flush_control_dependent_expressions(block: current_emitting_block->self);
14959 break;
14960 case OpExecuteCallableKHR:
14961 if (!has_decoration(id: ops[1], decoration: DecorationLocation))
14962 SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
14963 statement(ts: "executeCallableEXT(", ts: to_expression(id: ops[0]), ts: ", ", ts: get_decoration(id: ops[1], decoration: DecorationLocation), ts: ");");
14964 flush_control_dependent_expressions(block: current_emitting_block->self);
14965 break;
14966
14967 // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects.
14968 case OpRayQueryInitializeKHR:
14969 flush_variable_declaration(id: ops[0]);
14970 statement(ts: "rayQueryInitializeEXT(",
14971 ts: to_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ", ",
14972 ts: to_expression(id: ops[2]), ts: ", ", ts: to_expression(id: ops[3]), ts: ", ",
14973 ts: to_expression(id: ops[4]), ts: ", ", ts: to_expression(id: ops[5]), ts: ", ",
14974 ts: to_expression(id: ops[6]), ts: ", ", ts: to_expression(id: ops[7]), ts: ");");
14975 break;
14976 case OpRayQueryProceedKHR:
14977 flush_variable_declaration(id: ops[0]);
14978 emit_op(result_type: ops[0], result_id: ops[1], rhs: join(ts: "rayQueryProceedEXT(", ts: to_expression(id: ops[2]), ts: ")"), forwarding: false);
14979 break;
14980 case OpRayQueryTerminateKHR:
14981 flush_variable_declaration(id: ops[0]);
14982 statement(ts: "rayQueryTerminateEXT(", ts: to_expression(id: ops[0]), ts: ");");
14983 break;
14984 case OpRayQueryGenerateIntersectionKHR:
14985 flush_variable_declaration(id: ops[0]);
14986 statement(ts: "rayQueryGenerateIntersectionEXT(", ts: to_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ");");
14987 break;
14988 case OpRayQueryConfirmIntersectionKHR:
14989 flush_variable_declaration(id: ops[0]);
14990 statement(ts: "rayQueryConfirmIntersectionEXT(", ts: to_expression(id: ops[0]), ts: ");");
14991 break;
14992#define GLSL_RAY_QUERY_GET_OP(op) \
14993 case OpRayQueryGet##op##KHR: \
14994 flush_variable_declaration(ops[2]); \
14995 emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \
14996 break
14997#define GLSL_RAY_QUERY_GET_OP2(op) \
14998 case OpRayQueryGet##op##KHR: \
14999 flush_variable_declaration(ops[2]); \
15000 emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \
15001 break
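	// As an illustration, GLSL_RAY_QUERY_GET_OP2(IntersectionT) expands to a case for
	// OpRayQueryGetIntersectionTKHR that emits "rayQueryGetIntersectionTEXT(q, bool(committed))",
	// where the bool selects between the candidate and the committed intersection (names made up).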
15002 GLSL_RAY_QUERY_GET_OP(RayTMin);
15003 GLSL_RAY_QUERY_GET_OP(RayFlags);
15004 GLSL_RAY_QUERY_GET_OP(WorldRayOrigin);
15005 GLSL_RAY_QUERY_GET_OP(WorldRayDirection);
15006 GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque);
15007 GLSL_RAY_QUERY_GET_OP2(IntersectionType);
15008 GLSL_RAY_QUERY_GET_OP2(IntersectionT);
15009 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex);
15010 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId);
15011 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset);
15012 GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex);
15013 GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex);
15014 GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics);
15015 GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace);
15016 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection);
15017 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin);
15018 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld);
15019 GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject);
15020#undef GLSL_RAY_QUERY_GET_OP
15021#undef GLSL_RAY_QUERY_GET_OP2
15022
15023 case OpConvertUToAccelerationStructureKHR:
15024 {
15025 require_extension_internal(ext: "GL_EXT_ray_tracing");
15026
15027 bool elide_temporary = should_forward(id: ops[2]) && forced_temporaries.count(x: ops[1]) == 0 &&
15028 !hoisted_temporaries.count(x: ops[1]);
15029
15030 if (elide_temporary)
15031 {
15032 GLSL_UFOP(accelerationStructureEXT);
15033 }
15034 else
15035 {
15036 // Force this path in subsequent iterations.
15037 forced_temporaries.insert(x: ops[1]);
15038
15039 // We cannot declare a temporary acceleration structure in GLSL.
15040 // If we get to this point, we'll have to emit a temporary uvec2,
15041 // and cast to RTAS on demand.
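				// As a sketch (with hypothetical SSA ids _19 and _20), the GLSL emitted here is roughly
				//   uvec2 _20 = _19;
				// and every later use of id _20 is substituted with accelerationStructureEXT(_20),
				// so the cast to the RTAS type happens at each use site instead of in a temporary.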
15042 statement(ts: declare_temporary(result_type: expression_type_id(id: ops[2]), result_id: ops[1]), ts: to_unpacked_expression(id: ops[2]), ts: ";");
15043 // Use raw SPIRExpression interface to block all usage tracking.
15044 set<SPIRExpression>(id: ops[1], args: join(ts: "accelerationStructureEXT(", ts: to_name(id: ops[1]), ts: ")"), args: ops[0], args: true);
15045 }
15046 break;
15047 }
15048
15049 case OpConvertUToPtr:
15050 {
15051 auto &type = get<SPIRType>(id: ops[0]);
15052 if (type.storage != StorageClassPhysicalStorageBufferEXT)
15053 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
15054
15055 auto &in_type = expression_type(id: ops[2]);
15056 if (in_type.vecsize == 2)
15057 require_extension_internal(ext: "GL_EXT_buffer_reference_uvec2");
15058
15059 auto op = type_to_glsl(type);
15060 emit_unary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op: op.c_str());
15061 break;
15062 }
15063
15064 case OpConvertPtrToU:
15065 {
15066 auto &type = get<SPIRType>(id: ops[0]);
15067 auto &ptr_type = expression_type(id: ops[2]);
15068 if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
15069 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
15070
15071 if (type.vecsize == 2)
15072 require_extension_internal(ext: "GL_EXT_buffer_reference_uvec2");
15073
15074 auto op = type_to_glsl(type);
15075 emit_unary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op: op.c_str());
15076 break;
15077 }
15078
15079 case OpUndef:
15080 // Undefined value has been declared.
15081 break;
15082
15083 case OpLine:
15084 {
15085 emit_line_directive(file_id: ops[0], line_literal: ops[1]);
15086 break;
15087 }
15088
15089 case OpNoLine:
15090 break;
15091
15092 case OpDemoteToHelperInvocationEXT:
15093 if (!options.vulkan_semantics)
15094 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
15095 require_extension_internal(ext: "GL_EXT_demote_to_helper_invocation");
15096 statement(ts&: backend.demote_literal, ts: ";");
15097 break;
15098
15099 case OpIsHelperInvocationEXT:
15100 if (!options.vulkan_semantics)
15101 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
15102 require_extension_internal(ext: "GL_EXT_demote_to_helper_invocation");
15103 // Helper lane state with demote is volatile by nature.
15104 // Do not forward this.
15105 emit_op(result_type: ops[0], result_id: ops[1], rhs: "helperInvocationEXT()", forwarding: false);
15106 break;
15107
15108 case OpBeginInvocationInterlockEXT:
15109 // If the interlock is complex, we emit this elsewhere.
15110 if (!interlocked_is_complex)
15111 {
15112 statement(ts: "SPIRV_Cross_beginInvocationInterlock();");
15113 flush_all_active_variables();
15114 // Make sure forwarding doesn't propagate outside the interlock region.
15115 }
15116 break;
15117
15118 case OpEndInvocationInterlockEXT:
15119 // If the interlock is complex, we emit this elsewhere.
15120 if (!interlocked_is_complex)
15121 {
15122 statement(ts: "SPIRV_Cross_endInvocationInterlock();");
15123 flush_all_active_variables();
15124 // Make sure forwarding doesn't propagate outside the interlock region.
15125 }
15126 break;
15127
15128 case OpSetMeshOutputsEXT:
15129 statement(ts: "SetMeshOutputsEXT(", ts: to_unpacked_expression(id: ops[0]), ts: ", ", ts: to_unpacked_expression(id: ops[1]), ts: ");");
15130 break;
15131
15132 case OpReadClockKHR:
15133 {
15134 auto &type = get<SPIRType>(id: ops[0]);
15135 auto scope = static_cast<Scope>(evaluate_constant_u32(id: ops[2]));
15136 const char *op = nullptr;
15137 // Forwarding clock statements leads to a scenario where an SSA value can take on different
15138 // values every time it's evaluated. Block any forwarding attempt.
15139 // We might also want to invalidate all expressions to function as a sort of optimization
15140 // barrier, but that might be overkill for now.
15141 if (scope == ScopeDevice)
15142 {
15143 require_extension_internal(ext: "GL_EXT_shader_realtime_clock");
15144 if (type.basetype == SPIRType::BaseType::UInt64)
15145 op = "clockRealtimeEXT()";
15146 else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2)
15147 op = "clockRealtime2x32EXT()";
15148 else
15149 SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode.");
15150 }
15151 else if (scope == ScopeSubgroup)
15152 {
15153 require_extension_internal(ext: "GL_ARB_shader_clock");
15154 if (type.basetype == SPIRType::BaseType::UInt64)
15155 op = "clockARB()";
15156 else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2)
15157 op = "clock2x32ARB()";
15158 else
15159 SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode.");
15160 }
15161 else
15162 SPIRV_CROSS_THROW("Unsupported scope for OpReadClockKHR opcode.");
15163
15164 emit_op(result_type: ops[0], result_id: ops[1], rhs: op, forwarding: false);
15165 break;
15166 }
15167
15168 default:
15169 statement(ts: "// unimplemented op ", ts: instruction.op);
15170 break;
15171 }
15172}
15173
15174// Appends function arguments, mapped from global variables, beyond the specified arg index.
15175// This is used when a function call uses fewer arguments than the function defines.
15176// This situation may occur if the function signature has been dynamically modified to
15177// extract global variables referenced from within the function, and convert them to
15178// function arguments. This is necessary for shader languages that do not support global
15179 // access to shader input content from within a function (e.g. Metal). Each additional
15180 // function argument uses the name of the global variable. Function nesting will modify the
15181 // functions and function calls all the way up the nesting chain.
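// As an illustration, a call that was originally emitted as
//   foo(a, b);
// may, after globals have been converted to arguments, need to be emitted as
//   foo(a, b, some_global);
// where foo, a, b and some_global are hypothetical names standing in for the callee and its arguments.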
15182void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
15183{
15184 auto &args = func.arguments;
15185 uint32_t arg_cnt = uint32_t(args.size());
15186 for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
15187 {
15188 auto &arg = args[arg_idx];
15189 assert(arg.alias_global_variable);
15190
15191 // If the underlying variable needs to be declared
15192 // (ie. a local variable with deferred declaration), do so now.
15193 uint32_t var_id = get<SPIRVariable>(id: arg.id).basevariable;
15194 if (var_id)
15195 flush_variable_declaration(id: var_id);
15196
15197 arglist.push_back(t: to_func_call_arg(arg, id: arg.id));
15198 }
15199}
15200
15201string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
15202{
15203 if (type.type_alias != TypeID(0) &&
15204 !has_extended_decoration(id: type.type_alias, decoration: SPIRVCrossDecorationBufferBlockRepacked))
15205 {
15206 return to_member_name(type: get<SPIRType>(id: type.type_alias), index);
15207 }
15208
15209 auto &memb = ir.meta[type.self].members;
15210 if (index < memb.size() && !memb[index].alias.empty())
15211 return memb[index].alias;
15212 else
15213 return join(ts: "_m", ts&: index);
15214}
15215
15216string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
15217{
15218 return join(ts: ".", ts: to_member_name(type, index));
15219}
15220
15221string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
15222{
15223 string ret;
15224 auto *member_type = &type;
15225 for (auto &index : indices)
15226 {
15227 ret += join(ts: ".", ts: to_member_name(type: *member_type, index));
15228 member_type = &get<SPIRType>(id: member_type->member_types[index]);
15229 }
15230 return ret;
15231}
15232
15233void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
15234{
15235 auto &memb = ir.meta[type.self].members;
15236 if (index < memb.size() && !memb[index].alias.empty())
15237 {
15238 auto &name = memb[index].alias;
15239 if (name.empty())
15240 return;
15241
15242 ParsedIR::sanitize_identifier(str&: name, member: true, allow_reserved_prefixes: true);
15243 update_name_cache(cache&: type.member_name_cache, name);
15244 }
15245}
15246
15247// Checks whether the ID is a row_major matrix that requires conversion before use
15248bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
15249{
15250 // Natively supported row-major matrices do not need to be converted.
15251 // Legacy targets do not support row-major layouts.
15252 if (backend.native_row_major_matrix && !is_legacy())
15253 return false;
15254
15255 auto *e = maybe_get<SPIRExpression>(id);
15256 if (e)
15257 return e->need_transpose;
15258 else
15259 return has_decoration(id, decoration: DecorationRowMajor);
15260}
15261
15262// Checks whether the member is a row_major matrix that requires conversion before use
15263bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
15264{
15265 // Natively supported row-major matrices do not need to be converted.
15266 if (backend.native_row_major_matrix && !is_legacy())
15267 return false;
15268
15269 // Non-matrix or column-major matrix types do not need to be converted.
15270 if (!has_member_decoration(id: type.self, index, decoration: DecorationRowMajor))
15271 return false;
15272
15273 // Only square row-major matrices can be converted at this time.
15274 // Converting non-square matrices will require defining a custom GLSL function that
15275 // swaps matrix elements while retaining the original dimensional form of the matrix.
15276 const auto mbr_type = get<SPIRType>(id: type.member_types[index]);
15277 if (mbr_type.columns != mbr_type.vecsize)
15278 SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
15279
15280 return true;
15281}
15282
15283// Checks if we need to remap physical type IDs when declaring the type in a buffer.
15284bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
15285{
15286 return has_extended_member_decoration(type: type.self, index, decoration: SPIRVCrossDecorationPhysicalTypeID);
15287}
15288
15289// Checks whether the member is in packed data type, that might need to be unpacked.
15290bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
15291{
15292 return has_extended_member_decoration(type: type.self, index, decoration: SPIRVCrossDecorationPhysicalTypePacked);
15293}
15294
15295// Wraps the expression string in a function call that converts the
15296// row_major matrix result of the expression to a column_major matrix.
15297// Base implementation uses the standard library transpose() function.
15298// Subclasses may override to use a different function.
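// For example, a row-major matrix expression "M" (hypothetical name) becomes "transpose(M)" here,
// while legacy targets without a native transpose() get a polyfill call such as "spvTranspose(M)"
// via the version < 120 path below.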
15299string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
15300 bool /*is_packed*/, bool relaxed)
15301{
15302 strip_enclosed_expression(expr&: exp_str);
15303 if (!is_matrix(type: exp_type))
15304 {
15305 auto column_index = exp_str.find_last_of(c: '[');
15306 if (column_index == string::npos)
15307 return exp_str;
15308
15309 auto column_expr = exp_str.substr(pos: column_index);
15310 exp_str.resize(n: column_index);
15311
15312 auto end_deferred_index = column_expr.find_last_of(c: ']');
15313 if (end_deferred_index != string::npos && end_deferred_index + 1 != column_expr.size())
15314 {
15315 // If we have any data member fixups, it must be transposed so that it refers to this index.
15316 // E.g. [0].data followed by [1] would be shuffled to [1][0].data which is wrong,
15317 // and needs to be [1].data[0] instead.
15318 end_deferred_index++;
15319 column_expr = column_expr.substr(pos: end_deferred_index) +
15320 column_expr.substr(pos: 0, n: end_deferred_index);
15321 }
15322
15323 auto transposed_expr = type_to_glsl_constructor(type: exp_type) + "(";
15324
15325 // Loading a column from a row-major matrix. Unroll the load.
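		// As a sketch, with exp_str == "M" and column_expr == "[1]" (hypothetical values),
		// a vec3 result is emitted as vec3(M[0][1], M[1][1], M[2][1]),
		// i.e. one component is gathered from each stored column instead of taking a whole physical column.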
15326 for (uint32_t c = 0; c < exp_type.vecsize; c++)
15327 {
15328 transposed_expr += join(ts&: exp_str, ts: '[', ts&: c, ts: ']', ts&: column_expr);
15329 if (c + 1 < exp_type.vecsize)
15330 transposed_expr += ", ";
15331 }
15332
15333 transposed_expr += ")";
15334 return transposed_expr;
15335 }
15336 else if (options.version < 120)
15337 {
15338 // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that
15339 // these GLSL versions do not support non-square matrices.
15340 if (exp_type.vecsize == 2 && exp_type.columns == 2)
15341 require_polyfill(polyfill: PolyfillTranspose2x2, relaxed);
15342 else if (exp_type.vecsize == 3 && exp_type.columns == 3)
15343 require_polyfill(polyfill: PolyfillTranspose3x3, relaxed);
15344 else if (exp_type.vecsize == 4 && exp_type.columns == 4)
15345 require_polyfill(polyfill: PolyfillTranspose4x4, relaxed);
15346 else
15347 SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose.");
15348 return join(ts: "spvTranspose", ts: (options.es && relaxed) ? "MP" : "", ts: "(", ts&: exp_str, ts: ")");
15349 }
15350 else
15351 return join(ts: "transpose(", ts&: exp_str, ts: ")");
15352}
15353
15354string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
15355{
15356 string type_name = type_to_glsl(type, id);
15357 remap_variable_type_name(type, var_name: name, type_name);
15358 return join(ts&: type_name, ts: " ", ts: name, ts: type_to_array_glsl(type, variable_id: id));
15359}
15360
15361bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const
15362{
15363 return var.storage == storage;
15364}
15365
15366// Emit a structure member. Subclasses may override to modify output,
15367// or to dynamically add a padding member if needed.
15368void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
15369 const string &qualifier, uint32_t)
15370{
15371 auto &membertype = get<SPIRType>(id: member_type_id);
15372
15373 Bitset memberflags;
15374 auto &memb = ir.meta[type.self].members;
15375 if (index < memb.size())
15376 memberflags = memb[index].decoration_flags;
15377
15378 string qualifiers;
15379 bool is_block = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock) ||
15380 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
15381
15382 if (is_block)
15383 qualifiers = to_interpolation_qualifiers(flags: memberflags);
15384
15385 statement(ts: layout_for_member(type, index), ts&: qualifiers, ts: qualifier, ts: flags_to_qualifiers_glsl(type: membertype, flags: memberflags),
15386 ts: variable_decl(type: membertype, name: to_member_name(type, index)), ts: ";");
15387}
15388
15389void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
15390{
15391}
15392
15393string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
15394{
15395 // GL_EXT_buffer_reference variables can be marked as restrict.
15396 if (flags.get(bit: DecorationRestrictPointerEXT))
15397 return "restrict ";
15398
15399 string qual;
15400
15401 if (type_is_floating_point(type) && flags.get(bit: DecorationNoContraction) && backend.support_precise_qualifier)
15402 qual = "precise ";
15403
15404 // Structs do not have precision qualifiers, nor do doubles (desktop only anyway, so no mediump/highp).
15405 bool type_supports_precision =
15406 type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt ||
15407 type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
15408 type.basetype == SPIRType::Sampler;
15409
15410 if (!type_supports_precision)
15411 return qual;
15412
15413 if (options.es)
15414 {
15415 auto &execution = get_entry_point();
15416
15417 if (type.basetype == SPIRType::UInt && is_legacy_es())
15418 {
15419 // HACK: This is a bool. See comment in type_to_glsl().
15420 qual += "lowp ";
15421 }
15422 else if (flags.get(bit: DecorationRelaxedPrecision))
15423 {
15424 bool implied_fmediump = type.basetype == SPIRType::Float &&
15425 options.fragment.default_float_precision == Options::Mediump &&
15426 execution.model == ExecutionModelFragment;
15427
15428 bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
15429 options.fragment.default_int_precision == Options::Mediump &&
15430 execution.model == ExecutionModelFragment;
15431
15432 qual += (implied_fmediump || implied_imediump) ? "" : "mediump ";
15433 }
15434 else
15435 {
15436 bool implied_fhighp =
15437 type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
15438 execution.model == ExecutionModelFragment) ||
15439 (execution.model != ExecutionModelFragment));
15440
15441 bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
15442 ((options.fragment.default_int_precision == Options::Highp &&
15443 execution.model == ExecutionModelFragment) ||
15444 (execution.model != ExecutionModelFragment));
15445
15446 qual += (implied_fhighp || implied_ihighp) ? "" : "highp ";
15447 }
15448 }
15449 else if (backend.allow_precision_qualifiers)
15450 {
15451 // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
15452 // The default is highp however, so only emit mediump in the rare case that a shader has these.
15453 if (flags.get(bit: DecorationRelaxedPrecision))
15454 qual += "mediump ";
15455 }
15456
15457 return qual;
15458}
15459
15460string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
15461{
15462 auto &type = expression_type(id);
15463 bool use_precision_qualifiers = backend.allow_precision_qualifiers;
15464 if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
15465 {
15466 // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
15467 auto &result_type = get<SPIRType>(id: type.image.type);
15468 if (result_type.width < 32)
15469 return "mediump ";
15470 }
15471 return flags_to_qualifiers_glsl(type, flags: ir.meta[id].decoration.decoration_flags);
15472}
15473
15474void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var)
15475{
15476 // Works around weird behavior in glslangValidator where
15477 // a patch out block is translated to just the block members getting the decoration.
15478 // To make glslang not complain when we compile again, we have to transform this back to a case where
15479 // the variable itself has the Patch decoration, rather than its members.
15480 // Same for perprimitiveEXT.
15481 auto &type = get<SPIRType>(id: var.basetype);
15482 if (has_decoration(id: type.self, decoration: DecorationBlock))
15483 {
15484 uint32_t member_count = uint32_t(type.member_types.size());
15485 Decoration promoted_decoration = {};
15486 bool do_promote_decoration = false;
15487 for (uint32_t i = 0; i < member_count; i++)
15488 {
15489 if (has_member_decoration(id: type.self, index: i, decoration: DecorationPatch))
15490 {
15491 promoted_decoration = DecorationPatch;
15492 do_promote_decoration = true;
15493 break;
15494 }
15495 else if (has_member_decoration(id: type.self, index: i, decoration: DecorationPerPrimitiveEXT))
15496 {
15497 promoted_decoration = DecorationPerPrimitiveEXT;
15498 do_promote_decoration = true;
15499 break;
15500 }
15501 }
15502
15503 if (do_promote_decoration)
15504 {
15505 set_decoration(id: var.self, decoration: promoted_decoration);
15506 for (uint32_t i = 0; i < member_count; i++)
15507 unset_member_decoration(id: type.self, index: i, decoration: promoted_decoration);
15508 }
15509 }
15510}
15511
15512string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
15513{
15514 auto &flags = get_decoration_bitset(id);
15515 string res;
15516
15517 auto *var = maybe_get<SPIRVariable>(id);
15518
15519 if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
15520 res += "shared ";
15521 else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT && !backend.shared_is_implied)
15522 res += "taskPayloadSharedEXT ";
15523
15524 res += to_interpolation_qualifiers(flags);
15525 if (var)
15526 res += to_storage_qualifiers_glsl(var: *var);
15527
15528 auto &type = expression_type(id);
15529 if (type.image.dim != DimSubpassData && type.image.sampled == 2)
15530 {
15531 if (flags.get(bit: DecorationCoherent))
15532 res += "coherent ";
15533 if (flags.get(bit: DecorationRestrict))
15534 res += "restrict ";
15535
15536 if (flags.get(bit: DecorationNonWritable))
15537 res += "readonly ";
15538
15539 bool formatted_load = type.image.format == ImageFormatUnknown;
15540 if (flags.get(bit: DecorationNonReadable))
15541 {
15542 res += "writeonly ";
15543 formatted_load = false;
15544 }
15545
15546 if (formatted_load)
15547 {
15548 if (!options.es)
15549 require_extension_internal(ext: "GL_EXT_shader_image_load_formatted");
15550 else
15551 SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL.");
15552 }
15553 }
15554
15555 res += to_precision_qualifiers_glsl(id);
15556
15557 return res;
15558}
15559
15560string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
15561{
15562 // glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
15563 auto &type = expression_type(id: arg.id);
15564 const char *direction = "";
15565
15566 if (type.pointer)
15567 {
15568 if (arg.write_count && arg.read_count)
15569 direction = "inout ";
15570 else if (arg.write_count)
15571 direction = "out ";
15572 }
15573
15574 return join(ts&: direction, ts: to_qualifiers_glsl(id: arg.id), ts: variable_decl(type, name: to_name(id: arg.id), id: arg.id));
15575}
15576
15577string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
15578{
15579 return to_unpacked_expression(id: var.initializer);
15580}
15581
15582string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id)
15583{
15584#ifndef NDEBUG
15585 auto &type = get<SPIRType>(id: type_id);
15586 assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction ||
15587 type.storage == StorageClassGeneric);
15588#endif
15589 uint32_t id = ir.increase_bound_by(count: 1);
15590 ir.make_constant_null(id, type: type_id, add_to_typed_id_set: false);
15591 return constant_expression(c: get<SPIRConstant>(id));
15592}
15593
15594bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const
15595{
15596 if (type.pointer)
15597 return false;
15598
15599 if (!type.array.empty() && options.flatten_multidimensional_arrays)
15600 return false;
15601
15602 for (auto &literal : type.array_size_literal)
15603 if (!literal)
15604 return false;
15605
15606 for (auto &memb : type.member_types)
15607 if (!type_can_zero_initialize(type: get<SPIRType>(id: memb)))
15608 return false;
15609
15610 return true;
15611}
15612
15613string CompilerGLSL::variable_decl(const SPIRVariable &variable)
15614{
15615 // Ignore the pointer type since GLSL doesn't have pointers.
15616 auto &type = get_variable_data_type(var: variable);
15617
15618 if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer)
15619 SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");
15620
15621 auto res = join(ts: to_qualifiers_glsl(id: variable.self), ts: variable_decl(type, name: to_name(id: variable.self), id: variable.self));
15622
15623 if (variable.loop_variable && variable.static_expression)
15624 {
15625 uint32_t expr = variable.static_expression;
15626 if (ir.ids[expr].get_type() != TypeUndef)
15627 res += join(ts: " = ", ts: to_unpacked_expression(id: variable.static_expression));
15628 else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
15629 res += join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var: variable)));
15630 }
15631 else if (variable.initializer && !variable_decl_is_remapped_storage(var: variable, storage: StorageClassWorkgroup))
15632 {
15633 uint32_t expr = variable.initializer;
15634 if (ir.ids[expr].get_type() != TypeUndef)
15635 res += join(ts: " = ", ts: to_initializer_expression(var: variable));
15636 else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
15637 res += join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var: variable)));
15638 }
15639
15640 return res;
15641}
15642
15643const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
15644{
15645 auto &flags = get_decoration_bitset(id: variable.self);
15646 if (flags.get(bit: DecorationRelaxedPrecision))
15647 return "mediump ";
15648 else
15649 return "highp ";
15650}
15651
15652string CompilerGLSL::pls_decl(const PlsRemap &var)
15653{
15654 auto &variable = get<SPIRVariable>(id: var.id);
15655
15656 auto op_and_basetype = pls_format_to_basetype(format: var.format);
15657
15658 SPIRType type { op_and_basetype.first };
15659 type.basetype = op_and_basetype.second;
15660 auto vecsize = pls_format_to_components(format: var.format);
15661 if (vecsize > 1)
15662 {
15663 type.op = OpTypeVector;
15664 type.vecsize = vecsize;
15665 }
15666
15667 return join(ts: to_pls_layout(format: var.format), ts: to_pls_qualifiers_glsl(variable), ts: type_to_glsl(type), ts: " ",
15668 ts: to_name(id: variable.self));
15669}
15670
15671uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
15672{
15673 return to_array_size_literal(type, index: uint32_t(type.array.size() - 1));
15674}
15675
15676uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
15677{
15678 assert(type.array.size() == type.array_size_literal.size());
15679
15680 if (type.array_size_literal[index])
15681 {
15682 return type.array[index];
15683 }
15684 else
15685 {
15686 // Use the default spec constant value.
15687 // This is the best we can do.
15688 return evaluate_constant_u32(id: type.array[index]);
15689 }
15690}
15691
15692string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
15693{
15694 assert(type.array.size() == type.array_size_literal.size());
15695
15696 auto &size = type.array[index];
15697 if (!type.array_size_literal[index])
15698 return to_expression(id: size);
15699 else if (size)
15700 return convert_to_string(t: size);
15701 else if (!backend.unsized_array_supported)
15702 {
15703 // For runtime-sized arrays, we can work around
15704 // the lack of standard support for this by simply having
15705 // a single-element array.
15706 //
15707 // Runtime-length arrays must always be the last element
15708 // in an interface block.
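		// For example, a runtime-sized member that would notionally be "float data[];"
		// is declared as "float data[1];" on such targets (the member name is illustrative).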
15709 return "1";
15710 }
15711 else
15712 return "";
15713}
15714
15715string CompilerGLSL::type_to_array_glsl(const SPIRType &type, uint32_t)
15716{
15717 if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
15718 {
15719 // We are using a wrapped pointer type, and we should not emit any array declarations here.
15720 return "";
15721 }
15722
15723 if (type.array.empty())
15724 return "";
15725
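	// As a sketch: with flatten_multidimensional_arrays enabled, a two-dimensional array such as
	// float[3][2] is declared with a single bracket whose size is the product of the individual
	// dimension sizes (something like "[2 * 3]"), rather than with nested brackets as in the path below.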
15726 if (options.flatten_multidimensional_arrays)
15727 {
15728 string res;
15729 res += "[";
15730 for (auto i = uint32_t(type.array.size()); i; i--)
15731 {
15732 res += enclose_expression(expr: to_array_size(type, index: i - 1));
15733 if (i > 1)
15734 res += " * ";
15735 }
15736 res += "]";
15737 return res;
15738 }
15739 else
15740 {
15741 if (type.array.size() > 1)
15742 {
15743 if (!options.es && options.version < 430)
15744 require_extension_internal(ext: "GL_ARB_arrays_of_arrays");
15745 else if (options.es && options.version < 310)
15746 SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
15747 "Try using --flatten-multidimensional-arrays or set "
15748 "options.flatten_multidimensional_arrays to true.");
15749 }
15750
15751 string res;
15752 for (auto i = uint32_t(type.array.size()); i; i--)
15753 {
15754 res += "[";
15755 res += to_array_size(type, index: i - 1);
15756 res += "]";
15757 }
15758 return res;
15759 }
15760}
15761
15762string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id, bool /*member*/)
15763{
15764 auto &imagetype = get<SPIRType>(id: type.image.type);
15765 string res;
15766
15767 switch (imagetype.basetype)
15768 {
15769 case SPIRType::Int64:
15770 res = "i64";
15771 require_extension_internal(ext: "GL_EXT_shader_image_int64");
15772 break;
15773 case SPIRType::UInt64:
15774 res = "u64";
15775 require_extension_internal(ext: "GL_EXT_shader_image_int64");
15776 break;
15777 case SPIRType::Int:
15778 case SPIRType::Short:
15779 case SPIRType::SByte:
15780 res = "i";
15781 break;
15782 case SPIRType::UInt:
15783 case SPIRType::UShort:
15784 case SPIRType::UByte:
15785 res = "u";
15786 break;
15787 default:
15788 break;
15789 }
15790
15791 // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
15792 // We cannot express a true half texture type in GLSL, nor one for short integer formats, for that matter.
15793
15794 if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
15795 return res + "subpassInput" + (type.image.ms ? "MS" : "");
15796 else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
15797 subpass_input_is_framebuffer_fetch(id))
15798 {
15799 SPIRType sampled_type = get<SPIRType>(id: type.image.type);
15800 sampled_type.vecsize = 4;
15801 return type_to_glsl(type: sampled_type);
15802 }
15803
15804 // If we're emulating subpassInput with samplers, force sampler2D
15805 // so we don't have to specify format.
15806 if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
15807 {
15808 // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
15809 if (type.image.dim == DimBuffer && type.image.sampled == 1)
15810 res += "sampler";
15811 else
15812 res += type.image.sampled == 2 ? "image" : "texture";
15813 }
15814 else
15815 res += "sampler";
15816
15817 switch (type.image.dim)
15818 {
15819 case Dim1D:
15820 // ES doesn't support 1D. Fake it with 2D.
15821 res += options.es ? "2D" : "1D";
15822 break;
15823 case Dim2D:
15824 res += "2D";
15825 break;
15826 case Dim3D:
15827 res += "3D";
15828 break;
15829 case DimCube:
15830 res += "Cube";
15831 break;
15832 case DimRect:
15833 if (options.es)
15834 SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");
15835
15836 if (is_legacy_desktop())
15837 require_extension_internal(ext: "GL_ARB_texture_rectangle");
15838
15839 res += "2DRect";
15840 break;
15841
15842 case DimBuffer:
15843 if (options.es && options.version < 320)
15844 require_extension_internal(ext: "GL_EXT_texture_buffer");
15845 else if (!options.es && options.version < 300)
15846 require_extension_internal(ext: "GL_EXT_texture_buffer_object");
15847 res += "Buffer";
15848 break;
15849
15850 case DimSubpassData:
15851 res += "2D";
15852 break;
15853 default:
15854 SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
15855 }
15856
15857 if (type.image.ms)
15858 res += "MS";
15859 if (type.image.arrayed)
15860 {
15861 if (is_legacy_desktop())
15862 require_extension_internal(ext: "GL_EXT_texture_array");
15863 res += "Array";
15864 }
15865
15866 // "Shadow" state in GLSL only exists for samplers and combined image samplers.
15867 if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
15868 is_depth_image(type, id))
15869 {
15870 res += "Shadow";
15871
15872 if (type.image.dim == DimCube && is_legacy())
15873 {
15874 if (!options.es)
15875 require_extension_internal(ext: "GL_EXT_gpu_shader4");
15876 else
15877 {
15878 require_extension_internal(ext: "GL_NV_shadow_samplers_cube");
15879 res += "NV";
15880 }
15881 }
15882 }
15883
15884 return res;
15885}
15886
15887string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
15888{
15889 if (backend.use_array_constructor && type.array.size() > 1)
15890 {
15891 if (options.flatten_multidimensional_arrays)
15892 SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, "
15893 "e.g. float[][]().");
15894 else if (!options.es && options.version < 430)
15895 require_extension_internal(ext: "GL_ARB_arrays_of_arrays");
15896 else if (options.es && options.version < 310)
15897 SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
15898 }
15899
15900 auto e = type_to_glsl(type);
15901 if (backend.use_array_constructor)
15902 {
15903 for (uint32_t i = 0; i < type.array.size(); i++)
15904 e += "[]";
15905 }
15906 return e;
15907}
15908
15909// The optional id parameter indicates the object whose type we are trying
15910 // to find the description for. Most type descriptions do not
15911// depend on a specific object's use of that type.
15912string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
15913{
15914 if (is_physical_pointer(type) && !is_physical_pointer_to_buffer_block(type))
15915 {
15916 // Need to create a magic type name which compacts the entire type information.
15917 auto *parent = &get_pointee_type(type);
15918 string name = type_to_glsl(type: *parent);
15919
15920 uint32_t array_stride = get_decoration(id: type.parent_type, decoration: DecorationArrayStride);
15921
15922 // Resolve all array dimensions in one go since once we lose the pointer type,
15923 // array information is left to type_to_array_glsl. The base type loses array information.
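	// As a rough example, a physical pointer to float[4] with an ArrayStride of 16
	// ends up with a generated name along the lines of "float4_stride_16Pointer",
	// folding the element type, dimensions and strides into a single identifier.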
15924 while (is_array(type: *parent))
15925 {
15926 if (parent->array_size_literal.back())
15927 name += join(ts: type.array.back(), ts: "_");
15928 else
15929 name += join(ts: "id", ts: type.array.back(), ts: "_");
15930
15931 name += "stride_" + std::to_string(val: array_stride);
15932
15933 array_stride = get_decoration(id: parent->parent_type, decoration: DecorationArrayStride);
15934 parent = &get<SPIRType>(id: parent->parent_type);
15935 }
15936
15937 name += "Pointer";
15938 return name;
15939 }
15940
15941 switch (type.basetype)
15942 {
15943 case SPIRType::Struct:
15944 // Need OpName lookup here to get a "sensible" name for a struct.
15945 if (backend.explicit_struct_type)
15946 return join(ts: "struct ", ts: to_name(id: type.self));
15947 else
15948 return to_name(id: type.self);
15949
15950 case SPIRType::Image:
15951 case SPIRType::SampledImage:
15952 return image_type_glsl(type, id);
15953
15954 case SPIRType::Sampler:
15955 // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
15956 // this distinction into the type system.
15957 return comparison_ids.count(x: id) ? "samplerShadow" : "sampler";
15958
15959 case SPIRType::AccelerationStructure:
15960 return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";
15961
15962 case SPIRType::RayQuery:
15963 return "rayQueryEXT";
15964
15965 case SPIRType::Void:
15966 return "void";
15967
15968 default:
15969 break;
15970 }
15971
15972 if (type.basetype == SPIRType::UInt && is_legacy())
15973 {
15974 if (options.es)
15975 // HACK: spirv-cross changes bools into uints and generates code which compares them to
15976 // zero. Input code will have already been validated as not containing any uints,
15977 // so any remaining uints must in fact be bools. However, simply returning "bool" here
15978 // will result in invalid code. Instead, return an int.
15979 return backend.basic_int_type;
15980 else
15981 require_extension_internal(ext: "GL_EXT_gpu_shader4");
15982 }
15983
15984 if (type.basetype == SPIRType::AtomicCounter)
15985 {
15986 if (options.es && options.version < 310)
15987 SPIRV_CROSS_THROW("At least ESSL 3.10 required for atomic counters.");
15988 else if (!options.es && options.version < 420)
15989 require_extension_internal(ext: "GL_ARB_shader_atomic_counters");
15990 }
15991
15992 if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
15993 {
15994 switch (type.basetype)
15995 {
15996 case SPIRType::Boolean:
15997 return "bool";
15998 case SPIRType::SByte:
15999 return backend.basic_int8_type;
16000 case SPIRType::UByte:
16001 return backend.basic_uint8_type;
16002 case SPIRType::Short:
16003 return backend.basic_int16_type;
16004 case SPIRType::UShort:
16005 return backend.basic_uint16_type;
16006 case SPIRType::Int:
16007 return backend.basic_int_type;
16008 case SPIRType::UInt:
16009 return backend.basic_uint_type;
16010 case SPIRType::AtomicCounter:
16011 return "atomic_uint";
16012 case SPIRType::Half:
16013 return "float16_t";
16014 case SPIRType::Float:
16015 return "float";
16016 case SPIRType::Double:
16017 return "double";
16018 case SPIRType::Int64:
16019 return "int64_t";
16020 case SPIRType::UInt64:
16021 return "uint64_t";
16022 default:
16023 return "???";
16024 }
16025 }
16026 else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
16027 {
16028 switch (type.basetype)
16029 {
16030 case SPIRType::Boolean:
16031 return join(ts: "bvec", ts: type.vecsize);
16032 case SPIRType::SByte:
16033 return join(ts: "i8vec", ts: type.vecsize);
16034 case SPIRType::UByte:
16035 return join(ts: "u8vec", ts: type.vecsize);
16036 case SPIRType::Short:
16037 return join(ts: "i16vec", ts: type.vecsize);
16038 case SPIRType::UShort:
16039 return join(ts: "u16vec", ts: type.vecsize);
16040 case SPIRType::Int:
16041 return join(ts: "ivec", ts: type.vecsize);
16042 case SPIRType::UInt:
16043 return join(ts: "uvec", ts: type.vecsize);
16044 case SPIRType::Half:
16045 return join(ts: "f16vec", ts: type.vecsize);
16046 case SPIRType::Float:
16047 return join(ts: "vec", ts: type.vecsize);
16048 case SPIRType::Double:
16049 return join(ts: "dvec", ts: type.vecsize);
16050 case SPIRType::Int64:
16051 return join(ts: "i64vec", ts: type.vecsize);
16052 case SPIRType::UInt64:
16053 return join(ts: "u64vec", ts: type.vecsize);
16054 default:
16055 return "???";
16056 }
16057 }
16058 else if (type.vecsize == type.columns) // Simple Matrix builtin
16059 {
16060 switch (type.basetype)
16061 {
16062 case SPIRType::Boolean:
16063 return join(ts: "bmat", ts: type.vecsize);
16064 case SPIRType::Int:
16065 return join(ts: "imat", ts: type.vecsize);
16066 case SPIRType::UInt:
16067 return join(ts: "umat", ts: type.vecsize);
16068 case SPIRType::Half:
16069 return join(ts: "f16mat", ts: type.vecsize);
16070 case SPIRType::Float:
16071 return join(ts: "mat", ts: type.vecsize);
16072 case SPIRType::Double:
16073 return join(ts: "dmat", ts: type.vecsize);
16074 // Matrix types not supported for int64/uint64.
16075 default:
16076 return "???";
16077 }
16078 }
16079 else
16080 {
16081 switch (type.basetype)
16082 {
16083 case SPIRType::Boolean:
16084 return join(ts: "bmat", ts: type.columns, ts: "x", ts: type.vecsize);
16085 case SPIRType::Int:
16086 return join(ts: "imat", ts: type.columns, ts: "x", ts: type.vecsize);
16087 case SPIRType::UInt:
16088 return join(ts: "umat", ts: type.columns, ts: "x", ts: type.vecsize);
16089 case SPIRType::Half:
16090 return join(ts: "f16mat", ts: type.columns, ts: "x", ts: type.vecsize);
16091 case SPIRType::Float:
16092 return join(ts: "mat", ts: type.columns, ts: "x", ts: type.vecsize);
16093 case SPIRType::Double:
16094 return join(ts: "dmat", ts: type.columns, ts: "x", ts: type.vecsize);
16095 // Matrix types not supported for int64/uint64.
16096 default:
16097 return "???";
16098 }
16099 }
16100}
16101
16102void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
16103 const unordered_set<string> &variables_secondary, string &name)
16104{
16105 if (name.empty())
16106 return;
16107
16108 ParsedIR::sanitize_underscores(str&: name);
16109 if (ParsedIR::is_globally_reserved_identifier(str&: name, allow_reserved_prefixes: true))
16110 {
16111 name.clear();
16112 return;
16113 }
16114
16115 update_name_cache(cache_primary&: variables_primary, cache_secondary: variables_secondary, name);
16116}
16117
16118void CompilerGLSL::add_local_variable_name(uint32_t id)
16119{
16120 add_variable(variables_primary&: local_variable_names, variables_secondary: block_names, name&: ir.meta[id].decoration.alias);
16121}
16122
16123void CompilerGLSL::add_resource_name(uint32_t id)
16124{
16125 add_variable(variables_primary&: resource_names, variables_secondary: block_names, name&: ir.meta[id].decoration.alias);
16126}
16127
16128void CompilerGLSL::add_header_line(const std::string &line)
16129{
16130 header_lines.push_back(t: line);
16131}
16132
16133bool CompilerGLSL::has_extension(const std::string &ext) const
16134{
16135 auto itr = find(first: begin(cont: forced_extensions), last: end(cont: forced_extensions), val: ext);
16136 return itr != end(cont: forced_extensions);
16137}
16138
16139void CompilerGLSL::require_extension(const std::string &ext)
16140{
16141 if (!has_extension(ext))
16142 forced_extensions.push_back(t: ext);
16143}
16144
16145const SmallVector<std::string> &CompilerGLSL::get_required_extensions() const
16146{
16147 return forced_extensions;
16148}
16149
16150void CompilerGLSL::require_extension_internal(const string &ext)
16151{
16152 if (backend.supports_extensions && !has_extension(ext))
16153 {
16154 forced_extensions.push_back(t: ext);
16155 force_recompile();
16156 }
16157}
16158
16159void CompilerGLSL::flatten_buffer_block(VariableID id)
16160{
16161 auto &var = get<SPIRVariable>(id);
16162 auto &type = get<SPIRType>(id: var.basetype);
16163 auto name = to_name(id: type.self, allow_alias: false);
16164 auto &flags = get_decoration_bitset(id: type.self);
16165
16166 if (!type.array.empty())
16167 SPIRV_CROSS_THROW(name + " is an array of UBOs.");
16168 if (type.basetype != SPIRType::Struct)
16169 SPIRV_CROSS_THROW(name + " is not a struct.");
16170 if (!flags.get(bit: DecorationBlock))
16171 SPIRV_CROSS_THROW(name + " is not a block.");
16172 if (type.member_types.empty())
16173 SPIRV_CROSS_THROW(name + " is an empty struct.");
16174
16175 flattened_buffer_blocks.insert(x: id);
16176}
16177
16178bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
16179{
16180 return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
16181}
16182
16183bool CompilerGLSL::is_user_type_structured(uint32_t /*id*/) const
16184{
16185 return false; // GLSL itself does not have structured user type, but HLSL does with StructuredBuffer and RWStructuredBuffer resources.
16186}
16187
16188bool CompilerGLSL::check_atomic_image(uint32_t id)
16189{
16190 auto &type = expression_type(id);
16191 if (type.storage == StorageClassImage)
16192 {
16193 if (options.es && options.version < 320)
16194 require_extension_internal(ext: "GL_OES_shader_image_atomic");
16195
16196 auto *var = maybe_get_backing_variable(chain: id);
16197 if (var)
16198 {
16199 if (has_decoration(id: var->self, decoration: DecorationNonWritable) || has_decoration(id: var->self, decoration: DecorationNonReadable))
16200 {
16201 unset_decoration(id: var->self, decoration: DecorationNonWritable);
16202 unset_decoration(id: var->self, decoration: DecorationNonReadable);
16203 force_recompile();
16204 }
16205 }
16206 return true;
16207 }
16208 else
16209 return false;
16210}
16211
16212void CompilerGLSL::add_function_overload(const SPIRFunction &func)
16213{
16214 Hasher hasher;
16215 for (auto &arg : func.arguments)
16216 {
16217 // Parameters can vary with pointer type or not,
16218 // but that will not change the signature in GLSL/HLSL,
16219 // so strip the pointer type before hashing.
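		// As an illustration, two SPIR-V functions whose parameters differ only in pointer-ness
		// (e.g. float vs. pointer-to-float) hash identically here; they are treated as the same
		// GLSL signature, and the overload-conflict path below renames one of them.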
16220 uint32_t type_id = get_pointee_type_id(type_id: arg.type);
16221 auto &type = get<SPIRType>(id: type_id);
16222
16223 if (!combined_image_samplers.empty())
16224 {
16225 // If we have combined image samplers, we cannot really trust the image and sampler arguments
16226 // we pass down to callees, because they may be shuffled around.
16227 // Ignore these arguments, to make sure that functions need to differ in some other way
16228 // to be considered different overloads.
16229 if (type.basetype == SPIRType::SampledImage ||
16230 (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
16231 {
16232 continue;
16233 }
16234 }
16235
16236 hasher.u32(value: type_id);
16237 }
16238 uint64_t types_hash = hasher.get();
16239
16240 auto function_name = to_name(id: func.self);
16241 auto itr = function_overloads.find(x: function_name);
16242 if (itr != end(cont&: function_overloads))
16243 {
16244 // There exists a function with this name already.
16245 auto &overloads = itr->second;
16246 if (overloads.count(x: types_hash) != 0)
16247 {
16248 // Overload conflict, assign a new name.
16249 add_resource_name(id: func.self);
16250 function_overloads[to_name(id: func.self)].insert(x: types_hash);
16251 }
16252 else
16253 {
16254 // Can reuse the name.
16255 overloads.insert(x: types_hash);
16256 }
16257 }
16258 else
16259 {
16260 // First time we see this function name.
16261 add_resource_name(id: func.self);
16262 function_overloads[to_name(id: func.self)].insert(x: types_hash);
16263 }
16264}
16265
16266void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
16267{
16268 if (func.self != ir.default_entry_point)
16269 add_function_overload(func);
16270
16271 // Avoid shadow declarations.
16272 local_variable_names = resource_names;
16273
16274 string decl;
16275
16276 auto &type = get<SPIRType>(id: func.return_type);
16277 decl += flags_to_qualifiers_glsl(type, flags: return_flags);
16278 decl += type_to_glsl(type);
16279 decl += type_to_array_glsl(type, 0);
16280 decl += " ";
16281
16282 if (func.self == ir.default_entry_point)
16283 {
16284 // If we need complex fallback in GLSL, we just wrap main() in a function
16285 // and interlock the entire shader ...
16286 if (interlocked_is_complex)
16287 decl += "spvMainInterlockedBody";
16288 else
16289 decl += "main";
16290
16291 processing_entry_point = true;
16292 }
16293 else
16294 decl += to_name(id: func.self);
16295
16296 decl += "(";
16297 SmallVector<string> arglist;
16298 for (auto &arg : func.arguments)
16299 {
16300 // Do not pass in separate images or samplers if we're remapping
16301 // to combined image samplers.
16302 if (skip_argument(id: arg.id))
16303 continue;
16304
16305 // Might change the variable name if it already exists in this function.
16306 // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
16307 // SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
16308 // to use the same name for multiple variables.
16309 add_local_variable_name(id: arg.id);
16310
16311 arglist.push_back(t: argument_decl(arg));
16312
16313 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
16314 auto *var = maybe_get<SPIRVariable>(id: arg.id);
16315 if (var)
16316 var->parameter = &arg;
16317 }
16318
16319 for (auto &arg : func.shadow_arguments)
16320 {
16321 // Might change the variable name if it already exists in this function.
16322 // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
16323 // SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
16324 // to use the same name for multiple variables.
16325 add_local_variable_name(id: arg.id);
16326
16327 arglist.push_back(t: argument_decl(arg));
16328
16329 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
16330 auto *var = maybe_get<SPIRVariable>(id: arg.id);
16331 if (var)
16332 var->parameter = &arg;
16333 }
16334
16335 decl += merge(list: arglist);
16336 decl += ")";
16337 statement(ts&: decl);
16338}
16339
16340void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
16341{
16342 // Avoid potential cycles.
16343 if (func.active)
16344 return;
16345 func.active = true;
16346
16347 // If we depend on a function, emit that function before we emit our own function.
16348 for (auto block : func.blocks)
16349 {
16350 auto &b = get<SPIRBlock>(id: block);
16351 for (auto &i : b.ops)
16352 {
16353 auto ops = stream(instr: i);
16354 auto op = static_cast<Op>(i.op);
16355
16356 if (op == OpFunctionCall)
16357 {
16358 // Recursively emit functions which are called.
16359 uint32_t id = ops[2];
16360 emit_function(func&: get<SPIRFunction>(id), return_flags: ir.meta[ops[1]].decoration.decoration_flags);
16361 }
16362 }
16363 }
16364
16365 if (func.entry_line.file_id != 0)
16366 emit_line_directive(file_id: func.entry_line.file_id, line_literal: func.entry_line.line_literal);
16367 emit_function_prototype(func, return_flags);
16368 begin_scope();
16369
16370 if (func.self == ir.default_entry_point)
16371 emit_entry_point_declarations();
16372
16373 current_function = &func;
16374 auto &entry_block = get<SPIRBlock>(id: func.entry_block);
16375
16376 sort(first: begin(cont&: func.constant_arrays_needed_on_stack), last: end(cont&: func.constant_arrays_needed_on_stack));
16377 for (auto &array : func.constant_arrays_needed_on_stack)
16378 {
16379 auto &c = get<SPIRConstant>(id: array);
16380 auto &type = get<SPIRType>(id: c.constant_type);
16381 statement(ts: variable_decl(type, name: join(ts: "_", ts&: array, ts: "_array_copy")), ts: " = ", ts: constant_expression(c), ts: ";");
16382 }
16383
16384 for (auto &v : func.local_variables)
16385 {
16386 auto &var = get<SPIRVariable>(id: v);
16387 var.deferred_declaration = false;
16388
16389 if (variable_decl_is_remapped_storage(var, storage: StorageClassWorkgroup))
16390 {
16391 // Special variable types which cannot have initializers
16392 // need to be declared as standalone variables.
16393 // Comes from MSL, which can push global variables as local variables into the main function.
16394 add_local_variable_name(id: var.self);
16395 statement(ts: variable_decl(variable: var), ts: ";");
16396 var.deferred_declaration = false;
16397 }
16398 else if (var.storage == StorageClassPrivate)
16399 {
16400 // These variables will not have had their CFG usage analyzed, so move them to the entry block.
16401 // Comes from MSL, which can push global variables as local variables into the main function.
16402 // We could just declare them right now, but we would miss out on an important initialization case which is
16403 // LUT declaration in MSL.
16404 // If we don't declare the variable when it is assigned we're forced to go through a helper function
16405 // which copies elements one by one.
16406 add_local_variable_name(id: var.self);
16407
16408 if (var.initializer)
16409 {
16410 statement(ts: variable_decl(variable: var), ts: ";");
16411 var.deferred_declaration = false;
16412 }
16413 else
16414 {
16415 auto &dominated = entry_block.dominated_variables;
16416 if (find(first: begin(cont&: dominated), last: end(cont&: dominated), val: var.self) == end(cont&: dominated))
16417 entry_block.dominated_variables.push_back(t: var.self);
16418 var.deferred_declaration = true;
16419 }
16420 }
16421 else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
16422 {
16423 // No need to declare this variable, it has a static expression.
16424 var.deferred_declaration = false;
16425 }
16426 else if (expression_is_lvalue(id: v))
16427 {
16428 add_local_variable_name(id: var.self);
16429
16430 // Loop variables should never be declared early, as they are explicitly emitted in a loop.
16431 if (var.initializer && !var.loop_variable)
16432 statement(ts: variable_decl_function_local(var), ts: ";");
16433 else
16434 {
16435 // Don't declare the variable until first use; this declutters the GLSL output quite a lot.
16436 // If we don't touch the variable before the first branch,
16437 // declare it there, since the variable declaration needs to be in the top scope.
16438 var.deferred_declaration = true;
16439 }
16440 }
16441 else
16442 {
16443 // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
16444 // For these types (non-lvalue), we enforce forwarding through a shadowed variable.
16445 // This means that when we OpStore to these variables, we just write in the expression ID directly.
16446 // This breaks any kind of branching, since the variable must be statically assigned.
16447 // Branching on samplers and images would be pretty much impossible to fake in GLSL.
16448 var.statically_assigned = true;
16449 }
16450
16451 var.loop_variable_enable = false;
16452
16453 // Loop variables are never declared outside their for-loop, so block any implicit declaration.
16454 if (var.loop_variable)
16455 {
16456 var.deferred_declaration = false;
16457 // Need to reset the static expression so we can fall back to the initializer if need be.
16458 var.static_expression = 0;
16459 }
16460 }
16461
16462 // Enforce declaration order for regression testing purposes.
16463 for (auto &block_id : func.blocks)
16464 {
16465 auto &block = get<SPIRBlock>(id: block_id);
16466 sort(first: begin(cont&: block.dominated_variables), last: end(cont&: block.dominated_variables));
16467 }
16468
16469 for (auto &line : current_function->fixup_hooks_in)
16470 line();
16471
16472 emit_block_chain(block&: entry_block);
16473
16474 end_scope();
16475 processing_entry_point = false;
16476 statement(ts: "");
16477
16478 // Make sure deferred declaration state for local variables is cleared when we are done with the function.
16479 // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
16480 for (auto &v : func.local_variables)
16481 {
16482 auto &var = get<SPIRVariable>(id: v);
16483 var.deferred_declaration = false;
16484 }
16485}
16486
16487void CompilerGLSL::emit_fixup()
16488{
16489 if (is_vertex_like_shader())
16490 {
16491 if (options.vertex.fixup_clipspace)
16492 {
16493 const char *suffix = backend.float_literal_suffix ? "f" : "";
16494 statement(ts: "gl_Position.z = 2.0", ts&: suffix, ts: " * gl_Position.z - gl_Position.w;");
16495 }
16496
16497 if (options.vertex.flip_vert_y)
16498 statement(ts: "gl_Position.y = -gl_Position.y;");
16499 }
16500}
16501
16502void CompilerGLSL::flush_phi(BlockID from, BlockID to)
16503{
16504 auto &child = get<SPIRBlock>(id: to);
16505 if (child.ignore_phi_from_block == from)
16506 return;
16507
16508 unordered_set<uint32_t> temporary_phi_variables;
16509
16510 for (auto itr = begin(cont&: child.phi_variables); itr != end(cont&: child.phi_variables); ++itr)
16511 {
16512 auto &phi = *itr;
16513
16514 if (phi.parent == from)
16515 {
16516 auto &var = get<SPIRVariable>(id: phi.function_variable);
16517
16518 // A Phi variable might be a loop variable, so flush to static expression.
16519 if (var.loop_variable && !var.loop_variable_enable)
16520 var.static_expression = phi.local_variable;
16521 else
16522 {
16523 flush_variable_declaration(id: phi.function_variable);
16524
16525 // Check if we are going to write to a Phi variable that another statement will read from
16526 // as part of another Phi node in our target block.
16527 // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
16528 // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
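					// As a sketch, when two phis swap values, the copy emitted below produces GLSL like
					//   _12_copy = _12;
					//   _12 = _13;
					//   _13 = _12_copy;
					// (the numeric ids are hypothetical), so the second phi still reads the pre-branch value.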
16529 bool need_saved_temporary =
16530 find_if(first: itr + 1, last: end(cont&: child.phi_variables), pred: [&](const SPIRBlock::Phi &future_phi) -> bool {
16531 return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
16532 }) != end(cont&: child.phi_variables);
16533
16534 if (need_saved_temporary)
16535 {
16536 // Need to make sure we declare the phi variable with a copy at the right scope.
16537 // We cannot safely declare a temporary here since we might be inside a continue block.
16538 if (!var.allocate_temporary_copy)
16539 {
16540 var.allocate_temporary_copy = true;
16541 force_recompile();
16542 }
16543 statement(ts: "_", ts&: phi.function_variable, ts: "_copy", ts: " = ", ts: to_name(id: phi.function_variable), ts: ";");
16544 temporary_phi_variables.insert(x: phi.function_variable);
16545 }
16546
				// This might be called from within a continue block, so make sure we
				// use this to emit ESSL 1.0 compliant increments/decrements.
16549 auto lhs = to_expression(id: phi.function_variable);
16550
16551 string rhs;
16552 if (temporary_phi_variables.count(x: phi.local_variable))
16553 rhs = join(ts: "_", ts&: phi.local_variable, ts: "_copy");
16554 else
16555 rhs = to_pointer_expression(id: phi.local_variable);
16556
16557 if (!optimize_read_modify_write(type: get<SPIRType>(id: var.basetype), lhs, rhs))
16558 statement(ts&: lhs, ts: " = ", ts&: rhs, ts: ";");
16559 }
16560
16561 register_write(chain: phi.function_variable);
16562 }
16563 }
16564}
16565
16566void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
16567{
16568 auto &to_block = get<SPIRBlock>(id: to);
16569 if (from == to)
16570 return;
16571
16572 assert(is_continue(to));
16573 if (to_block.complex_continue)
16574 {
16575 // Just emit the whole block chain as is.
16576 auto usage_counts = expression_usage_counts;
16577
16578 emit_block_chain(block&: to_block);
16579
16580 // Expression usage counts are moot after returning from the continue block.
16581 expression_usage_counts = usage_counts;
16582 }
16583 else
16584 {
16585 auto &from_block = get<SPIRBlock>(id: from);
16586 bool outside_control_flow = false;
16587 uint32_t loop_dominator = 0;
16588
16589 // FIXME: Refactor this to not use the old loop_dominator tracking.
16590 if (from_block.merge_block)
16591 {
16592 // If we are a loop header, we don't set the loop dominator,
16593 // so just use "self" here.
16594 loop_dominator = from;
16595 }
16596 else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
16597 {
16598 loop_dominator = from_block.loop_dominator;
16599 }
16600
16601 if (loop_dominator != 0)
16602 {
16603 auto &cfg = get_cfg_for_current_function();
16604
16605 // For non-complex continue blocks, we implicitly branch to the continue block
16606 // by having the continue block be part of the loop header in for (; ; continue-block).
16607 outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from: loop_dominator, to: from);
16608 }
16609
		// Some simplification for for-loops. We always end up with a useless continue;
		// statement since we branch to a loop block anyway.
		// Walk the CFG: if the block invoking continue executes unconditionally within the loop body,
		// we can avoid writing out an explicit continue statement.
		// This mirrors the optimization applied to return statements when we know we're outside control flow.
16615 if (!outside_control_flow)
16616 statement(ts: "continue;");
16617 }
16618}
16619
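// Emits an unconditional branch from 'from' to 'to'. Depending on what 'to' is, this becomes
// "continue;", "break;" (possibly routed through a switch ladder-break variable), a silent
// fall-through to a merge/continue block, or a recursive emit of the target block chain.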
16620void CompilerGLSL::branch(BlockID from, BlockID to)
16621{
16622 flush_phi(from, to);
16623 flush_control_dependent_expressions(block: from);
16624
16625 bool to_is_continue = is_continue(next: to);
16626
16627 // This is only a continue if we branch to our loop dominator.
16628 if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(id: from).loop_dominator == to)
16629 {
16630 // This can happen if we had a complex continue block which was emitted.
16631 // Once the continue block tries to branch to the loop header, just emit continue;
16632 // and end the chain here.
16633 statement(ts: "continue;");
16634 }
16635 else if (from != to && is_break(next: to))
16636 {
16637 // We cannot break to ourselves, so check explicitly for from != to.
16638 // This case can trigger if a loop header is all three of these things:
16639 // - Continue block
16640 // - Loop header
16641 // - Break merge target all at once ...
16642
16643 // Very dirty workaround.
16644 // Switch constructs are able to break, but they cannot break out of a loop at the same time,
16645 // yet SPIR-V allows it.
16646 // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
16647 // write to the ladder here, and defer the break.
16648 // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
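		// A sketch of the emitted ladder (identifiers are illustrative only):
		//   bool _20_ladder_break = false;
		//   switch (sel) { case 0: { _20_ladder_break = true; break; } ... }
		//   if (_20_ladder_break) { break; }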
16649 if (is_loop_break(next: to))
16650 {
16651 for (size_t n = current_emitting_switch_stack.size(); n; n--)
16652 {
16653 auto *current_emitting_switch = current_emitting_switch_stack[n - 1];
16654
16655 if (current_emitting_switch &&
16656 current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
16657 get<SPIRBlock>(id: current_emitting_switch->loop_dominator).merge_block == to)
16658 {
16659 if (!current_emitting_switch->need_ladder_break)
16660 {
16661 force_recompile();
16662 current_emitting_switch->need_ladder_break = true;
16663 }
16664
16665 statement(ts: "_", ts&: current_emitting_switch->self, ts: "_ladder_break = true;");
16666 }
16667 else
16668 break;
16669 }
16670 }
16671 statement(ts: "break;");
16672 }
16673 else if (to_is_continue || from == to)
16674 {
		// The from == to case can happen for a do-while loop which branches into itself.
		// We don't mark these cases as continue blocks, but the only possible way to branch into
		// ourselves is by means of a continue block.
16678
16679 // If we are merging to a continue block, there is no need to emit the block chain for continue here.
16680 // We can branch to the continue block after we merge execution.
16681
16682 // Here we make use of structured control flow rules from spec:
16683 // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
16684 // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
16685 // If we are branching to a merge block, we must be inside a construct which dominates the merge block.
16686 auto &block_meta = ir.block_meta[to];
16687 bool branching_to_merge =
16688 (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
16689 ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
16690 if (!to_is_continue || !branching_to_merge)
16691 branch_to_continue(from, to);
16692 }
16693 else if (!is_conditional(next: to))
16694 emit_block_chain(block&: get<SPIRBlock>(id: to));
16695
16696 // It is important that we check for break before continue.
16697 // A block might serve two purposes, a break block for the inner scope, and
16698 // a continue block in the outer scope.
16699 // Inner scope always takes precedence.
16700}
16701
16702void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
16703{
16704 auto &from_block = get<SPIRBlock>(id: from);
16705 BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
16706
16707 // If we branch directly to our selection merge target, we don't need a code path.
16708 bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, to: true_block);
16709 bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, to: false_block);
16710
16711 if (!true_block_needs_code && !false_block_needs_code)
16712 return;
16713
16714 // We might have a loop merge here. Only consider selection flattening constructs.
16715 // Loop hints are handled explicitly elsewhere.
16716 if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten)
16717 emit_block_hints(block: from_block);
16718
16719 if (true_block_needs_code)
16720 {
16721 statement(ts: "if (", ts: to_expression(id: cond), ts: ")");
16722 begin_scope();
16723 branch(from, to: true_block);
16724 end_scope();
16725
16726 if (false_block_needs_code)
16727 {
16728 statement(ts: "else");
16729 begin_scope();
16730 branch(from, to: false_block);
16731 end_scope();
16732 }
16733 }
16734 else if (false_block_needs_code)
16735 {
16736 // Only need false path, use negative conditional.
16737 statement(ts: "if (!", ts: to_enclosed_expression(id: cond), ts: ")");
16738 begin_scope();
16739 branch(from, to: false_block);
16740 end_scope();
16741 }
16742}
16743
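// Captures the statements of a trivial continue block and merges them into a single
// comma-separated expression so it can be placed in the increment slot of a for-loop header,
// e.g. (illustrative): for (int i = 0; i < n; i++, j += 2) { ... }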
16744// FIXME: This currently cannot handle complex continue blocks
16745// as in do-while.
16746// This should be seen as a "trivial" continue block.
16747string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
16748{
16749 auto *block = &get<SPIRBlock>(id: continue_block);
16750
16751 // While emitting the continue block, declare_temporary will check this
16752 // if we have to emit temporaries.
16753 current_continue_block = block;
16754
16755 SmallVector<string> statements;
16756
16757 // Capture all statements into our list.
16758 auto *old = redirect_statement;
16759 redirect_statement = &statements;
16760
16761 // Stamp out all blocks one after each other.
16762 while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
16763 {
16764 // Write out all instructions we have in this block.
16765 emit_block_instructions(block&: *block);
16766
16767 // For plain branchless for/while continue blocks.
16768 if (block->next_block)
16769 {
16770 flush_phi(from: continue_block, to: block->next_block);
16771 block = &get<SPIRBlock>(id: block->next_block);
16772 }
		// For do-while blocks, the last block will be a selection block.
16774 else if (block->true_block && follow_true_block)
16775 {
16776 flush_phi(from: continue_block, to: block->true_block);
16777 block = &get<SPIRBlock>(id: block->true_block);
16778 }
16779 else if (block->false_block && follow_false_block)
16780 {
16781 flush_phi(from: continue_block, to: block->false_block);
16782 block = &get<SPIRBlock>(id: block->false_block);
16783 }
16784 else
16785 {
16786 SPIRV_CROSS_THROW("Invalid continue block detected!");
16787 }
16788 }
16789
16790 // Restore old pointer.
16791 redirect_statement = old;
16792
16793 // Somewhat ugly, strip off the last ';' since we use ',' instead.
16794 // Ideally, we should select this behavior in statement().
16795 for (auto &s : statements)
16796 {
16797 if (!s.empty() && s.back() == ';')
16798 s.erase(pos: s.size() - 1, n: 1);
16799 }
16800
16801 current_continue_block = nullptr;
16802 return merge(list: statements);
16803}
16804
16805void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
16806{
16807 // While loops do not take initializers, so declare all of them outside.
16808 for (auto &loop_var : block.loop_variables)
16809 {
16810 auto &var = get<SPIRVariable>(id: loop_var);
16811 statement(ts: variable_decl(variable: var), ts: ";");
16812 }
16813}
16814
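// Builds the initializer part of a for-loop header from the block's loop variables.
// If every candidate has an initializer and they all share one type, they can share one declaration,
// e.g. (illustrative): for (int i = 0, j = 0; ...; ...).
// Otherwise, variables that cannot be folded into the header are declared individually before the loop.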
16815string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
16816{
16817 if (block.loop_variables.empty())
16818 return "";
16819
16820 bool same_types = for_loop_initializers_are_same_type(block);
16821 // We can only declare for loop initializers if all variables are of same type.
16822 // If we cannot do this, declare individual variables before the loop header.
16823
16824 // We might have a loop variable candidate which was not assigned to for some reason.
16825 uint32_t missing_initializers = 0;
16826 for (auto &variable : block.loop_variables)
16827 {
16828 uint32_t expr = get<SPIRVariable>(id: variable).static_expression;
16829
16830 // Sometimes loop variables are initialized with OpUndef, but we can just declare
16831 // a plain variable without initializer in this case.
16832 if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
16833 missing_initializers++;
16834 }
16835
16836 if (block.loop_variables.size() == 1 && missing_initializers == 0)
16837 {
16838 return variable_decl(variable: get<SPIRVariable>(id: block.loop_variables.front()));
16839 }
16840 else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
16841 {
16842 for (auto &loop_var : block.loop_variables)
16843 statement(ts: variable_decl(variable: get<SPIRVariable>(id: loop_var)), ts: ";");
16844 return "";
16845 }
16846 else
16847 {
16848 // We have a mix of loop variables, either ones with a clear initializer, or ones without.
16849 // Separate the two streams.
16850 string expr;
16851
16852 for (auto &loop_var : block.loop_variables)
16853 {
16854 uint32_t static_expr = get<SPIRVariable>(id: loop_var).static_expression;
16855 if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
16856 {
16857 statement(ts: variable_decl(variable: get<SPIRVariable>(id: loop_var)), ts: ";");
16858 }
16859 else
16860 {
16861 auto &var = get<SPIRVariable>(id: loop_var);
16862 auto &type = get_variable_data_type(var);
16863 if (expr.empty())
16864 {
					// For-loop initializers are of the form: <type> id = value, id = value, id = value, etc.
16866 expr = join(ts: to_qualifiers_glsl(id: var.self), ts: type_to_glsl(type), ts: " ");
16867 }
16868 else
16869 {
16870 expr += ", ";
16871 // In MSL, being based on C++, the asterisk marking a pointer
16872 // binds to the identifier, not the type.
16873 if (type.pointer)
16874 expr += "* ";
16875 }
16876
16877 expr += join(ts: to_name(id: loop_var), ts: " = ", ts: to_pointer_expression(id: var.static_expression));
16878 }
16879 }
16880 return expr;
16881 }
16882}
16883
16884bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
16885{
16886 if (block.loop_variables.size() <= 1)
16887 return true;
16888
16889 uint32_t expected = 0;
16890 Bitset expected_flags;
16891 for (auto &var : block.loop_variables)
16892 {
16893 // Don't care about uninitialized variables as they will not be part of the initializers.
16894 uint32_t expr = get<SPIRVariable>(id: var).static_expression;
16895 if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
16896 continue;
16897
16898 if (expected == 0)
16899 {
16900 expected = get<SPIRVariable>(id: var).basetype;
16901 expected_flags = get_decoration_bitset(id: var);
16902 }
16903 else if (expected != get<SPIRVariable>(id: var).basetype)
16904 return false;
16905
16906 // Precision flags and things like that must also match.
16907 if (expected_flags != get_decoration_bitset(id: var))
16908 return false;
16909 }
16910
16911 return true;
16912}
16913
16914void CompilerGLSL::emit_block_instructions_with_masked_debug(SPIRBlock &block)
16915{
16916 // Have to block debug instructions such as OpLine here, since it will be treated as a statement otherwise,
16917 // which breaks loop optimizations.
16918 // Any line directive would be declared outside the loop body, which would just be confusing either way.
16919 bool old_block_debug_directives = block_debug_directives;
16920 block_debug_directives = true;
16921 emit_block_instructions(block);
16922 block_debug_directives = old_block_debug_directives;
16923}
16924
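// Attempts to emit the loop header as an idiomatic for/while statement.
// This only succeeds when the instructions preceding the branch emit no statements of their own,
// so the condition (and, for for-loops, the continue block) can be folded into the header.
// On failure, block optimization is disabled and a recompile is forced, falling back to the
// generic for (;;) { ... } pattern on the next pass.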
16925bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
16926{
16927 SPIRBlock::ContinueBlockType continue_type = continue_block_type(continue_block: get<SPIRBlock>(id: block.continue_block));
16928
16929 if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
16930 {
16931 uint32_t current_count = statement_count;
16932 // If we're trying to create a true for loop,
		// we need to make sure that all opcodes before the branch statement do not actually emit any code.
16934 // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
16935 emit_block_instructions_with_masked_debug(block);
16936
16937 bool condition_is_temporary = forced_temporaries.find(x: block.condition) == end(cont&: forced_temporaries);
16938
16939 bool flushes_phi = flush_phi_required(from: block.self, to: block.true_block) ||
16940 flush_phi_required(from: block.self, to: block.false_block);
16941
16942 // This can work! We only did trivial things which could be forwarded in block body!
16943 if (!flushes_phi && current_count == statement_count && condition_is_temporary)
16944 {
16945 switch (continue_type)
16946 {
16947 case SPIRBlock::ForLoop:
16948 {
16949 // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
16950 flush_undeclared_variables(block);
16951
				// It is important that we do this in this order, because
				// emitting the continue block can invalidate the condition expression.
16954 auto initializer = emit_for_loop_initializers(block);
16955 auto condition = to_expression(id: block.condition);
16956
16957 // Condition might have to be inverted.
16958 if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block)))
16959 condition = join(ts: "!", ts: enclose_expression(expr: condition));
16960
16961 emit_block_hints(block);
16962 if (method != SPIRBlock::MergeToSelectContinueForLoop)
16963 {
16964 auto continue_block = emit_continue_block(continue_block: block.continue_block, follow_true_block: false, follow_false_block: false);
16965 statement(ts: "for (", ts&: initializer, ts: "; ", ts&: condition, ts: "; ", ts&: continue_block, ts: ")");
16966 }
16967 else
16968 statement(ts: "for (", ts&: initializer, ts: "; ", ts&: condition, ts: "; )");
16969 break;
16970 }
16971
16972 case SPIRBlock::WhileLoop:
16973 {
16974 // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
16975 flush_undeclared_variables(block);
16976 emit_while_loop_initializers(block);
16977 emit_block_hints(block);
16978
16979 auto condition = to_expression(id: block.condition);
16980 // Condition might have to be inverted.
16981 if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block)))
16982 condition = join(ts: "!", ts: enclose_expression(expr: condition));
16983
16984 statement(ts: "while (", ts&: condition, ts: ")");
16985 break;
16986 }
16987
16988 default:
16989 block.disable_block_optimization = true;
16990 force_recompile();
16991 begin_scope(); // We'll see an end_scope() later.
16992 return false;
16993 }
16994
16995 begin_scope();
16996 return true;
16997 }
16998 else
16999 {
17000 block.disable_block_optimization = true;
17001 force_recompile();
17002 begin_scope(); // We'll see an end_scope() later.
17003 return false;
17004 }
17005 }
17006 else if (method == SPIRBlock::MergeToDirectForLoop)
17007 {
17008 auto &child = get<SPIRBlock>(id: block.next_block);
17009
17010 // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
17011 flush_undeclared_variables(block&: child);
17012
17013 uint32_t current_count = statement_count;
17014
17015 // If we're trying to create a true for loop,
		// we need to make sure that all opcodes before the branch statement do not actually emit any code.
17017 // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
17018 emit_block_instructions_with_masked_debug(block&: child);
17019
17020 bool condition_is_temporary = forced_temporaries.find(x: child.condition) == end(cont&: forced_temporaries);
17021
17022 bool flushes_phi = flush_phi_required(from: child.self, to: child.true_block) ||
17023 flush_phi_required(from: child.self, to: child.false_block);
17024
17025 if (!flushes_phi && current_count == statement_count && condition_is_temporary)
17026 {
17027 uint32_t target_block = child.true_block;
17028
17029 switch (continue_type)
17030 {
17031 case SPIRBlock::ForLoop:
17032 {
				// It is important that we do this in this order, because
				// emitting the continue block can invalidate the condition expression.
17035 auto initializer = emit_for_loop_initializers(block);
17036 auto condition = to_expression(id: child.condition);
17037
17038 // Condition might have to be inverted.
17039 if (execution_is_noop(from: get<SPIRBlock>(id: child.true_block), to: get<SPIRBlock>(id: block.merge_block)))
17040 {
17041 condition = join(ts: "!", ts: enclose_expression(expr: condition));
17042 target_block = child.false_block;
17043 }
17044
17045 auto continue_block = emit_continue_block(continue_block: block.continue_block, follow_true_block: false, follow_false_block: false);
17046 emit_block_hints(block);
17047 statement(ts: "for (", ts&: initializer, ts: "; ", ts&: condition, ts: "; ", ts&: continue_block, ts: ")");
17048 break;
17049 }
17050
17051 case SPIRBlock::WhileLoop:
17052 {
17053 emit_while_loop_initializers(block);
17054 emit_block_hints(block);
17055
17056 auto condition = to_expression(id: child.condition);
17057 // Condition might have to be inverted.
17058 if (execution_is_noop(from: get<SPIRBlock>(id: child.true_block), to: get<SPIRBlock>(id: block.merge_block)))
17059 {
17060 condition = join(ts: "!", ts: enclose_expression(expr: condition));
17061 target_block = child.false_block;
17062 }
17063
17064 statement(ts: "while (", ts&: condition, ts: ")");
17065 break;
17066 }
17067
17068 default:
17069 block.disable_block_optimization = true;
17070 force_recompile();
17071 begin_scope(); // We'll see an end_scope() later.
17072 return false;
17073 }
17074
17075 begin_scope();
17076 branch(from: child.self, to: target_block);
17077 return true;
17078 }
17079 else
17080 {
17081 block.disable_block_optimization = true;
17082 force_recompile();
17083 begin_scope(); // We'll see an end_scope() later.
17084 return false;
17085 }
17086 }
17087 else
17088 return false;
17089}
17090
17091void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
17092{
17093 for (auto &v : block.dominated_variables)
17094 flush_variable_declaration(id: v);
17095}
17096
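// Declares temporaries that must outlive the construct they are computed in, e.g. (illustrative)
// a value created inside "for (;;) { _tmp = ...; break; }" and consumed after the loop must be
// declared before the loop header so it remains in scope at the consumption site.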
17097void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
17098{
17099 // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
17100 // Need to sort these to ensure that reference output is stable.
	sort(begin(temporaries), end(temporaries),
	     [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
17103
17104 for (auto &tmp : temporaries)
17105 {
17106 auto &type = get<SPIRType>(id: tmp.first);
17107
17108 // There are some rare scenarios where we are asked to declare pointer types as hoisted temporaries.
17109 // This should be ignored unless we're doing actual variable pointers and backend supports it.
17110 // Access chains cannot normally be lowered to temporaries in GLSL and HLSL.
17111 if (type.pointer && !backend.native_pointers)
17112 continue;
17113
17114 add_local_variable_name(id: tmp.second);
17115 auto &flags = get_decoration_bitset(id: tmp.second);
17116
17117 // Not all targets support pointer literals, so don't bother with that case.
17118 string initializer;
17119 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
17120 initializer = join(ts: " = ", ts: to_zero_initialized_expression(type_id: tmp.first));
17121
17122 statement(ts: flags_to_qualifiers_glsl(type, flags), ts: variable_decl(type, name: to_name(id: tmp.second)), ts&: initializer, ts: ";");
17123
17124 hoisted_temporaries.insert(x: tmp.second);
17125 forced_temporaries.insert(x: tmp.second);
17126
17127 // The temporary might be read from before it's assigned, set up the expression now.
17128 set<SPIRExpression>(id: tmp.second, args: to_name(id: tmp.second), args&: tmp.first, args: true);
17129
17130 // If we have hoisted temporaries in multi-precision contexts, emit that here too ...
17131 // We will not be able to analyze hoisted-ness for dependent temporaries that we hallucinate here.
17132 auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(x: tmp.second);
17133 if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end())
17134 {
17135 uint32_t mirror_id = mirrored_precision_itr->second;
17136 auto &mirror_flags = get_decoration_bitset(id: mirror_id);
17137 statement(ts: flags_to_qualifiers_glsl(type, flags: mirror_flags),
17138 ts: variable_decl(type, name: to_name(id: mirror_id)),
17139 ts&: initializer, ts: ";");
17140 // The temporary might be read from before it's assigned, set up the expression now.
17141 set<SPIRExpression>(id: mirror_id, args: to_name(id: mirror_id), args&: tmp.first, args: true);
17142 hoisted_temporaries.insert(x: mirror_id);
17143 }
17144 }
17145}
17146
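// Main driver for structured control-flow output: emits a block together with everything it
// dominates, handling loop headers, selection and switch constructs, terminators, and the
// surrounding bookkeeping (hoisted temporaries, deferred declarations, phi flushing).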
17147void CompilerGLSL::emit_block_chain(SPIRBlock &block)
17148{
17149 bool select_branch_to_true_block = false;
17150 bool select_branch_to_false_block = false;
17151 bool skip_direct_branch = false;
17152 bool emitted_loop_header_variables = false;
17153 bool force_complex_continue_block = false;
17154 ValueSaver<uint32_t> loop_level_saver(current_loop_level);
17155
17156 if (block.merge == SPIRBlock::MergeLoop)
17157 add_loop_level();
17158
17159 // If we're emitting PHI variables with precision aliases, we have to emit them as hoisted temporaries.
17160 for (auto var_id : block.dominated_variables)
17161 {
17162 auto &var = get<SPIRVariable>(id: var_id);
17163 if (var.phi_variable)
17164 {
17165 auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(x: var_id);
17166 if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end() &&
17167 find_if(first: block.declare_temporary.begin(), last: block.declare_temporary.end(),
17168 pred: [mirrored_precision_itr](const std::pair<TypeID, VariableID> &p) {
17169 return p.second == mirrored_precision_itr->second;
17170 }) == block.declare_temporary.end())
17171 {
17172 block.declare_temporary.push_back(t: { var.basetype, mirrored_precision_itr->second });
17173 }
17174 }
17175 }
17176
17177 emit_hoisted_temporaries(temporaries&: block.declare_temporary);
17178
17179 SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
17180 if (block.continue_block)
17181 {
17182 continue_type = continue_block_type(continue_block: get<SPIRBlock>(id: block.continue_block));
17183 // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
17184 if (continue_type == SPIRBlock::ComplexLoop)
17185 block.complex_continue = true;
17186 }
17187
17188 // If we have loop variables, stop masking out access to the variable now.
17189 for (auto var_id : block.loop_variables)
17190 {
17191 auto &var = get<SPIRVariable>(id: var_id);
17192 var.loop_variable_enable = true;
17193 // We're not going to declare the variable directly, so emit a copy here.
17194 emit_variable_temporary_copies(var);
17195 }
17196
17197 // Remember deferred declaration state. We will restore it before returning.
17198 SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
17199 for (size_t i = 0; i < block.dominated_variables.size(); i++)
17200 {
17201 uint32_t var_id = block.dominated_variables[i];
17202 auto &var = get<SPIRVariable>(id: var_id);
17203 rearm_dominated_variables[i] = var.deferred_declaration;
17204 }
17205
	// This is the method often used by spirv-opt to implement loops.
	// The loop header goes straight into the continue block.
	// However, don't attempt this on ESSL 1.0: if a loop variable is used in the continue block,
	// this loop method will not work.
17210 if (!is_legacy_es() && block_is_loop_candidate(block, method: SPIRBlock::MergeToSelectContinueForLoop))
17211 {
17212 flush_undeclared_variables(block);
17213 if (attempt_emit_loop_header(block, method: SPIRBlock::MergeToSelectContinueForLoop))
17214 {
17215 if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block)))
17216 select_branch_to_false_block = true;
17217 else
17218 select_branch_to_true_block = true;
17219
17220 emitted_loop_header_variables = true;
17221 force_complex_continue_block = true;
17222 }
17223 }
17224 // This is the older loop behavior in glslang which branches to loop body directly from the loop header.
17225 else if (block_is_loop_candidate(block, method: SPIRBlock::MergeToSelectForLoop))
17226 {
17227 flush_undeclared_variables(block);
17228 if (attempt_emit_loop_header(block, method: SPIRBlock::MergeToSelectForLoop))
17229 {
			// The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
17231 if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block)))
17232 select_branch_to_false_block = true;
17233 else
17234 select_branch_to_true_block = true;
17235
17236 emitted_loop_header_variables = true;
17237 }
17238 }
17239 // This is the newer loop behavior in glslang which branches from Loop header directly to
17240 // a new block, which in turn has a OpBranchSelection without a selection merge.
17241 else if (block_is_loop_candidate(block, method: SPIRBlock::MergeToDirectForLoop))
17242 {
17243 flush_undeclared_variables(block);
17244 if (attempt_emit_loop_header(block, method: SPIRBlock::MergeToDirectForLoop))
17245 {
17246 skip_direct_branch = true;
17247 emitted_loop_header_variables = true;
17248 }
17249 }
17250 else if (continue_type == SPIRBlock::DoWhileLoop)
17251 {
17252 flush_undeclared_variables(block);
17253 emit_while_loop_initializers(block);
17254 emitted_loop_header_variables = true;
17255 // We have some temporaries where the loop header is the dominator.
17256 // We risk a case where we have code like:
17257 // for (;;) { create-temporary; break; } consume-temporary;
17258 // so force-declare temporaries here.
17259 emit_hoisted_temporaries(temporaries&: block.potential_declare_temporary);
17260 statement(ts: "do");
17261 begin_scope();
17262
17263 emit_block_instructions(block);
17264 }
17265 else if (block.merge == SPIRBlock::MergeLoop)
17266 {
17267 flush_undeclared_variables(block);
17268 emit_while_loop_initializers(block);
17269 emitted_loop_header_variables = true;
17270
17271 // We have a generic loop without any distinguishable pattern like for, while or do while.
17272 get<SPIRBlock>(id: block.continue_block).complex_continue = true;
17273 continue_type = SPIRBlock::ComplexLoop;
17274
17275 // We have some temporaries where the loop header is the dominator.
17276 // We risk a case where we have code like:
17277 // for (;;) { create-temporary; break; } consume-temporary;
17278 // so force-declare temporaries here.
17279 emit_hoisted_temporaries(temporaries&: block.potential_declare_temporary);
17280 emit_block_hints(block);
17281 statement(ts: "for (;;)");
17282 begin_scope();
17283
17284 emit_block_instructions(block);
17285 }
17286 else
17287 {
17288 emit_block_instructions(block);
17289 }
17290
17291 // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
17292 // as writes to said loop variables might have been masked out, we need a recompile.
17293 if (!emitted_loop_header_variables && !block.loop_variables.empty())
17294 {
17295 force_recompile_guarantee_forward_progress();
17296 for (auto var : block.loop_variables)
17297 get<SPIRVariable>(id: var).loop_variable = false;
17298 block.loop_variables.clear();
17299 }
17300
17301 flush_undeclared_variables(block);
17302 bool emit_next_block = true;
17303
17304 // Handle end of block.
17305 switch (block.terminator)
17306 {
17307 case SPIRBlock::Direct:
17308 // True when emitting complex continue block.
17309 if (block.loop_dominator == block.next_block)
17310 {
17311 branch(from: block.self, to: block.next_block);
17312 emit_next_block = false;
17313 }
17314 // True if MergeToDirectForLoop succeeded.
17315 else if (skip_direct_branch)
17316 emit_next_block = false;
17317 else if (is_continue(next: block.next_block) || is_break(next: block.next_block) || is_conditional(next: block.next_block))
17318 {
17319 branch(from: block.self, to: block.next_block);
17320 emit_next_block = false;
17321 }
17322 break;
17323
17324 case SPIRBlock::Select:
17325 // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
17326 if (select_branch_to_true_block)
17327 {
17328 if (force_complex_continue_block)
17329 {
17330 assert(block.true_block == block.continue_block);
17331
17332 // We're going to emit a continue block directly here, so make sure it's marked as complex.
17333 auto &complex_continue = get<SPIRBlock>(id: block.continue_block).complex_continue;
17334 bool old_complex = complex_continue;
17335 complex_continue = true;
17336 branch(from: block.self, to: block.true_block);
17337 complex_continue = old_complex;
17338 }
17339 else
17340 branch(from: block.self, to: block.true_block);
17341 }
17342 else if (select_branch_to_false_block)
17343 {
17344 if (force_complex_continue_block)
17345 {
17346 assert(block.false_block == block.continue_block);
17347
17348 // We're going to emit a continue block directly here, so make sure it's marked as complex.
17349 auto &complex_continue = get<SPIRBlock>(id: block.continue_block).complex_continue;
17350 bool old_complex = complex_continue;
17351 complex_continue = true;
17352 branch(from: block.self, to: block.false_block);
17353 complex_continue = old_complex;
17354 }
17355 else
17356 branch(from: block.self, to: block.false_block);
17357 }
17358 else
17359 branch(from: block.self, cond: block.condition, true_block: block.true_block, false_block: block.false_block);
17360 break;
17361
17362 case SPIRBlock::MultiSelect:
17363 {
17364 auto &type = expression_type(id: block.condition);
17365 bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort ||
17366 type.basetype == SPIRType::UByte || type.basetype == SPIRType::UInt64;
17367
17368 if (block.merge == SPIRBlock::MergeNone)
17369 SPIRV_CROSS_THROW("Switch statement is not structured");
17370
17371 if (!backend.support_64bit_switch && (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64))
17372 {
17373 // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
17374 SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
17375 }
17376
17377 const char *label_suffix = "";
17378 if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
17379 label_suffix = "u";
17380 else if (type.basetype == SPIRType::Int64 && backend.support_64bit_switch)
17381 label_suffix = "l";
17382 else if (type.basetype == SPIRType::UInt64 && backend.support_64bit_switch)
17383 label_suffix = "ul";
17384 else if (type.basetype == SPIRType::UShort)
17385 label_suffix = backend.uint16_t_literal_suffix;
17386 else if (type.basetype == SPIRType::Short)
17387 label_suffix = backend.int16_t_literal_suffix;
17388
17389 current_emitting_switch_stack.push_back(t: &block);
17390
17391 if (block.need_ladder_break)
17392 statement(ts: "bool _", ts&: block.self, ts: "_ladder_break = false;");
17393
17394 // Find all unique case constructs.
17395 unordered_map<uint32_t, SmallVector<uint64_t>> case_constructs;
17396 SmallVector<uint32_t> block_declaration_order;
17397 SmallVector<uint64_t> literals_to_merge;
17398
17399 // If a switch case branches to the default block for some reason, we can just remove that literal from consideration
17400 // and let the default: block handle it.
		// Section 2.11 of the SPIR-V spec states that for fall-through cases there is a very strict declaration order which we can take advantage of here.
17402 // We only need to consider possible fallthrough if order[i] branches to order[i + 1].
17403 auto &cases = get_case_list(block);
17404 for (auto &c : cases)
17405 {
17406 if (c.block != block.next_block && c.block != block.default_block)
17407 {
17408 if (!case_constructs.count(x: c.block))
17409 block_declaration_order.push_back(t: c.block);
17410 case_constructs[c.block].push_back(t: c.value);
17411 }
17412 else if (c.block == block.next_block && block.default_block != block.next_block)
17413 {
17414 // We might have to flush phi inside specific case labels.
17415 // If we can piggyback on default:, do so instead.
17416 literals_to_merge.push_back(t: c.value);
17417 }
17418 }
17419
17420 // Empty literal array -> default.
17421 if (block.default_block != block.next_block)
17422 {
17423 auto &default_block = get<SPIRBlock>(id: block.default_block);
17424
17425 // We need to slide in the default block somewhere in this chain
17426 // if there are fall-through scenarios since the default is declared separately in OpSwitch.
17427 // Only consider trivial fall-through cases here.
17428 size_t num_blocks = block_declaration_order.size();
17429 bool injected_block = false;
17430
17431 for (size_t i = 0; i < num_blocks; i++)
17432 {
17433 auto &case_block = get<SPIRBlock>(id: block_declaration_order[i]);
17434 if (execution_is_direct_branch(from: case_block, to: default_block))
17435 {
17436 // Fallthrough to default block, we must inject the default block here.
17437 block_declaration_order.insert(itr: begin(cont&: block_declaration_order) + i + 1, value: block.default_block);
17438 injected_block = true;
17439 break;
17440 }
17441 else if (execution_is_direct_branch(from: default_block, to: case_block))
17442 {
17443 // Default case is falling through to another case label, we must inject the default block here.
17444 block_declaration_order.insert(itr: begin(cont&: block_declaration_order) + i, value: block.default_block);
17445 injected_block = true;
17446 break;
17447 }
17448 }
17449
17450 // Order does not matter.
17451 if (!injected_block)
17452 block_declaration_order.push_back(t: block.default_block);
17453 else if (is_legacy_es())
17454 SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");
17455
17456 case_constructs[block.default_block] = {};
17457 }
17458
17459 size_t num_blocks = block_declaration_order.size();
17460
17461 const auto to_case_label = [](uint64_t literal, uint32_t width, bool is_unsigned_case) -> string
17462 {
17463 if (is_unsigned_case)
17464 return convert_to_string(t: literal);
17465
17466 // For smaller cases, the literals are compiled as 32 bit wide
17467 // literals so we don't need to care for all sizes specifically.
17468 if (width <= 32)
17469 {
17470 return convert_to_string(t: int64_t(int32_t(literal)));
17471 }
17472
17473 return convert_to_string(t: int64_t(literal));
17474 };
17475
17476 const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint64_t> &labels,
17477 const char *suffix) -> string {
17478 string ret;
17479 size_t count = labels.size();
17480 for (size_t i = 0; i < count; i++)
17481 {
17482 if (i)
17483 ret += " || ";
17484 ret += join(ts: count > 1 ? "(" : "", ts: to_enclosed_expression(id: condition), ts: " == ", ts: labels[i], ts&: suffix,
17485 ts: count > 1 ? ")" : "");
17486 }
17487 return ret;
17488 };
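		// to_case_label() prints a single case literal with the correct signedness for its width,
		// while to_legacy_case_label() builds the comparison chain used by the ESSL 1.0 fallback
		// where "switch" is unavailable, e.g. (illustrative):
		//   if ((sel == 1) || (sel == 2)) { ... } else if (sel == 3) { ... }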
17489
17490 // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
17491 // we need to flush phi nodes outside the switch block in a branch,
17492 // and skip any Phi handling inside the case label to make fall-through work as expected.
17493 // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
17494 // inside the case label if at all possible.
17495 for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
17496 {
17497 if (flush_phi_required(from: block.self, to: block_declaration_order[i]) &&
17498 flush_phi_required(from: block_declaration_order[i - 1], to: block_declaration_order[i]))
17499 {
17500 uint32_t target_block = block_declaration_order[i];
17501
17502 // Make sure we flush Phi, it might have been marked to be ignored earlier.
17503 get<SPIRBlock>(id: target_block).ignore_phi_from_block = 0;
17504
17505 auto &literals = case_constructs[target_block];
17506
17507 if (literals.empty())
17508 {
17509 // Oh boy, gotta make a complete negative test instead! o.o
17510 // Find all possible literals that would *not* make us enter the default block.
17511 // If none of those literals match, we flush Phi ...
17512 SmallVector<string> conditions;
17513 for (size_t j = 0; j < num_blocks; j++)
17514 {
17515 auto &negative_literals = case_constructs[block_declaration_order[j]];
17516 for (auto &case_label : negative_literals)
17517 conditions.push_back(t: join(ts: to_enclosed_expression(id: block.condition),
17518 ts: " != ", ts: to_case_label(case_label, type.width, unsigned_case)));
17519 }
17520
17521 statement(ts: "if (", ts: merge(list: conditions, between: " && "), ts: ")");
17522 begin_scope();
17523 flush_phi(from: block.self, to: target_block);
17524 end_scope();
17525 }
17526 else
17527 {
17528 SmallVector<string> conditions;
17529 conditions.reserve(count: literals.size());
17530 for (auto &case_label : literals)
17531 conditions.push_back(t: join(ts: to_enclosed_expression(id: block.condition),
17532 ts: " == ", ts: to_case_label(case_label, type.width, unsigned_case)));
17533 statement(ts: "if (", ts: merge(list: conditions, between: " || "), ts: ")");
17534 begin_scope();
17535 flush_phi(from: block.self, to: target_block);
17536 end_scope();
17537 }
17538
17539 // Mark the block so that we don't flush Phi from header to case label.
17540 get<SPIRBlock>(id: target_block).ignore_phi_from_block = block.self;
17541 }
17542 }
17543
17544 // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate
17545 // non-structured exits with the help of a switch block.
17546 // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
17547 bool block_like_switch = cases.empty();
17548
17549 // If this is true, the switch is completely meaningless, and we should just avoid it.
17550 bool collapsed_switch = block_like_switch && block.default_block == block.next_block;
17551
17552 if (!collapsed_switch)
17553 {
17554 if (block_like_switch || is_legacy_es())
17555 {
17556 // ESSL 1.0 is not guaranteed to support do/while.
17557 if (is_legacy_es())
17558 {
17559 uint32_t counter = statement_count;
17560 statement(ts: "for (int spvDummy", ts&: counter, ts: " = 0; spvDummy", ts&: counter, ts: " < 1; spvDummy", ts&: counter,
17561 ts: "++)");
17562 }
17563 else
17564 statement(ts: "do");
17565 }
17566 else
17567 {
17568 emit_block_hints(block);
17569 statement(ts: "switch (", ts: to_unpacked_expression(id: block.condition), ts: ")");
17570 }
17571 begin_scope();
17572 }
17573
17574 for (size_t i = 0; i < num_blocks; i++)
17575 {
17576 uint32_t target_block = block_declaration_order[i];
17577 auto &literals = case_constructs[target_block];
17578
17579 if (literals.empty())
17580 {
17581 // Default case.
17582 if (!block_like_switch)
17583 {
17584 if (is_legacy_es())
17585 statement(ts: "else");
17586 else
17587 statement(ts: "default:");
17588 }
17589 }
17590 else
17591 {
17592 if (is_legacy_es())
17593 {
17594 statement(ts: (i ? "else " : ""), ts: "if (", ts: to_legacy_case_label(block.condition, literals, label_suffix),
17595 ts: ")");
17596 }
17597 else
17598 {
17599 for (auto &case_literal : literals)
17600 {
17601 // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
17602 statement(ts: "case ", ts: to_case_label(case_literal, type.width, unsigned_case), ts&: label_suffix, ts: ":");
17603 }
17604 }
17605 }
17606
17607 auto &case_block = get<SPIRBlock>(id: target_block);
17608 if (backend.support_case_fallthrough && i + 1 < num_blocks &&
17609 execution_is_direct_branch(from: case_block, to: get<SPIRBlock>(id: block_declaration_order[i + 1])))
17610 {
17611 // We will fall through here, so just terminate the block chain early.
17612 // We still need to deal with Phi potentially.
				// No need for a stack-like structure here since we only fall through when there is a
				// single trivial branch to the fall-through target.
17615 current_emitting_switch_fallthrough = true;
17616 }
17617 else
17618 current_emitting_switch_fallthrough = false;
17619
17620 if (!block_like_switch)
17621 begin_scope();
17622 branch(from: block.self, to: target_block);
17623 if (!block_like_switch)
17624 end_scope();
17625
17626 current_emitting_switch_fallthrough = false;
17627 }
17628
17629 // Might still have to flush phi variables if we branch from loop header directly to merge target.
17630 // This is supposed to emit all cases where we branch from header to merge block directly.
		// There are two main scenarios where we cannot rely on default fallthrough.
		// - There is an explicit default: label already.
		//   In this case, literals_to_merge needs to form its own "default" case, so that we avoid executing that block.
17634 // - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there.
17635 bool header_merge_requires_phi = flush_phi_required(from: block.self, to: block.next_block);
17636 bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty();
17637 if (!collapsed_switch && ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty()))
17638 {
17639 for (auto &case_literal : literals_to_merge)
17640 statement(ts: "case ", ts: to_case_label(case_literal, type.width, unsigned_case), ts&: label_suffix, ts: ":");
17641
17642 if (block.default_block == block.next_block)
17643 {
17644 if (is_legacy_es())
17645 statement(ts: "else");
17646 else
17647 statement(ts: "default:");
17648 }
17649
17650 begin_scope();
17651 flush_phi(from: block.self, to: block.next_block);
17652 statement(ts: "break;");
17653 end_scope();
17654 }
17655
17656 if (!collapsed_switch)
17657 {
17658 if (block_like_switch && !is_legacy_es())
17659 end_scope_decl(decl: "while(false)");
17660 else
17661 end_scope();
17662 }
17663 else
17664 flush_phi(from: block.self, to: block.next_block);
17665
17666 if (block.need_ladder_break)
17667 {
17668 statement(ts: "if (_", ts&: block.self, ts: "_ladder_break)");
17669 begin_scope();
17670 statement(ts: "break;");
17671 end_scope();
17672 }
17673
17674 current_emitting_switch_stack.pop_back();
17675 break;
17676 }
17677
17678 case SPIRBlock::Return:
17679 {
17680 for (auto &line : current_function->fixup_hooks_out)
17681 line();
17682
17683 if (processing_entry_point)
17684 emit_fixup();
17685
17686 auto &cfg = get_cfg_for_current_function();
17687
17688 if (block.return_value)
17689 {
17690 auto &type = expression_type(id: block.return_value);
17691 if (!type.array.empty() && !backend.can_return_array)
17692 {
17693 // If we cannot return arrays, we will have a special out argument we can write to instead.
				// The backend is responsible for setting this up, and for redirecting the return values as appropriate.
17695 if (ir.ids[block.return_value].get_type() != TypeUndef)
17696 {
17697 emit_array_copy(expr: "spvReturnValue", lhs_id: 0, rhs_id: block.return_value, lhs_storage: StorageClassFunction,
17698 rhs_storage: get_expression_effective_storage_class(ptr: block.return_value));
17699 }
17700
17701 if (!cfg.node_terminates_control_flow_in_sub_graph(from: current_function->entry_block, to: block.self) ||
17702 block.loop_dominator != BlockID(SPIRBlock::NoDominator))
17703 {
17704 statement(ts: "return;");
17705 }
17706 }
17707 else
17708 {
17709 // OpReturnValue can return Undef, so don't emit anything for this case.
17710 if (ir.ids[block.return_value].get_type() != TypeUndef)
17711 statement(ts: "return ", ts: to_unpacked_expression(id: block.return_value), ts: ";");
17712 }
17713 }
17714 else if (!cfg.node_terminates_control_flow_in_sub_graph(from: current_function->entry_block, to: block.self) ||
17715 block.loop_dominator != BlockID(SPIRBlock::NoDominator))
17716 {
17717 // If this block is the very final block and not called from control flow,
17718 // we do not need an explicit return which looks out of place. Just end the function here.
			// In the very weird case of for(;;) { return; }, the return executes unconditionally,
17720 // but we actually need a return here ...
17721 statement(ts: "return;");
17722 }
17723 break;
17724 }
17725
17726 // If the Kill is terminating a block with a (probably synthetic) return value, emit a return value statement.
17727 case SPIRBlock::Kill:
17728 statement(ts&: backend.discard_literal, ts: ";");
17729 if (block.return_value)
17730 statement(ts: "return ", ts: to_unpacked_expression(id: block.return_value), ts: ";");
17731 break;
17732
17733 case SPIRBlock::Unreachable:
17734 {
17735 // Avoid emitting false fallthrough, which can happen for
17736 // if (cond) break; else discard; inside a case label.
17737 // Discard is not always implementable as a terminator.
17738
17739 auto &cfg = get_cfg_for_current_function();
17740 bool inner_dominator_is_switch = false;
17741 ID id = block.self;
17742
17743 while (id)
17744 {
17745 auto &iter_block = get<SPIRBlock>(id);
17746 if (iter_block.terminator == SPIRBlock::MultiSelect ||
17747 iter_block.merge == SPIRBlock::MergeLoop)
17748 {
17749 ID next_block = iter_block.merge == SPIRBlock::MergeLoop ?
17750 iter_block.merge_block : iter_block.next_block;
17751 bool outside_construct = next_block && cfg.find_common_dominator(a: next_block, b: block.self) == next_block;
17752 if (!outside_construct)
17753 {
17754 inner_dominator_is_switch = iter_block.terminator == SPIRBlock::MultiSelect;
17755 break;
17756 }
17757 }
17758
17759 if (cfg.get_preceding_edges(block: id).empty())
17760 break;
17761
17762 id = cfg.get_immediate_dominator(block: id);
17763 }
17764
17765 if (inner_dominator_is_switch)
17766 statement(ts: "break; // unreachable workaround");
17767
17768 emit_next_block = false;
17769 break;
17770 }
17771
17772 case SPIRBlock::IgnoreIntersection:
17773 statement(ts: "ignoreIntersectionEXT;");
17774 break;
17775
17776 case SPIRBlock::TerminateRay:
17777 statement(ts: "terminateRayEXT;");
17778 break;
17779
17780 case SPIRBlock::EmitMeshTasks:
17781 emit_mesh_tasks(block);
17782 break;
17783
17784 default:
17785 SPIRV_CROSS_THROW("Unimplemented block terminator.");
17786 }
17787
17788 if (block.next_block && emit_next_block)
17789 {
17790 // If we hit this case, we're dealing with an unconditional branch, which means we will output
		// that block after this. If we had a selection merge, we have already flushed the phi variables.
17792 if (block.merge != SPIRBlock::MergeSelection)
17793 {
17794 flush_phi(from: block.self, to: block.next_block);
17795 // For a direct branch, need to remember to invalidate expressions in the next linear block instead.
17796 get<SPIRBlock>(id: block.next_block).invalidate_expressions = block.invalidate_expressions;
17797 }
17798
17799 // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
17800 if (!current_emitting_switch_fallthrough)
17801 {
17802 // For merge selects we might have ignored the fact that a merge target
17803 // could have been a break; or continue;
17804 // We will need to deal with it here.
17805 if (is_loop_break(next: block.next_block))
17806 {
17807 // Cannot check for just break, because switch statements will also use break.
17808 assert(block.merge == SPIRBlock::MergeSelection);
17809 statement(ts: "break;");
17810 }
17811 else if (is_continue(next: block.next_block))
17812 {
17813 assert(block.merge == SPIRBlock::MergeSelection);
17814 branch_to_continue(from: block.self, to: block.next_block);
17815 }
17816 else if (BlockID(block.self) != block.next_block)
17817 emit_block_chain(block&: get<SPIRBlock>(id: block.next_block));
17818 }
17819 }
17820
17821 if (block.merge == SPIRBlock::MergeLoop)
17822 {
17823 if (continue_type == SPIRBlock::DoWhileLoop)
17824 {
17825 // Make sure that we run the continue block to get the expressions set, but this
17826 // should become an empty string.
17827 // We have no fallbacks if we cannot forward everything to temporaries ...
17828 const auto &continue_block = get<SPIRBlock>(id: block.continue_block);
17829 bool positive_test = execution_is_noop(from: get<SPIRBlock>(id: continue_block.true_block),
17830 to: get<SPIRBlock>(id: continue_block.loop_dominator));
17831
17832 uint32_t current_count = statement_count;
17833 auto statements = emit_continue_block(continue_block: block.continue_block, follow_true_block: positive_test, follow_false_block: !positive_test);
17834 if (statement_count != current_count)
17835 {
17836 // The DoWhile block has side effects, force ComplexLoop pattern next pass.
17837 get<SPIRBlock>(id: block.continue_block).complex_continue = true;
17838 force_recompile();
17839 }
17840
17841 // Might have to invert the do-while test here.
17842 auto condition = to_expression(id: continue_block.condition);
17843 if (!positive_test)
17844 condition = join(ts: "!", ts: enclose_expression(expr: condition));
17845
17846 end_scope_decl(decl: join(ts: "while (", ts&: condition, ts: ")"));
17847 }
17848 else
17849 end_scope();
17850
17851 loop_level_saver.release();
17852
17853 // We cannot break out of two loops at once, so don't check for break; here.
17854 // Using block.self as the "from" block isn't quite right, but it has the same scope
17855 // and dominance structure, so it's fine.
17856 if (is_continue(next: block.merge_block))
17857 branch_to_continue(from: block.self, to: block.merge_block);
17858 else
17859 emit_block_chain(block&: get<SPIRBlock>(id: block.merge_block));
17860 }
17861
17862 // Forget about control dependent expressions now.
17863 block.invalidate_expressions.clear();
17864
17865 // After we return, we must be out of scope, so if we somehow have to re-emit this function,
17866 // re-declare variables if necessary.
17867 assert(rearm_dominated_variables.size() == block.dominated_variables.size());
17868 for (size_t i = 0; i < block.dominated_variables.size(); i++)
17869 {
17870 uint32_t var = block.dominated_variables[i];
17871 get<SPIRVariable>(id: var).deferred_declaration = rearm_dominated_variables[i];
17872 }
17873
17874 // Just like for deferred declaration, we need to forget about loop variable enable
17875 // if our block chain is reinstantiated later.
17876 for (auto &var_id : block.loop_variables)
17877 get<SPIRVariable>(id: var_id).loop_variable_enable = false;
17878}
17879
17880void CompilerGLSL::begin_scope()
17881{
17882 statement(ts: "{");
17883 indent++;
17884}
17885
17886void CompilerGLSL::end_scope()
17887{
17888 if (!indent)
17889 SPIRV_CROSS_THROW("Popping empty indent stack.");
17890 indent--;
17891 statement(ts: "}");
17892}
17893
17894void CompilerGLSL::end_scope(const string &trailer)
17895{
17896 if (!indent)
17897 SPIRV_CROSS_THROW("Popping empty indent stack.");
17898 indent--;
17899 statement(ts: "}", ts: trailer);
17900}
17901
17902void CompilerGLSL::end_scope_decl()
17903{
17904 if (!indent)
17905 SPIRV_CROSS_THROW("Popping empty indent stack.");
17906 indent--;
17907 statement(ts: "};");
17908}
17909
17910void CompilerGLSL::end_scope_decl(const string &decl)
17911{
17912 if (!indent)
17913 SPIRV_CROSS_THROW("Popping empty indent stack.");
17914 indent--;
17915 statement(ts: "} ", ts: decl, ts: ";");
17916}
17917
17918void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
17919{
17920 // If our variable is remapped, and we rely on type-remapping information as
17921 // well, then we cannot pass the variable as a function parameter.
17922 // Fixing this is non-trivial without stamping out variants of the same function,
17923 // so for now warn about this and suggest workarounds instead.
17924 for (uint32_t i = 0; i < length; i++)
17925 {
17926 auto *var = maybe_get<SPIRVariable>(id: args[i]);
17927 if (!var || !var->remapped_variable)
17928 continue;
17929
17930 auto &type = get<SPIRType>(id: var->basetype);
17931 if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
17932 {
17933 SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
17934 "This will not work correctly because type-remapping information is lost. "
17935 "To workaround, please consider not passing the subpass input as a function parameter, "
17936 "or use in/out variables instead which do not need type remapping information.");
17937 }
17938 }
17939}
17940
17941const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
17942{
17943 // FIXME: This is kind of hacky. There should be a cleaner way.
17944 auto offset = uint32_t(&instr - current_emitting_block->ops.data());
17945 if ((offset + 1) < current_emitting_block->ops.size())
17946 return &current_emitting_block->ops[offset + 1];
17947 else
17948 return nullptr;
17949}
17950
17951uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
17952{
17953 return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
17954 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
17955 MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
17956}
17957
17958bool CompilerGLSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, StorageClass, StorageClass)
17959{
17960 string lhs;
17961 if (expr)
17962 lhs = expr;
17963 else
17964 lhs = to_expression(id: lhs_id);
17965
17966 statement(ts&: lhs, ts: " = ", ts: to_expression(id: rhs_id), ts: ";");
17967 return true;
17968}
17969
17970bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id)
17971{
17972 if (!backend.force_gl_in_out_block)
17973 return false;
17974 // This path is only relevant for GL backends.
17975
17976 auto *var = maybe_get<SPIRVariable>(id: target_id);
17977 if (!var || var->storage != StorageClassOutput)
17978 return false;
17979
17980 if (!is_builtin_variable(var: *var) || BuiltIn(get_decoration(id: var->self, decoration: DecorationBuiltIn)) != BuiltInSampleMask)
17981 return false;
17982
17983 auto &type = expression_type(id: source_id);
17984 string array_expr;
17985 if (type.array_size_literal.back())
17986 {
17987 array_expr = convert_to_string(t: type.array.back());
17988 if (type.array.back() == 0)
17989 SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
17990 }
17991 else
17992 array_expr = to_expression(id: type.array.back());
17993
17994 SPIRType target_type { OpTypeInt };
17995 target_type.basetype = SPIRType::Int;
17996
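	// For illustration only (not emitted verbatim): assuming a hypothetical SPIR-V source array
	// "_src" of uint values and a sample-mask array size of 1, the statements below would produce
	// GLSL roughly like:
	//
	//     for (int i = 0; i < int(1); i++)
	//     {
	//         gl_SampleMask[i] = int(_src[i]);
	//     }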
17997 statement(ts: "for (int i = 0; i < int(", ts&: array_expr, ts: "); i++)");
17998 begin_scope();
17999 statement(ts: to_expression(id: target_id), ts: "[i] = ",
18000 ts: bitcast_expression(target_type, expr_type: type.basetype, expr: join(ts: to_expression(id: source_id), ts: "[i]")),
18001 ts: ";");
18002 end_scope();
18003
18004 return true;
18005}
18006
18007void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
18008{
18009 if (!backend.force_gl_in_out_block)
18010 return;
18011 // This path is only relevant for GL backends.
18012
18013 auto *var = maybe_get<SPIRVariable>(id: source_id);
18014 if (!var)
18015 return;
18016
18017 if (var->storage != StorageClassInput && var->storage != StorageClassOutput)
18018 return;
18019
18020 auto &type = get_variable_data_type(var: *var);
18021 if (type.array.empty())
18022 return;
18023
18024 auto builtin = BuiltIn(get_decoration(id: var->self, decoration: DecorationBuiltIn));
18025 bool is_builtin = is_builtin_variable(var: *var) &&
18026 (builtin == BuiltInPointSize ||
18027 builtin == BuiltInPosition ||
18028 builtin == BuiltInSampleMask);
18029 bool is_tess = is_tessellation_shader();
18030 bool is_patch = has_decoration(id: var->self, decoration: DecorationPatch);
18031 bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask;
18032
	// Tessellation input arrays are special in that they are unsized, so we cannot directly copy from them.
	// We must unroll the array load.
	// For builtins, we normally couldn't catch this case,
	// because this is resolved in the OpAccessChain in most cases.
	// If we load the entire array, we have no choice but to unroll here.
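	// For illustration only: in a hypothetical tessellation control shader that loads the whole
	// gl_Position array through gl_in (load target id 15, gl_MaxPatchVertices = 32), the code below
	// would emit GLSL roughly like:
	//
	//     vec4 _15_unrolled[32];
	//     for (int i = 0; i < int(32); i++)
	//     {
	//         _15_unrolled[i] = gl_in[i].gl_Position;
	//     }
	//
	// and "expr" is then redirected to "_15_unrolled".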
18038 if (!is_patch && (is_builtin || is_tess))
18039 {
18040 auto new_expr = join(ts: "_", ts&: target_id, ts: "_unrolled");
18041 statement(ts: variable_decl(type, name: new_expr, id: target_id), ts: ";");
18042 string array_expr;
18043 if (type.array_size_literal.back())
18044 {
18045 array_expr = convert_to_string(t: type.array.back());
18046 if (type.array.back() == 0)
18047 SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
18048 }
18049 else
18050 array_expr = to_expression(id: type.array.back());
18051
18052 // The array size might be a specialization constant, so use a for-loop instead.
18053 statement(ts: "for (int i = 0; i < int(", ts&: array_expr, ts: "); i++)");
18054 begin_scope();
18055 if (is_builtin && !is_sample_mask)
18056 statement(ts&: new_expr, ts: "[i] = gl_in[i].", ts&: expr, ts: ";");
18057 else if (is_sample_mask)
18058 {
18059 SPIRType target_type { OpTypeInt };
18060 target_type.basetype = SPIRType::Int;
18061 statement(ts&: new_expr, ts: "[i] = ", ts: bitcast_expression(target_type, expr_type: type.basetype, expr: join(ts&: expr, ts: "[i]")), ts: ";");
18062 }
18063 else
18064 statement(ts&: new_expr, ts: "[i] = ", ts&: expr, ts: "[i];");
18065 end_scope();
18066
18067 expr = std::move(new_expr);
18068 }
18069}
18070
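// For illustration only: builtins such as gl_InstanceIndex are signed ints in GLSL, while the
// SPIR-V consumer may expect an unsigned result. Assuming a hypothetical load whose SPIR-V result
// type is uint, the function below rewrites the loaded expression roughly as:
//
//     uint(gl_InstanceIndex)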
18071void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
18072{
18073 // We will handle array cases elsewhere.
18074 if (!expr_type.array.empty())
18075 return;
18076
18077 auto *var = maybe_get_backing_variable(chain: source_id);
18078 if (var)
18079 source_id = var->self;
18080
18081 // Only interested in standalone builtin variables.
18082 if (!has_decoration(id: source_id, decoration: DecorationBuiltIn))
18083 {
18084 // Except for int attributes in legacy GLSL, which are cast from float.
18085 if (is_legacy() && expr_type.basetype == SPIRType::Int && var && var->storage == StorageClassInput)
18086 expr = join(ts: type_to_glsl(type: expr_type), ts: "(", ts&: expr, ts: ")");
18087 return;
18088 }
18089
18090 auto builtin = static_cast<BuiltIn>(get_decoration(id: source_id, decoration: DecorationBuiltIn));
18091 auto expected_type = expr_type.basetype;
18092
18093 // TODO: Fill in for more builtins.
18094 switch (builtin)
18095 {
18096 case BuiltInLayer:
18097 case BuiltInPrimitiveId:
18098 case BuiltInViewportIndex:
18099 case BuiltInInstanceId:
18100 case BuiltInInstanceIndex:
18101 case BuiltInVertexId:
18102 case BuiltInVertexIndex:
18103 case BuiltInSampleId:
18104 case BuiltInBaseVertex:
18105 case BuiltInBaseInstance:
18106 case BuiltInDrawIndex:
18107 case BuiltInFragStencilRefEXT:
18108 case BuiltInInstanceCustomIndexNV:
18109 case BuiltInSampleMask:
18110 case BuiltInPrimitiveShadingRateKHR:
18111 case BuiltInShadingRateKHR:
18112 expected_type = SPIRType::Int;
18113 break;
18114
18115 case BuiltInGlobalInvocationId:
18116 case BuiltInLocalInvocationId:
18117 case BuiltInWorkgroupId:
18118 case BuiltInLocalInvocationIndex:
18119 case BuiltInWorkgroupSize:
18120 case BuiltInNumWorkgroups:
18121 case BuiltInIncomingRayFlagsNV:
18122 case BuiltInLaunchIdNV:
18123 case BuiltInLaunchSizeNV:
18124 case BuiltInPrimitiveTriangleIndicesEXT:
18125 case BuiltInPrimitiveLineIndicesEXT:
18126 case BuiltInPrimitivePointIndicesEXT:
18127 expected_type = SPIRType::UInt;
18128 break;
18129
18130 default:
18131 break;
18132 }
18133
18134 if (expected_type != expr_type.basetype)
18135 expr = bitcast_expression(target_type: expr_type, expr_type: expected_type, expr);
18136}
18137
18138SPIRType::BaseType CompilerGLSL::get_builtin_basetype(BuiltIn builtin, SPIRType::BaseType default_type)
18139{
18140 // TODO: Fill in for more builtins.
18141 switch (builtin)
18142 {
18143 case BuiltInLayer:
18144 case BuiltInPrimitiveId:
18145 case BuiltInViewportIndex:
18146 case BuiltInFragStencilRefEXT:
18147 case BuiltInSampleMask:
18148 case BuiltInPrimitiveShadingRateKHR:
18149 case BuiltInShadingRateKHR:
18150 return SPIRType::Int;
18151
18152 default:
18153 return default_type;
18154 }
18155}
18156
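// For illustration only: the store-side counterpart of cast_from_variable_load. Assuming a
// hypothetical uint expression "_19" being written to gl_Layer (a signed int builtin), the stored
// value is rewritten roughly as int(_19), so the final store reads:
//
//     gl_Layer = int(_19);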
18157void CompilerGLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
18158{
18159 auto *var = maybe_get_backing_variable(chain: target_id);
18160 if (var)
18161 target_id = var->self;
18162
18163 // Only interested in standalone builtin variables.
18164 if (!has_decoration(id: target_id, decoration: DecorationBuiltIn))
18165 return;
18166
18167 auto builtin = static_cast<BuiltIn>(get_decoration(id: target_id, decoration: DecorationBuiltIn));
18168 auto expected_type = get_builtin_basetype(builtin, default_type: expr_type.basetype);
18169
18170 if (expected_type != expr_type.basetype)
18171 {
18172 auto type = expr_type;
18173 type.basetype = expected_type;
18174 expr = bitcast_expression(target_type: type, expr_type: expr_type.basetype, expr);
18175 }
18176}
18177
18178void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id)
18179{
18180 if (*backend.nonuniform_qualifier == '\0')
18181 return;
18182
18183 auto *var = maybe_get_backing_variable(chain: ptr_id);
18184 if (!var)
18185 return;
18186
18187 if (var->storage != StorageClassUniformConstant &&
18188 var->storage != StorageClassStorageBuffer &&
18189 var->storage != StorageClassUniform)
18190 return;
18191
18192 auto &backing_type = get<SPIRType>(id: var->basetype);
18193 if (backing_type.array.empty())
18194 return;
18195
18196 // If we get here, we know we're accessing an arrayed resource which
18197 // might require nonuniform qualifier.
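	// For illustration only: assuming a hypothetical descriptor-indexed expression
	//
	//     uSamplers[index]
	//
	// the rewrite below wraps the index expression so it becomes
	//
	//     uSamplers[nonuniformEXT(index)]
	//
	// (backend.nonuniform_qualifier is "nonuniformEXT" for the GLSL backend).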
18198
18199 auto start_array_index = expr.find_first_of(c: '[');
18200
18201 if (start_array_index == string::npos)
18202 return;
18203
18204 // We've opened a bracket, track expressions until we can close the bracket.
18205 // This must be our resource index.
18206 size_t end_array_index = string::npos;
18207 unsigned bracket_count = 1;
18208 for (size_t index = start_array_index + 1; index < expr.size(); index++)
18209 {
18210 if (expr[index] == ']')
18211 {
18212 if (--bracket_count == 0)
18213 {
18214 end_array_index = index;
18215 break;
18216 }
18217 }
18218 else if (expr[index] == '[')
18219 bracket_count++;
18220 }
18221
18222 assert(bracket_count == 0);
18223
18224 // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
18225 // nothing we can do here to express that.
18226 if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
18227 return;
18228
18229 start_array_index++;
18230
18231 expr = join(ts: expr.substr(pos: 0, n: start_array_index), ts&: backend.nonuniform_qualifier, ts: "(",
18232 ts: expr.substr(pos: start_array_index, n: end_array_index - start_array_index), ts: ")",
18233 ts: expr.substr(pos: end_array_index, n: string::npos));
18234}
18235
18236void CompilerGLSL::emit_block_hints(const SPIRBlock &block)
18237{
18238 if ((options.es && options.version < 310) || (!options.es && options.version < 140))
18239 return;
18240
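	// For illustration only: a loop block with HintUnroll would be preceded by the emitted macro,
	// roughly:
	//
	//     SPIRV_CROSS_UNROLL
	//     for (int i = 0; i < 4; i++) { ... }
	//
	// where the SPIRV_CROSS_* macros are expected to expand to the GL_EXT_control_flow_attributes
	// attributes ([[unroll]] and friends) in the emitted preamble.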
18241 switch (block.hint)
18242 {
18243 case SPIRBlock::HintFlatten:
18244 require_extension_internal(ext: "GL_EXT_control_flow_attributes");
18245 statement(ts: "SPIRV_CROSS_FLATTEN");
18246 break;
18247 case SPIRBlock::HintDontFlatten:
18248 require_extension_internal(ext: "GL_EXT_control_flow_attributes");
18249 statement(ts: "SPIRV_CROSS_BRANCH");
18250 break;
18251 case SPIRBlock::HintUnroll:
18252 require_extension_internal(ext: "GL_EXT_control_flow_attributes");
18253 statement(ts: "SPIRV_CROSS_UNROLL");
18254 break;
18255 case SPIRBlock::HintDontUnroll:
18256 require_extension_internal(ext: "GL_EXT_control_flow_attributes");
18257 statement(ts: "SPIRV_CROSS_LOOP");
18258 break;
18259 default:
18260 break;
18261 }
18262}
18263
18264void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
18265{
18266 preserved_aliases[id] = get_name(id);
18267}
18268
18269void CompilerGLSL::reset_name_caches()
18270{
18271 for (auto &preserved : preserved_aliases)
18272 set_name(id: preserved.first, name: preserved.second);
18273
18274 preserved_aliases.clear();
18275 resource_names.clear();
18276 block_input_names.clear();
18277 block_output_names.clear();
18278 block_ubo_names.clear();
18279 block_ssbo_names.clear();
18280 block_names.clear();
18281 function_overloads.clear();
18282}
18283
18284void CompilerGLSL::fixup_anonymous_struct_names(std::unordered_set<uint32_t> &visited, const SPIRType &type)
18285{
18286 if (visited.count(x: type.self))
18287 return;
18288 visited.insert(x: type.self);
18289
18290 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
18291 {
18292 auto &mbr_type = get<SPIRType>(id: type.member_types[i]);
18293
18294 if (mbr_type.basetype == SPIRType::Struct)
18295 {
18296 // If there are multiple aliases, the output might be somewhat unpredictable,
18297 // but the only real alternative in that case is to do nothing, which isn't any better.
18298 // This check should be fine in practice.
18299 if (get_name(id: mbr_type.self).empty() && !get_member_name(id: type.self, index: i).empty())
18300 {
18301 auto anon_name = join(ts: "anon_", ts: get_member_name(id: type.self, index: i));
18302 ParsedIR::sanitize_underscores(str&: anon_name);
18303 set_name(id: mbr_type.self, name: anon_name);
18304 }
18305
18306 fixup_anonymous_struct_names(visited, type: mbr_type);
18307 }
18308 }
18309}
18310
18311void CompilerGLSL::fixup_anonymous_struct_names()
18312{
18313 // HLSL codegen can often end up emitting anonymous structs inside blocks, which
18314 // breaks GL linking since all names must match ...
18315 // Try to emit sensible code, so attempt to find such structs and emit anon_$member.
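	// For illustration only: given a hypothetical block such as
	//
	//     uniform Params { struct { vec4 scale; } settings; } _params;
	//
	// the anonymous inner struct would be renamed to "anon_settings" so that both sides of a
	// GL program link agree on the struct name.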
18316
18317 // Breaks exponential explosion with weird type trees.
18318 std::unordered_set<uint32_t> visited;
18319
18320 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t, SPIRType &type) {
18321 if (type.basetype == SPIRType::Struct &&
18322 (has_decoration(id: type.self, decoration: DecorationBlock) ||
18323 has_decoration(id: type.self, decoration: DecorationBufferBlock)))
18324 {
18325 fixup_anonymous_struct_names(visited, type);
18326 }
18327 });
18328}
18329
18330void CompilerGLSL::fixup_type_alias()
18331{
18332 // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
18333 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t self, SPIRType &type) {
18334 if (!type.type_alias)
18335 return;
18336
18337 if (has_decoration(id: type.self, decoration: DecorationBlock) || has_decoration(id: type.self, decoration: DecorationBufferBlock))
18338 {
18339 // Top-level block types should never alias anything else.
18340 type.type_alias = 0;
18341 }
18342 else if (type_is_block_like(type) && type.self == ID(self))
18343 {
18344 // A block-like type is any type which contains Offset decoration, but not top-level blocks,
18345 // i.e. blocks which are placed inside buffers.
18346 // Become the master.
18347 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t other_id, SPIRType &other_type) {
18348 if (other_id == self)
18349 return;
18350
18351 if (other_type.type_alias == type.type_alias)
18352 other_type.type_alias = self;
18353 });
18354
18355 this->get<SPIRType>(id: type.type_alias).type_alias = self;
18356 type.type_alias = 0;
18357 }
18358 });
18359}
18360
18361void CompilerGLSL::reorder_type_alias()
18362{
	// Reorder declaration of types so that the master of the type alias is always emitted first.
	// We need this in case a type B depends on type A (A must come before it in the vector), but A is an alias of a type ABuffer,
	// which means the declaration of A doesn't happen (yet), and the order would be B, ABuffer rather than ABuffer, B. Fix this up here.
18366 auto loop_lock = ir.create_loop_hard_lock();
18367
18368 auto &type_ids = ir.ids_for_type[TypeType];
18369 for (auto alias_itr = begin(cont&: type_ids); alias_itr != end(cont&: type_ids); ++alias_itr)
18370 {
18371 auto &type = get<SPIRType>(id: *alias_itr);
18372 if (type.type_alias != TypeID(0) &&
18373 !has_extended_decoration(id: type.type_alias, decoration: SPIRVCrossDecorationBufferBlockRepacked))
18374 {
18375 // We will skip declaring this type, so make sure the type_alias type comes before.
18376 auto master_itr = find(first: begin(cont&: type_ids), last: end(cont&: type_ids), val: ID(type.type_alias));
18377 assert(master_itr != end(type_ids));
18378
18379 if (alias_itr < master_itr)
18380 {
18381 // Must also swap the type order for the constant-type joined array.
18382 auto &joined_types = ir.ids_for_constant_undef_or_type;
18383 auto alt_alias_itr = find(first: begin(cont&: joined_types), last: end(cont&: joined_types), val: *alias_itr);
18384 auto alt_master_itr = find(first: begin(cont&: joined_types), last: end(cont&: joined_types), val: *master_itr);
18385 assert(alt_alias_itr != end(joined_types));
18386 assert(alt_master_itr != end(joined_types));
18387
18388 swap(a&: *alias_itr, b&: *master_itr);
18389 swap(a&: *alt_alias_itr, b&: *alt_master_itr);
18390 }
18391 }
18392 }
18393}
18394
18395void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
18396{
18397 // If we are redirecting statements, ignore the line directive.
18398 // Common case here is continue blocks.
18399 if (redirect_statement)
18400 return;
18401
18402 // If we're emitting code in a sensitive context such as condition blocks in for loops, don't emit
18403 // any line directives, because it's not possible.
18404 if (block_debug_directives)
18405 return;
18406
18407 if (options.emit_line_directives)
18408 {
18409 require_extension_internal(ext: "GL_GOOGLE_cpp_style_line_directive");
18410 statement_no_indent(ts: "#line ", ts&: line_literal, ts: " \"", ts&: get<SPIRString>(id: file_id).str, ts: "\"");
18411 }
18412}
18413
18414void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
18415 SmallVector<uint32_t> chain)
18416{
18417 // Fully unroll all member/array indices one by one.
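	// For illustration only: assuming a hypothetical logical type
	//
	//     struct Foo { float a; vec2 b[2]; };
	//
	// copying one Foo into another unrolls into per-element stores, roughly:
	//
	//     dst.a = src.a;
	//     dst.b[0] = src.b[0];
	//     dst.b[1] = src.b[1];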
18418
18419 auto &lhs_type = get<SPIRType>(id: lhs_type_id);
18420 auto &rhs_type = get<SPIRType>(id: rhs_type_id);
18421
18422 if (!lhs_type.array.empty())
18423 {
		// Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
		// and this is a rather obscure opcode anyway, so keep it simple unless we are forced to.
18426 uint32_t array_size = to_array_size_literal(type: lhs_type);
18427 chain.push_back(t: 0);
18428
18429 for (uint32_t i = 0; i < array_size; i++)
18430 {
18431 chain.back() = i;
18432 emit_copy_logical_type(lhs_id, lhs_type_id: lhs_type.parent_type, rhs_id, rhs_type_id: rhs_type.parent_type, chain);
18433 }
18434 }
18435 else if (lhs_type.basetype == SPIRType::Struct)
18436 {
18437 chain.push_back(t: 0);
18438 uint32_t member_count = uint32_t(lhs_type.member_types.size());
18439 for (uint32_t i = 0; i < member_count; i++)
18440 {
18441 chain.back() = i;
18442 emit_copy_logical_type(lhs_id, lhs_type_id: lhs_type.member_types[i], rhs_id, rhs_type_id: rhs_type.member_types[i], chain);
18443 }
18444 }
18445 else
18446 {
18447 // Need to handle unpack/packing fixups since this can differ wildly between the logical types,
18448 // particularly in MSL.
18449 // To deal with this, we emit access chains and go through emit_store_statement
18450 // to deal with all the special cases we can encounter.
18451
18452 AccessChainMeta lhs_meta, rhs_meta;
18453 auto lhs = access_chain_internal(base: lhs_id, indices: chain.data(), count: uint32_t(chain.size()),
18454 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &lhs_meta);
18455 auto rhs = access_chain_internal(base: rhs_id, indices: chain.data(), count: uint32_t(chain.size()),
18456 flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &rhs_meta);
18457
18458 uint32_t id = ir.increase_bound_by(count: 2);
18459 lhs_id = id;
18460 rhs_id = id + 1;
18461
18462 {
18463 auto &lhs_expr = set<SPIRExpression>(id: lhs_id, args: std::move(lhs), args&: lhs_type_id, args: true);
18464 lhs_expr.need_transpose = lhs_meta.need_transpose;
18465
18466 if (lhs_meta.storage_is_packed)
18467 set_extended_decoration(id: lhs_id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
18468 if (lhs_meta.storage_physical_type != 0)
18469 set_extended_decoration(id: lhs_id, decoration: SPIRVCrossDecorationPhysicalTypeID, value: lhs_meta.storage_physical_type);
18470
18471 forwarded_temporaries.insert(x: lhs_id);
18472 suppressed_usage_tracking.insert(x: lhs_id);
18473 }
18474
18475 {
18476 auto &rhs_expr = set<SPIRExpression>(id: rhs_id, args: std::move(rhs), args&: rhs_type_id, args: true);
18477 rhs_expr.need_transpose = rhs_meta.need_transpose;
18478
18479 if (rhs_meta.storage_is_packed)
18480 set_extended_decoration(id: rhs_id, decoration: SPIRVCrossDecorationPhysicalTypePacked);
18481 if (rhs_meta.storage_physical_type != 0)
18482 set_extended_decoration(id: rhs_id, decoration: SPIRVCrossDecorationPhysicalTypeID, value: rhs_meta.storage_physical_type);
18483
18484 forwarded_temporaries.insert(x: rhs_id);
18485 suppressed_usage_tracking.insert(x: rhs_id);
18486 }
18487
18488 emit_store_statement(lhs_expression: lhs_id, rhs_expression: rhs_id);
18489 }
18490}
18491
18492bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
18493{
18494 if (!has_decoration(id, decoration: DecorationInputAttachmentIndex))
18495 return false;
18496
18497 uint32_t input_attachment_index = get_decoration(id, decoration: DecorationInputAttachmentIndex);
18498 for (auto &remap : subpass_to_framebuffer_fetch_attachment)
18499 if (remap.first == input_attachment_index)
18500 return true;
18501
18502 return false;
18503}
18504
18505const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
18506{
18507 const SPIRVariable *ret = nullptr;
18508 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
18509 if (has_decoration(id: var.self, decoration: DecorationInputAttachmentIndex) &&
18510 get_decoration(id: var.self, decoration: DecorationInputAttachmentIndex) == index)
18511 {
18512 ret = &var;
18513 }
18514 });
18515 return ret;
18516}
18517
18518const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
18519{
18520 const SPIRVariable *ret = nullptr;
18521 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
18522 if (var.storage == StorageClassOutput && get_decoration(id: var.self, decoration: DecorationLocation) == location)
18523 ret = &var;
18524 });
18525 return ret;
18526}
18527
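// For illustration only: with GL_EXT_shader_framebuffer_fetch, a subpass input remapped to color
// output location 0 receives a fixup at the top of main(), roughly:
//
//     _subpassInput0 = FragColor0;           // modern GLSL (hypothetical names)
//     _subpassInput0 = gl_LastFragData[0];   // legacy GLSL
//
// where the actual names come from the remapped variables handled below.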
18528void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
18529{
18530 for (auto &remap : subpass_to_framebuffer_fetch_attachment)
18531 {
18532 auto *subpass_var = find_subpass_input_by_attachment_index(index: remap.first);
18533 auto *output_var = find_color_output_by_location(location: remap.second);
18534 if (!subpass_var)
18535 continue;
18536 if (!output_var)
18537 SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
18538 "to read from it.");
18539 if (is_array(type: get<SPIRType>(id: output_var->basetype)))
18540 SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");
18541
18542 auto &func = get<SPIRFunction>(id: get_entry_point().self);
18543 func.fixup_hooks_in.push_back(t: [=]() {
18544 if (is_legacy())
18545 {
18546 statement(ts: to_expression(id: subpass_var->self), ts: " = ", ts: "gl_LastFragData[",
18547 ts: get_decoration(id: output_var->self, decoration: DecorationLocation), ts: "];");
18548 }
18549 else
18550 {
18551 uint32_t num_rt_components = this->get<SPIRType>(id: output_var->basetype).vecsize;
18552 statement(ts: to_expression(id: subpass_var->self), ts: vector_swizzle(vecsize: num_rt_components, index: 0), ts: " = ",
18553 ts: to_expression(id: output_var->self), ts: ";");
18554 }
18555 });
18556 }
18557}
18558
18559bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
18560{
18561 return is_depth_image(type: get<SPIRType>(id: get<SPIRVariable>(id).basetype), id);
18562}
18563
18564const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
18565{
18566 static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
18567 "GL_KHR_shader_subgroup_basic",
18568 "GL_KHR_shader_subgroup_vote",
18569 "GL_KHR_shader_subgroup_arithmetic",
18570 "GL_NV_gpu_shader_5",
18571 "GL_NV_shader_thread_group",
18572 "GL_NV_shader_thread_shuffle",
18573 "GL_ARB_shader_ballot",
18574 "GL_ARB_shader_group_vote",
18575 "GL_AMD_gcn_shader" };
18576 return retval[c];
18577}
18578
18579SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
18580{
18581 switch (c)
18582 {
18583 case ARB_shader_ballot:
18584 return { "GL_ARB_shader_int64" };
18585 case AMD_gcn_shader:
18586 return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
18587 default:
18588 return {};
18589 }
18590}
18591
18592const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
18593{
18594 switch (c)
18595 {
18596 case ARB_shader_ballot:
18597 return "defined(GL_ARB_shader_int64)";
18598 case AMD_gcn_shader:
18599 return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
18600 default:
18601 return "";
18602 }
18603}
18604
18605CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
18606 get_feature_dependencies(Feature feature)
18607{
18608 switch (feature)
18609 {
18610 case SubgroupAllEqualT:
18611 return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
18612 case SubgroupElect:
18613 return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
18614 case SubgroupInverseBallot_InclBitCount_ExclBitCout:
18615 return { SubgroupMask };
18616 case SubgroupBallotBitCount:
18617 return { SubgroupBallot };
18618 case SubgroupArithmeticIAddReduce:
18619 case SubgroupArithmeticIAddInclusiveScan:
18620 case SubgroupArithmeticFAddReduce:
18621 case SubgroupArithmeticFAddInclusiveScan:
18622 case SubgroupArithmeticIMulReduce:
18623 case SubgroupArithmeticIMulInclusiveScan:
18624 case SubgroupArithmeticFMulReduce:
18625 case SubgroupArithmeticFMulInclusiveScan:
18626 return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask, SubgroupBallotBitExtract };
18627 case SubgroupArithmeticIAddExclusiveScan:
18628 case SubgroupArithmeticFAddExclusiveScan:
18629 case SubgroupArithmeticIMulExclusiveScan:
18630 case SubgroupArithmeticFMulExclusiveScan:
18631 return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount,
18632 SubgroupMask, SubgroupElect, SubgroupBallotBitExtract };
18633 default:
18634 return {};
18635 }
18636}
18637
18638CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
18639 get_feature_dependency_mask(Feature feature)
18640{
18641 return build_mask(features: get_feature_dependencies(feature));
18642}
18643
18644bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
18645{
18646 static const bool retval[FeatureCount] = {
18647 false, false, false, false, false, false,
		true, // SubgroupBallotFindLSB_MSB
18649 false, false, false, false,
18650 true, // SubgroupMemBarrier - replaced with workgroup memory barriers
18651 false, false, true, false,
18652 false, false, false, false, false, false, // iadd, fadd
		false, false, false, false, false, false, // imul, fmul
18654 };
18655
18656 return retval[feature];
18657}
18658
18659CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
18660 get_KHR_extension_for_feature(Feature feature)
18661{
18662 static const Candidate extensions[FeatureCount] = {
18663 KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
18664 KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
18665 KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
18666 KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot,
18667 KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
18668 KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
18669 KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
18670 KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
18671 };
18672
18673 return extensions[feature];
18674}
18675
18676void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
18677{
18678 feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
18679}
18680
18681bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
18682{
18683 return (feature_mask & (1u << feature)) != 0;
18684}
18685
18686CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
18687{
18688 Result res;
18689
18690 for (uint32_t i = 0u; i < FeatureCount; ++i)
18691 {
18692 if (feature_mask & (1u << i))
18693 {
18694 auto feature = static_cast<Feature>(i);
18695 std::unordered_set<uint32_t> unique_candidates;
18696
18697 auto candidates = get_candidates_for_feature(ft: feature);
18698 unique_candidates.insert(first: candidates.begin(), last: candidates.end());
18699
18700 auto deps = get_feature_dependencies(feature);
18701 for (Feature d : deps)
18702 {
18703 candidates = get_candidates_for_feature(ft: d);
18704 if (!candidates.empty())
18705 unique_candidates.insert(first: candidates.begin(), last: candidates.end());
18706 }
18707
18708 for (uint32_t c : unique_candidates)
18709 ++res.weights[static_cast<Candidate>(c)];
18710 }
18711 }
18712
18713 return res;
18714}
18715
18716CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
18717 get_candidates_for_feature(Feature ft, const Result &r)
18718{
18719 auto c = get_candidates_for_feature(ft);
18720 auto cmp = [&r](Candidate a, Candidate b) {
18721 if (r.weights[a] == r.weights[b])
18722 return a < b; // Prefer candidates with lower enum value
18723 return r.weights[a] > r.weights[b];
18724 };
18725 std::sort(first: c.begin(), last: c.end(), comp: cmp);
18726 return c;
18727}
18728
18729CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
18730 get_candidates_for_feature(Feature feature)
18731{
18732 switch (feature)
18733 {
18734 case SubgroupMask:
18735 return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
18736 case SubgroupSize:
18737 return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
18738 case SubgroupInvocationID:
18739 return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
18740 case SubgroupID:
18741 return { KHR_shader_subgroup_basic, NV_shader_thread_group };
18742 case NumSubgroups:
18743 return { KHR_shader_subgroup_basic, NV_shader_thread_group };
18744 case SubgroupBroadcast_First:
18745 return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
18746 case SubgroupBallotFindLSB_MSB:
18747 return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
18748 case SubgroupAll_Any_AllEqualBool:
18749 return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
18750 case SubgroupAllEqualT:
18751 return {}; // depends on other features only
18752 case SubgroupElect:
18753 return {}; // depends on other features only
18754 case SubgroupBallot:
18755 return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
18756 case SubgroupBarrier:
18757 return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
18758 case SubgroupMemBarrier:
18759 return { KHR_shader_subgroup_basic };
18760 case SubgroupInverseBallot_InclBitCount_ExclBitCout:
18761 return {};
18762 case SubgroupBallotBitExtract:
18763 return { NV_shader_thread_group };
18764 case SubgroupBallotBitCount:
18765 return {};
18766 case SubgroupArithmeticIAddReduce:
18767 case SubgroupArithmeticIAddExclusiveScan:
18768 case SubgroupArithmeticIAddInclusiveScan:
18769 case SubgroupArithmeticFAddReduce:
18770 case SubgroupArithmeticFAddExclusiveScan:
18771 case SubgroupArithmeticFAddInclusiveScan:
18772 case SubgroupArithmeticIMulReduce:
18773 case SubgroupArithmeticIMulExclusiveScan:
18774 case SubgroupArithmeticIMulInclusiveScan:
18775 case SubgroupArithmeticFMulReduce:
18776 case SubgroupArithmeticFMulExclusiveScan:
18777 case SubgroupArithmeticFMulInclusiveScan:
18778 return { KHR_shader_subgroup_arithmetic, NV_shader_thread_shuffle };
18779 default:
18780 return {};
18781 }
18782}
18783
18784CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
18785 const SmallVector<Feature> &features)
18786{
18787 FeatureMask mask = 0;
18788 for (Feature f : features)
18789 mask |= FeatureMask(1) << f;
18790 return mask;
18791}
18792
18793CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
18794{
18795 for (auto &weight : weights)
18796 weight = 0;
18797
	// Make sure KHR_shader_subgroup extensions are always preferred.
18799 const uint32_t big_num = FeatureCount;
18800 weights[KHR_shader_subgroup_ballot] = big_num;
18801 weights[KHR_shader_subgroup_basic] = big_num;
18802 weights[KHR_shader_subgroup_vote] = big_num;
18803 weights[KHR_shader_subgroup_arithmetic] = big_num;
18804}
18805
18806void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
18807{
18808 // Must be ordered to maintain deterministic output, so vector is appropriate.
18809 if (find(first: begin(cont&: workaround_ubo_load_overload_types), last: end(cont&: workaround_ubo_load_overload_types), val: id) ==
18810 end(cont&: workaround_ubo_load_overload_types))
18811 {
18812 force_recompile();
18813 workaround_ubo_load_overload_types.push_back(t: id);
18814 }
18815}
18816
18817void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
18818{
	// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
	// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
	// ensure that the row_major decoration is actually respected.
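	// For illustration only: assuming a hypothetical UBO member "m" decorated row_major, a load such as
	//
	//     vec4 v = ubo.m * pos;
	//
	// is rewritten below into
	//
	//     vec4 v = spvWorkaroundRowMajor(ubo.m) * pos;
	//
	// (or spvWorkaroundRowMajorMP for the relaxed-precision overload).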
18822 auto *var = maybe_get_backing_variable(chain: ptr);
18823 if (!var)
18824 return;
18825
18826 auto &backing_type = get<SPIRType>(id: var->basetype);
18827 bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform &&
18828 has_decoration(id: backing_type.self, decoration: DecorationBlock);
18829 if (!is_ubo)
18830 return;
18831
18832 auto *type = &get<SPIRType>(id: loaded_type);
18833 bool rewrite = false;
18834 bool relaxed = options.es;
18835
18836 if (is_matrix(type: *type))
18837 {
18838 // To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
18839 // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
18840 // If there is any row-major action going on, we apply the workaround.
18841 // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
		// If an access chain occurred, the workaround is not required, so loads of vectors or scalars do not need the workaround.
18843 type = &backing_type;
18844 }
18845 else
18846 {
18847 // If we're loading a composite, we don't have overloads like these.
18848 relaxed = false;
18849 }
18850
18851 if (type->basetype == SPIRType::Struct)
18852 {
18853 // If we're loading a struct where any member is a row-major matrix, apply the workaround.
18854 for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
18855 {
18856 auto decorations = combined_decoration_for_member(type: *type, index: i);
18857 if (decorations.get(bit: DecorationRowMajor))
18858 rewrite = true;
18859
18860 // Since we decide on a per-struct basis, only use mediump wrapper if all candidates are mediump.
18861 if (!decorations.get(bit: DecorationRelaxedPrecision))
18862 relaxed = false;
18863 }
18864 }
18865
18866 if (rewrite)
18867 {
18868 request_workaround_wrapper_overload(id: loaded_type);
18869 expr = join(ts: "spvWorkaroundRowMajor", ts: (relaxed ? "MP" : ""), ts: "(", ts&: expr, ts: ")");
18870 }
18871}
18872
18873void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component)
18874{
18875 masked_output_locations.insert(x: { .location: location, .component: component });
18876}
18877
18878void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin)
18879{
18880 masked_output_builtins.insert(x: builtin);
18881}
18882
18883bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const
18884{
18885 auto &type = get<SPIRType>(id: var.basetype);
18886 bool is_block = has_decoration(id: type.self, decoration: DecorationBlock);
18887 // Blocks by themselves are never masked. Must be masked per-member.
18888 if (is_block)
18889 return false;
18890
18891 bool is_builtin = has_decoration(id: var.self, decoration: DecorationBuiltIn);
18892
18893 if (is_builtin)
18894 {
18895 return is_stage_output_builtin_masked(builtin: BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn)));
18896 }
18897 else
18898 {
18899 if (!has_decoration(id: var.self, decoration: DecorationLocation))
18900 return false;
18901
18902 return is_stage_output_location_masked(
18903 location: get_decoration(id: var.self, decoration: DecorationLocation),
18904 component: get_decoration(id: var.self, decoration: DecorationComponent));
18905 }
18906}
18907
18908bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const
18909{
18910 auto &type = get<SPIRType>(id: var.basetype);
18911 bool is_block = has_decoration(id: type.self, decoration: DecorationBlock);
18912 if (!is_block)
18913 return false;
18914
18915 BuiltIn builtin = BuiltInMax;
18916 if (is_member_builtin(type, index, builtin: &builtin))
18917 {
18918 return is_stage_output_builtin_masked(builtin);
18919 }
18920 else
18921 {
18922 uint32_t location = get_declared_member_location(var, mbr_idx: index, strip_array);
18923 uint32_t component = get_member_decoration(id: type.self, index, decoration: DecorationComponent);
18924 return is_stage_output_location_masked(location, component);
18925 }
18926}
18927
18928bool CompilerGLSL::is_per_primitive_variable(const SPIRVariable &var) const
18929{
18930 if (has_decoration(id: var.self, decoration: DecorationPerPrimitiveEXT))
18931 return true;
18932
18933 auto &type = get<SPIRType>(id: var.basetype);
18934 if (!has_decoration(id: type.self, decoration: DecorationBlock))
18935 return false;
18936
18937 for (uint32_t i = 0, n = uint32_t(type.member_types.size()); i < n; i++)
18938 if (!has_member_decoration(id: type.self, index: i, decoration: DecorationPerPrimitiveEXT))
18939 return false;
18940
18941 return true;
18942}
18943
18944bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const
18945{
18946 return masked_output_locations.count(x: { .location: location, .component: component }) != 0;
18947}
18948
18949bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const
18950{
18951 return masked_output_builtins.count(x: builtin) != 0;
18952}
18953
18954uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
18955{
18956 auto &block_type = get<SPIRType>(id: var.basetype);
18957 if (has_member_decoration(id: block_type.self, index: mbr_idx, decoration: DecorationLocation))
18958 return get_member_decoration(id: block_type.self, index: mbr_idx, decoration: DecorationLocation);
18959 else
18960 return get_accumulated_member_location(var, mbr_idx, strip_array);
18961}
18962
18963uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
18964{
18965 auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
18966 uint32_t location = get_decoration(id: var.self, decoration: DecorationLocation);
18967
18968 for (uint32_t i = 0; i < mbr_idx; i++)
18969 {
18970 auto &mbr_type = get<SPIRType>(id: type.member_types[i]);
18971
18972 // Start counting from any place we have a new location decoration.
18973 if (has_member_decoration(id: type.self, index: mbr_idx, decoration: DecorationLocation))
18974 location = get_member_decoration(id: type.self, index: mbr_idx, decoration: DecorationLocation);
18975
18976 uint32_t location_count = type_to_location_count(type: mbr_type);
18977 location += location_count;
18978 }
18979
18980 return location;
18981}
18982
18983StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
18984{
18985 auto *var = maybe_get_backing_variable(chain: ptr);
18986
18987 // If the expression has been lowered to a temporary, we need to use the Generic storage class.
18988 // We're looking for the effective storage class of a given expression.
18989 // An access chain or forwarded OpLoads from such access chains
18990 // will generally have the storage class of the underlying variable, but if the load was not forwarded
18991 // we have lost any address space qualifiers.
18992 bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(id: ptr).access_chain &&
18993 (forced_temporaries.count(x: ptr) != 0 || forwarded_temporaries.count(x: ptr) == 0);
18994
18995 if (var && !forced_temporary)
18996 {
18997 if (variable_decl_is_remapped_storage(var: *var, storage: StorageClassWorkgroup))
18998 return StorageClassWorkgroup;
18999 if (variable_decl_is_remapped_storage(var: *var, storage: StorageClassStorageBuffer))
19000 return StorageClassStorageBuffer;
19001
19002 // Normalize SSBOs to StorageBuffer here.
19003 if (var->storage == StorageClassUniform &&
19004 has_decoration(id: get<SPIRType>(id: var->basetype).self, decoration: DecorationBufferBlock))
19005 return StorageClassStorageBuffer;
19006 else
19007 return var->storage;
19008 }
19009 else
19010 return expression_type(id: ptr).storage;
19011}
19012
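// For illustration only, the location counts computed below:
//
//     float / vec4               -> 1 location
//     mat4                       -> 4 locations (one per column)
//     vec2 v[3]                  -> 3 locations (array size times per-element count)
//     struct { vec3 a; mat2 b; } -> 1 + 2 = 3 locations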
19013uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const
19014{
19015 uint32_t count;
19016 if (type.basetype == SPIRType::Struct)
19017 {
19018 uint32_t mbr_count = uint32_t(type.member_types.size());
19019 count = 0;
19020 for (uint32_t i = 0; i < mbr_count; i++)
19021 count += type_to_location_count(type: get<SPIRType>(id: type.member_types[i]));
19022 }
19023 else
19024 {
19025 count = type.columns > 1 ? type.columns : 1;
19026 }
19027
19028 uint32_t dim_count = uint32_t(type.array.size());
19029 for (uint32_t i = 0; i < dim_count; i++)
19030 count *= to_array_size_literal(type, index: i);
19031
19032 return count;
19033}
19034
19035std::string CompilerGLSL::format_float(float value) const
19036{
19037 if (float_formatter)
19038 return float_formatter->format_float(value);
19039
19040 // default behavior
19041 return convert_to_string(t: value, locale_radix_point: current_locale_radix_character);
19042}
19043
19044std::string CompilerGLSL::format_double(double value) const
19045{
19046 if (float_formatter)
19047 return float_formatter->format_double(value);
19048
19049 // default behavior
19050 return convert_to_string(t: value, locale_radix_point: current_locale_radix_character);
19051}