1 | /* |
2 | * Copyright 2015-2021 Arm Limited |
3 | * SPDX-License-Identifier: Apache-2.0 OR MIT |
4 | * |
5 | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | * you may not use this file except in compliance with the License. |
7 | * You may obtain a copy of the License at |
8 | * |
9 | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | * |
11 | * Unless required by applicable law or agreed to in writing, software |
12 | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | * See the License for the specific language governing permissions and |
15 | * limitations under the License. |
16 | */ |
17 | |
18 | /* |
19 | * At your option, you may choose to accept this material under either: |
20 | * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or |
21 | * 2. The MIT License, found at <http://opensource.org/licenses/MIT>. |
22 | */ |
23 | |
24 | #include "spirv_glsl.hpp" |
25 | #include "GLSL.std.450.h" |
26 | #include "spirv_common.hpp" |
27 | #include <algorithm> |
28 | #include <assert.h> |
29 | #include <cmath> |
30 | #include <limits> |
31 | #include <locale.h> |
32 | #include <utility> |
33 | |
34 | #ifndef _WIN32 |
35 | #ifndef __ghs__ |
36 | #include <langinfo.h> |
37 | #endif |
38 | #endif |
39 | #include <locale.h> |
40 | |
41 | using namespace spv; |
42 | using namespace SPIRV_CROSS_NAMESPACE; |
43 | using namespace std; |
44 | |
enum ExtraSubExpressionType
{
	// Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map.
	EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000,
	EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000
};
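// Illustrative sketch (based on the comment above, not a full list of uses): an auxiliary expression
// for a given ID can be tracked as extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX],
// which keeps such keys disjoint from plain SPIR-V IDs and from the stream-offset "address space".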
51 | |
52 | static bool is_unsigned_opcode(Op op) |
53 | { |
54 | // Don't have to be exhaustive, only relevant for legacy target checking ... |
55 | switch (op) |
56 | { |
57 | case OpShiftRightLogical: |
58 | case OpUGreaterThan: |
59 | case OpUGreaterThanEqual: |
60 | case OpULessThan: |
61 | case OpULessThanEqual: |
62 | case OpUConvert: |
63 | case OpUDiv: |
64 | case OpUMod: |
65 | case OpUMulExtended: |
66 | case OpConvertUToF: |
67 | case OpConvertFToU: |
68 | return true; |
69 | |
70 | default: |
71 | return false; |
72 | } |
73 | } |
74 | |
75 | static bool is_unsigned_glsl_opcode(GLSLstd450 op) |
76 | { |
77 | // Don't have to be exhaustive, only relevant for legacy target checking ... |
78 | switch (op) |
79 | { |
80 | case GLSLstd450UClamp: |
81 | case GLSLstd450UMin: |
82 | case GLSLstd450UMax: |
83 | case GLSLstd450FindUMsb: |
84 | return true; |
85 | |
86 | default: |
87 | return false; |
88 | } |
89 | } |
90 | |
91 | static bool packing_is_vec4_padded(BufferPackingStandard packing) |
92 | { |
93 | switch (packing) |
94 | { |
95 | case BufferPackingHLSLCbuffer: |
96 | case BufferPackingHLSLCbufferPackOffset: |
97 | case BufferPackingStd140: |
98 | case BufferPackingStd140EnhancedLayout: |
99 | return true; |
100 | |
101 | default: |
102 | return false; |
103 | } |
104 | } |
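// "vec4 padded" means the layout rounds array elements and struct members up to 16-byte boundaries,
// as std140 does and as HLSL cbuffers effectively do with their 16-byte register granularity.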
105 | |
106 | static bool packing_is_hlsl(BufferPackingStandard packing) |
107 | { |
108 | switch (packing) |
109 | { |
110 | case BufferPackingHLSLCbuffer: |
111 | case BufferPackingHLSLCbufferPackOffset: |
112 | return true; |
113 | |
114 | default: |
115 | return false; |
116 | } |
117 | } |
118 | |
119 | static bool packing_has_flexible_offset(BufferPackingStandard packing) |
120 | { |
121 | switch (packing) |
122 | { |
123 | case BufferPackingStd140: |
124 | case BufferPackingStd430: |
125 | case BufferPackingScalar: |
126 | case BufferPackingHLSLCbuffer: |
127 | return false; |
128 | |
129 | default: |
130 | return true; |
131 | } |
132 | } |
133 | |
134 | static bool packing_is_scalar(BufferPackingStandard packing) |
135 | { |
136 | switch (packing) |
137 | { |
138 | case BufferPackingScalar: |
139 | case BufferPackingScalarEnhancedLayout: |
140 | return true; |
141 | |
142 | default: |
143 | return false; |
144 | } |
145 | } |
146 | |
147 | static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing) |
148 | { |
149 | switch (packing) |
150 | { |
151 | case BufferPackingStd140EnhancedLayout: |
152 | return BufferPackingStd140; |
153 | case BufferPackingStd430EnhancedLayout: |
154 | return BufferPackingStd430; |
155 | case BufferPackingHLSLCbufferPackOffset: |
156 | return BufferPackingHLSLCbuffer; |
157 | case BufferPackingScalarEnhancedLayout: |
158 | return BufferPackingScalar; |
159 | default: |
160 | return packing; |
161 | } |
162 | } |
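// Example: a struct member nested inside an std140 enhanced-layout block is itself laid out as plain
// std140; the *EnhancedLayout (explicit offset) property is not propagated to nested structs.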
163 | |
164 | void CompilerGLSL::init() |
165 | { |
166 | if (ir.source.known) |
167 | { |
168 | options.es = ir.source.es; |
169 | options.version = ir.source.version; |
170 | } |
171 | |
172 | // Query the locale to see what the decimal point is. |
173 | // We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale |
// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
175 | // tricky. |
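// For example, under a comma-as-decimal locale such as de_DE, a printf-style conversion yields "1,5";
// knowing current_locale_radix_character lets the float-to-string paths emit "1.5" instead.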
176 | #ifdef _WIN32 |
177 | // On Windows, localeconv uses thread-local storage, so it should be fine. |
178 | const struct lconv *conv = localeconv(); |
179 | if (conv && conv->decimal_point) |
180 | current_locale_radix_character = *conv->decimal_point; |
181 | #elif defined(__ANDROID__) && __ANDROID_API__ < 26 || defined(__ghs__) || defined(__QNXNTO__) |
// nl_langinfo is not supported on this platform, so fall back to the inferior alternative.
183 | const struct lconv *conv = localeconv(); |
184 | if (conv && conv->decimal_point) |
185 | current_locale_radix_character = *conv->decimal_point; |
186 | #else |
// localeconv, the portable function, is not MT safe ...
188 | const char *decimal_point = nl_langinfo(RADIXCHAR); |
189 | if (decimal_point && *decimal_point != '\0') |
190 | current_locale_radix_character = *decimal_point; |
191 | #endif |
192 | } |
193 | |
194 | static const char *to_pls_layout(PlsFormat format) |
195 | { |
196 | switch (format) |
197 | { |
198 | case PlsR11FG11FB10F: |
199 | return "layout(r11f_g11f_b10f) " ; |
200 | case PlsR32F: |
201 | return "layout(r32f) " ; |
202 | case PlsRG16F: |
203 | return "layout(rg16f) " ; |
204 | case PlsRGB10A2: |
205 | return "layout(rgb10_a2) " ; |
206 | case PlsRGBA8: |
207 | return "layout(rgba8) " ; |
208 | case PlsRG16: |
209 | return "layout(rg16) " ; |
210 | case PlsRGBA8I: |
return "layout(rgba8i) ";
212 | case PlsRG16I: |
213 | return "layout(rg16i) " ; |
214 | case PlsRGB10A2UI: |
215 | return "layout(rgb10_a2ui) " ; |
216 | case PlsRGBA8UI: |
217 | return "layout(rgba8ui) " ; |
218 | case PlsRG16UI: |
219 | return "layout(rg16ui) " ; |
220 | case PlsR32UI: |
221 | return "layout(r32ui) " ; |
222 | default: |
223 | return "" ; |
224 | } |
225 | } |
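// The trailing space in each qualifier string above is intentional: callers concatenate the result
// directly with the declaration that follows it.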
226 | |
227 | static SPIRType::BaseType pls_format_to_basetype(PlsFormat format) |
228 | { |
229 | switch (format) |
230 | { |
231 | default: |
232 | case PlsR11FG11FB10F: |
233 | case PlsR32F: |
234 | case PlsRG16F: |
235 | case PlsRGB10A2: |
236 | case PlsRGBA8: |
237 | case PlsRG16: |
238 | return SPIRType::Float; |
239 | |
240 | case PlsRGBA8I: |
241 | case PlsRG16I: |
242 | return SPIRType::Int; |
243 | |
244 | case PlsRGB10A2UI: |
245 | case PlsRGBA8UI: |
246 | case PlsRG16UI: |
247 | case PlsR32UI: |
248 | return SPIRType::UInt; |
249 | } |
250 | } |
251 | |
252 | static uint32_t pls_format_to_components(PlsFormat format) |
253 | { |
254 | switch (format) |
255 | { |
256 | default: |
257 | case PlsR32F: |
258 | case PlsR32UI: |
259 | return 1; |
260 | |
261 | case PlsRG16F: |
262 | case PlsRG16: |
263 | case PlsRG16UI: |
264 | case PlsRG16I: |
265 | return 2; |
266 | |
267 | case PlsR11FG11FB10F: |
268 | return 3; |
269 | |
270 | case PlsRGB10A2: |
271 | case PlsRGBA8: |
272 | case PlsRGBA8I: |
273 | case PlsRGB10A2UI: |
274 | case PlsRGBA8UI: |
275 | return 4; |
276 | } |
277 | } |
278 | |
279 | const char *CompilerGLSL::vector_swizzle(int vecsize, int index) |
280 | { |
281 | static const char *const swizzle[4][4] = { |
282 | { ".x" , ".y" , ".z" , ".w" }, |
283 | { ".xy" , ".yz" , ".zw" , nullptr }, |
284 | { ".xyz" , ".yzw" , nullptr, nullptr }, |
285 | #if defined(__GNUC__) && (__GNUC__ == 9) |
286 | // This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947. |
287 | // This array ends up being compiled as all nullptrs, tripping the assertions below. |
288 | { "" , nullptr, nullptr, "$" }, |
289 | #else |
290 | { "" , nullptr, nullptr, nullptr }, |
291 | #endif |
292 | }; |
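// Example: vector_swizzle(2, 1) yields ".yz" (two components starting at .y), while
// vector_swizzle(4, 0) yields "" since a full 4-component access needs no swizzle suffix.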
293 | |
294 | assert(vecsize >= 1 && vecsize <= 4); |
295 | assert(index >= 0 && index < 4); |
296 | assert(swizzle[vecsize - 1][index]); |
297 | |
298 | return swizzle[vecsize - 1][index]; |
299 | } |
300 | |
301 | void CompilerGLSL::reset(uint32_t iteration_count) |
302 | { |
303 | // Sanity check the iteration count to be robust against a certain class of bugs where |
304 | // we keep forcing recompilations without making clear forward progress. |
305 | // In buggy situations we will loop forever, or loop for an unbounded number of iterations. |
306 | // Certain types of recompilations are considered to make forward progress, |
307 | // but in almost all situations, we'll never see more than 3 iterations. |
308 | // It is highly context-sensitive when we need to force recompilation, |
309 | // and it is not practical with the current architecture |
310 | // to resolve everything up front. |
311 | if (iteration_count >= options.force_recompile_max_debug_iterations && !is_force_recompile_forward_progress) |
312 | SPIRV_CROSS_THROW("Maximum compilation loops detected and no forward progress was made. Must be a SPIRV-Cross bug!" ); |
313 | |
314 | // We do some speculative optimizations which should pretty much always work out, |
315 | // but just in case the SPIR-V is rather weird, recompile until it's happy. |
316 | // This typically only means one extra pass. |
317 | clear_force_recompile(); |
318 | |
319 | // Clear invalid expression tracking. |
320 | invalid_expressions.clear(); |
321 | composite_insert_overwritten.clear(); |
322 | current_function = nullptr; |
323 | |
324 | // Clear temporary usage tracking. |
325 | expression_usage_counts.clear(); |
326 | forwarded_temporaries.clear(); |
327 | suppressed_usage_tracking.clear(); |
328 | |
329 | // Ensure that we declare phi-variable copies even if the original declaration isn't deferred |
330 | flushed_phi_variables.clear(); |
331 | |
332 | reset_name_caches(); |
333 | |
	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
		func.active = false;
		func.flush_undeclared = true;
	});

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });
340 | |
341 | ir.reset_all_of_type<SPIRExpression>(); |
342 | ir.reset_all_of_type<SPIRAccessChain>(); |
343 | |
344 | statement_count = 0; |
345 | indent = 0; |
346 | current_loop_level = 0; |
347 | } |
348 | |
void CompilerGLSL::remap_pls_variables()
{
	for (auto &input : pls_inputs)
	{
		auto &var = get<SPIRVariable>(input.id);

		bool input_is_target = false;
		if (var.storage == StorageClassUniformConstant)
		{
			auto &type = get<SPIRType>(var.basetype);
			input_is_target = type.image.dim == DimSubpassData;
		}

		if (var.storage != StorageClassInput && !input_is_target)
			SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
		var.remapped_variable = true;
	}

	for (auto &output : pls_outputs)
	{
		auto &var = get<SPIRVariable>(output.id);
		if (var.storage != StorageClassOutput)
			SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
		var.remapped_variable = true;
	}
}
375 | |
void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent)
{
	subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
	inout_color_attachments.push_back({ color_location, coherent });
}

bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const
{
	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
	                    [&](const std::pair<uint32_t, bool> &elem) {
		                    return elem.first == location;
	                    }) != end(inout_color_attachments);
}

bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const
{
	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
	                    [&](const std::pair<uint32_t, bool> &elem) {
		                    return elem.first == location && !elem.second;
	                    }) != end(inout_color_attachments);
}
397 | |
398 | void CompilerGLSL::find_static_extensions() |
399 | { |
400 | ir.for_each_typed_id<SPIRType>(op: [&](uint32_t, const SPIRType &type) { |
401 | if (type.basetype == SPIRType::Double) |
402 | { |
403 | if (options.es) |
404 | SPIRV_CROSS_THROW("FP64 not supported in ES profile." ); |
405 | if (!options.es && options.version < 400) |
406 | require_extension_internal(ext: "GL_ARB_gpu_shader_fp64" ); |
407 | } |
408 | else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64) |
409 | { |
410 | if (options.es) |
411 | SPIRV_CROSS_THROW("64-bit integers not supported in ES profile." ); |
412 | if (!options.es) |
413 | require_extension_internal(ext: "GL_ARB_gpu_shader_int64" ); |
414 | } |
415 | else if (type.basetype == SPIRType::Half) |
416 | { |
417 | require_extension_internal(ext: "GL_EXT_shader_explicit_arithmetic_types_float16" ); |
418 | if (options.vulkan_semantics) |
419 | require_extension_internal(ext: "GL_EXT_shader_16bit_storage" ); |
420 | } |
421 | else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte) |
422 | { |
423 | require_extension_internal(ext: "GL_EXT_shader_explicit_arithmetic_types_int8" ); |
424 | if (options.vulkan_semantics) |
425 | require_extension_internal(ext: "GL_EXT_shader_8bit_storage" ); |
426 | } |
427 | else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort) |
428 | { |
429 | require_extension_internal(ext: "GL_EXT_shader_explicit_arithmetic_types_int16" ); |
430 | if (options.vulkan_semantics) |
431 | require_extension_internal(ext: "GL_EXT_shader_16bit_storage" ); |
432 | } |
433 | }); |
434 | |
435 | auto &execution = get_entry_point(); |
436 | switch (execution.model) |
437 | { |
438 | case ExecutionModelGLCompute: |
439 | if (!options.es && options.version < 430) |
440 | require_extension_internal(ext: "GL_ARB_compute_shader" ); |
441 | if (options.es && options.version < 310) |
442 | SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders." ); |
443 | break; |
444 | |
445 | case ExecutionModelGeometry: |
446 | if (options.es && options.version < 320) |
447 | require_extension_internal(ext: "GL_EXT_geometry_shader" ); |
448 | if (!options.es && options.version < 150) |
449 | require_extension_internal(ext: "GL_ARB_geometry_shader4" ); |
450 | |
451 | if (execution.flags.get(bit: ExecutionModeInvocations) && execution.invocations != 1) |
452 | { |
453 | // Instanced GS is part of 400 core or this extension. |
454 | if (!options.es && options.version < 400) |
455 | require_extension_internal(ext: "GL_ARB_gpu_shader5" ); |
456 | } |
457 | break; |
458 | |
459 | case ExecutionModelTessellationEvaluation: |
460 | case ExecutionModelTessellationControl: |
461 | if (options.es && options.version < 320) |
462 | require_extension_internal(ext: "GL_EXT_tessellation_shader" ); |
463 | if (!options.es && options.version < 400) |
464 | require_extension_internal(ext: "GL_ARB_tessellation_shader" ); |
465 | break; |
466 | |
467 | case ExecutionModelRayGenerationKHR: |
468 | case ExecutionModelIntersectionKHR: |
469 | case ExecutionModelAnyHitKHR: |
470 | case ExecutionModelClosestHitKHR: |
471 | case ExecutionModelMissKHR: |
472 | case ExecutionModelCallableKHR: |
473 | // NV enums are aliases. |
474 | if (options.es || options.version < 460) |
475 | SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above." ); |
476 | if (!options.vulkan_semantics) |
477 | SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics." ); |
478 | |
479 | // Need to figure out if we should target KHR or NV extension based on capabilities. |
480 | for (auto &cap : ir.declared_capabilities) |
481 | { |
482 | if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR || |
483 | cap == CapabilityRayTraversalPrimitiveCullingKHR) |
484 | { |
485 | ray_tracing_is_khr = true; |
486 | break; |
487 | } |
488 | } |
489 | |
490 | if (ray_tracing_is_khr) |
491 | { |
492 | // In KHR ray tracing we pass payloads by pointer instead of location, |
493 | // so make sure we assign locations properly. |
494 | ray_tracing_khr_fixup_locations(); |
495 | require_extension_internal(ext: "GL_EXT_ray_tracing" ); |
496 | } |
497 | else |
498 | require_extension_internal(ext: "GL_NV_ray_tracing" ); |
499 | break; |
500 | |
501 | default: |
502 | break; |
503 | } |
504 | |
505 | if (!pls_inputs.empty() || !pls_outputs.empty()) |
506 | { |
507 | if (execution.model != ExecutionModelFragment) |
508 | SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders." ); |
509 | require_extension_internal(ext: "GL_EXT_shader_pixel_local_storage" ); |
510 | } |
511 | |
512 | if (!inout_color_attachments.empty()) |
513 | { |
514 | if (execution.model != ExecutionModelFragment) |
515 | SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders." ); |
516 | if (options.vulkan_semantics) |
517 | SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL." ); |
518 | |
519 | bool has_coherent = false; |
520 | bool has_incoherent = false; |
521 | |
522 | for (auto &att : inout_color_attachments) |
523 | { |
524 | if (att.second) |
525 | has_coherent = true; |
526 | else |
527 | has_incoherent = true; |
528 | } |
529 | |
530 | if (has_coherent) |
531 | require_extension_internal(ext: "GL_EXT_shader_framebuffer_fetch" ); |
532 | if (has_incoherent) |
533 | require_extension_internal(ext: "GL_EXT_shader_framebuffer_fetch_non_coherent" ); |
534 | } |
535 | |
536 | if (options.separate_shader_objects && !options.es && options.version < 410) |
537 | require_extension_internal(ext: "GL_ARB_separate_shader_objects" ); |
538 | |
539 | if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) |
540 | { |
541 | if (!options.vulkan_semantics) |
542 | SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL." ); |
543 | if (options.es && options.version < 320) |
544 | SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320." ); |
545 | else if (!options.es && options.version < 450) |
546 | SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450." ); |
547 | require_extension_internal(ext: "GL_EXT_buffer_reference" ); |
548 | } |
549 | else if (ir.addressing_model != AddressingModelLogical) |
550 | { |
551 | SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported." ); |
552 | } |
553 | |
554 | // Check for nonuniform qualifier and passthrough. |
555 | // Instead of looping over all decorations to find this, just look at capabilities. |
556 | for (auto &cap : ir.declared_capabilities) |
557 | { |
558 | switch (cap) |
559 | { |
560 | case CapabilityShaderNonUniformEXT: |
561 | if (!options.vulkan_semantics) |
562 | require_extension_internal(ext: "GL_NV_gpu_shader5" ); |
563 | else |
564 | require_extension_internal(ext: "GL_EXT_nonuniform_qualifier" ); |
565 | break; |
566 | case CapabilityRuntimeDescriptorArrayEXT: |
567 | if (!options.vulkan_semantics) |
568 | SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL." ); |
569 | require_extension_internal(ext: "GL_EXT_nonuniform_qualifier" ); |
570 | break; |
571 | |
572 | case CapabilityGeometryShaderPassthroughNV: |
573 | if (execution.model == ExecutionModelGeometry) |
574 | { |
575 | require_extension_internal(ext: "GL_NV_geometry_shader_passthrough" ); |
576 | execution.geometry_passthrough = true; |
577 | } |
578 | break; |
579 | |
580 | case CapabilityVariablePointers: |
581 | case CapabilityVariablePointersStorageBuffer: |
582 | SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL." ); |
583 | |
584 | case CapabilityMultiView: |
585 | if (options.vulkan_semantics) |
586 | require_extension_internal(ext: "GL_EXT_multiview" ); |
587 | else |
588 | { |
589 | require_extension_internal(ext: "GL_OVR_multiview2" ); |
590 | if (options.ovr_multiview_view_count == 0) |
591 | SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2." ); |
592 | if (get_execution_model() != ExecutionModelVertex) |
593 | SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders." ); |
594 | } |
595 | break; |
596 | |
597 | case CapabilityRayQueryKHR: |
598 | if (options.es || options.version < 460 || !options.vulkan_semantics) |
599 | SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460." ); |
600 | require_extension_internal(ext: "GL_EXT_ray_query" ); |
601 | ray_tracing_is_khr = true; |
602 | break; |
603 | |
604 | case CapabilityRayTraversalPrimitiveCullingKHR: |
605 | if (options.es || options.version < 460 || !options.vulkan_semantics) |
606 | SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460." ); |
607 | require_extension_internal(ext: "GL_EXT_ray_flags_primitive_culling" ); |
608 | ray_tracing_is_khr = true; |
609 | break; |
610 | |
611 | default: |
612 | break; |
613 | } |
614 | } |
615 | |
616 | if (options.ovr_multiview_view_count) |
617 | { |
618 | if (options.vulkan_semantics) |
619 | SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics." ); |
620 | if (get_execution_model() != ExecutionModelVertex) |
621 | SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders." ); |
622 | require_extension_internal(ext: "GL_OVR_multiview2" ); |
623 | } |
624 | |
625 | // KHR one is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR. |
626 | for (auto &ext : ir.declared_extensions) |
627 | if (ext == "SPV_NV_fragment_shader_barycentric" ) |
628 | barycentric_is_nv = true; |
629 | } |
630 | |
void CompilerGLSL::ray_tracing_khr_fixup_locations()
{
	uint32_t location = 0;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		// Incoming payload storage can also be used for tracing.
		if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR &&
		    var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR)
			return;
		if (is_hidden_variable(var))
			return;
		set_decoration(var.self, DecorationLocation, location++);
	});
}
644 | |
645 | string CompilerGLSL::compile() |
646 | { |
647 | ir.fixup_reserved_names(); |
648 | |
649 | if (!options.vulkan_semantics) |
650 | { |
651 | // only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers |
652 | backend.nonuniform_qualifier = "" ; |
653 | backend.needs_row_major_load_workaround = true; |
654 | } |
655 | backend.allow_precision_qualifiers = options.vulkan_semantics || options.es; |
656 | backend.force_gl_in_out_block = true; |
657 | backend.supports_extensions = true; |
658 | backend.use_array_constructor = true; |
659 | backend.workgroup_size_is_hidden = true; |
660 | backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics; |
661 | backend.support_precise_qualifier = |
662 | (!options.es && options.version >= 400) || (options.es && options.version >= 320); |
663 | |
664 | if (is_legacy_es()) |
665 | backend.support_case_fallthrough = false; |
666 | |
667 | // Scan the SPIR-V to find trivial uses of extensions. |
668 | fixup_anonymous_struct_names(); |
669 | fixup_type_alias(); |
670 | reorder_type_alias(); |
671 | build_function_control_flow_graphs_and_analyze(); |
672 | find_static_extensions(); |
673 | fixup_image_load_store_access(); |
674 | update_active_builtins(); |
675 | analyze_image_and_sampler_usage(); |
676 | analyze_interlocked_resource_usage(); |
677 | if (!inout_color_attachments.empty()) |
678 | emit_inout_fragment_outputs_copy_to_subpass_inputs(); |
679 | |
680 | // Shaders might cast unrelated data to pointers of non-block types. |
681 | // Find all such instances and make sure we can cast the pointers to a synthesized block type. |
682 | if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) |
683 | analyze_non_block_pointer_types(); |
684 | |
685 | uint32_t pass_count = 0; |
686 | do |
687 | { |
688 | reset(iteration_count: pass_count); |
689 | |
690 | buffer.reset(); |
691 | |
692 | emit_header(); |
693 | emit_resources(); |
694 | emit_extension_workarounds(model: get_execution_model()); |
695 | |
696 | emit_function(func&: get<SPIRFunction>(id: ir.default_entry_point), return_flags: Bitset()); |
697 | |
698 | pass_count++; |
699 | } while (is_forcing_recompilation()); |
700 | |
701 | // Implement the interlocked wrapper function at the end. |
702 | // The body was implemented in lieu of main(). |
703 | if (interlocked_is_complex) |
704 | { |
705 | statement(ts: "void main()" ); |
706 | begin_scope(); |
707 | statement(ts: "// Interlocks were used in a way not compatible with GLSL, this is very slow." ); |
708 | statement(ts: "SPIRV_Cross_beginInvocationInterlock();" ); |
709 | statement(ts: "spvMainInterlockedBody();" ); |
710 | statement(ts: "SPIRV_Cross_endInvocationInterlock();" ); |
711 | end_scope(); |
712 | } |
713 | |
714 | // Entry point in GLSL is always main(). |
715 | get_entry_point().name = "main" ; |
716 | |
717 | return buffer.str(); |
718 | } |
719 | |
720 | std::string CompilerGLSL::get_partial_source() |
721 | { |
722 | return buffer.str(); |
723 | } |
724 | |
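// Builds the local_size_* layout arguments for a compute shader. A rough sketch of the results:
// without spec constants this yields "local_size_x = 8, local_size_y = 8, local_size_z = 1";
// with spec constants it yields "local_size_x_id = N" under Vulkan semantics, or references the
// generated constant macro (typically of the form SPIRV_CROSS_CONSTANT_ID_<SpecId>) in plain GLSL.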
725 | void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x, |
726 | const SpecializationConstant &wg_y, const SpecializationConstant &wg_z) |
727 | { |
728 | auto &execution = get_entry_point(); |
729 | bool builtin_workgroup = execution.workgroup_size.constant != 0; |
730 | bool use_local_size_id = !builtin_workgroup && execution.flags.get(bit: ExecutionModeLocalSizeId); |
731 | |
732 | if (wg_x.id) |
733 | { |
734 | if (options.vulkan_semantics) |
735 | arguments.push_back(t: join(ts: "local_size_x_id = " , ts: wg_x.constant_id)); |
736 | else |
737 | arguments.push_back(t: join(ts: "local_size_x = " , ts&: get<SPIRConstant>(id: wg_x.id).specialization_constant_macro_name)); |
738 | } |
739 | else if (use_local_size_id && execution.workgroup_size.id_x) |
740 | arguments.push_back(t: join(ts: "local_size_x = " , ts: get<SPIRConstant>(id: execution.workgroup_size.id_x).scalar())); |
741 | else |
742 | arguments.push_back(t: join(ts: "local_size_x = " , ts&: execution.workgroup_size.x)); |
743 | |
744 | if (wg_y.id) |
745 | { |
746 | if (options.vulkan_semantics) |
747 | arguments.push_back(t: join(ts: "local_size_y_id = " , ts: wg_y.constant_id)); |
748 | else |
749 | arguments.push_back(t: join(ts: "local_size_y = " , ts&: get<SPIRConstant>(id: wg_y.id).specialization_constant_macro_name)); |
750 | } |
751 | else if (use_local_size_id && execution.workgroup_size.id_y) |
752 | arguments.push_back(t: join(ts: "local_size_y = " , ts: get<SPIRConstant>(id: execution.workgroup_size.id_y).scalar())); |
753 | else |
754 | arguments.push_back(t: join(ts: "local_size_y = " , ts&: execution.workgroup_size.y)); |
755 | |
756 | if (wg_z.id) |
757 | { |
758 | if (options.vulkan_semantics) |
759 | arguments.push_back(t: join(ts: "local_size_z_id = " , ts: wg_z.constant_id)); |
760 | else |
761 | arguments.push_back(t: join(ts: "local_size_z = " , ts&: get<SPIRConstant>(id: wg_z.id).specialization_constant_macro_name)); |
762 | } |
763 | else if (use_local_size_id && execution.workgroup_size.id_z) |
764 | arguments.push_back(t: join(ts: "local_size_z = " , ts: get<SPIRConstant>(id: execution.workgroup_size.id_z).scalar())); |
765 | else |
766 | arguments.push_back(t: join(ts: "local_size_z = " , ts&: execution.workgroup_size.z)); |
767 | } |
768 | |
void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
{
	if (options.vulkan_semantics)
	{
		auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
		require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
	}
	else
	{
		if (!shader_subgroup_supporter.is_feature_requested(feature))
			force_recompile();
		shader_subgroup_supporter.request_feature(feature);
	}
}
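// Note that on non-Vulkan targets the first request for a new subgroup feature forces a recompile,
// so that the header pass can emit the extension fallback chain for it on the next iteration.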
783 | |
void CompilerGLSL::emit_header()
785 | { |
786 | auto &execution = get_entry_point(); |
787 | statement(ts: "#version " , ts&: options.version, ts: options.es && options.version > 100 ? " es" : "" ); |
788 | |
789 | if (!options.es && options.version < 420) |
790 | { |
791 | // Needed for binding = # on UBOs, etc. |
792 | if (options.enable_420pack_extension) |
793 | { |
794 | statement(ts: "#ifdef GL_ARB_shading_language_420pack" ); |
795 | statement(ts: "#extension GL_ARB_shading_language_420pack : require" ); |
796 | statement(ts: "#endif" ); |
797 | } |
798 | // Needed for: layout(early_fragment_tests) in; |
799 | if (execution.flags.get(bit: ExecutionModeEarlyFragmentTests)) |
800 | require_extension_internal(ext: "GL_ARB_shader_image_load_store" ); |
801 | } |
802 | |
803 | // Needed for: layout(post_depth_coverage) in; |
804 | if (execution.flags.get(bit: ExecutionModePostDepthCoverage)) |
805 | require_extension_internal(ext: "GL_ARB_post_depth_coverage" ); |
806 | |
807 | // Needed for: layout({pixel,sample}_interlock_[un]ordered) in; |
808 | bool interlock_used = execution.flags.get(bit: ExecutionModePixelInterlockOrderedEXT) || |
809 | execution.flags.get(bit: ExecutionModePixelInterlockUnorderedEXT) || |
810 | execution.flags.get(bit: ExecutionModeSampleInterlockOrderedEXT) || |
811 | execution.flags.get(bit: ExecutionModeSampleInterlockUnorderedEXT); |
812 | |
813 | if (interlock_used) |
814 | { |
815 | if (options.es) |
816 | { |
817 | if (options.version < 310) |
818 | SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock." ); |
819 | require_extension_internal(ext: "GL_NV_fragment_shader_interlock" ); |
820 | } |
821 | else |
822 | { |
823 | if (options.version < 420) |
824 | require_extension_internal(ext: "GL_ARB_shader_image_load_store" ); |
825 | require_extension_internal(ext: "GL_ARB_fragment_shader_interlock" ); |
826 | } |
827 | } |
828 | |
829 | for (auto &ext : forced_extensions) |
830 | { |
831 | if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16" ) |
832 | { |
833 | // Special case, this extension has a potential fallback to another vendor extension in normal GLSL. |
834 | // GL_AMD_gpu_shader_half_float is a superset, so try that first. |
835 | statement(ts: "#if defined(GL_AMD_gpu_shader_half_float)" ); |
836 | statement(ts: "#extension GL_AMD_gpu_shader_half_float : require" ); |
837 | if (!options.vulkan_semantics) |
838 | { |
839 | statement(ts: "#elif defined(GL_NV_gpu_shader5)" ); |
840 | statement(ts: "#extension GL_NV_gpu_shader5 : require" ); |
841 | } |
842 | else |
843 | { |
844 | statement(ts: "#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)" ); |
845 | statement(ts: "#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require" ); |
846 | } |
847 | statement(ts: "#else" ); |
848 | statement(ts: "#error No extension available for FP16." ); |
849 | statement(ts: "#endif" ); |
850 | } |
851 | else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16" ) |
852 | { |
853 | if (options.vulkan_semantics) |
854 | statement(ts: "#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require" ); |
855 | else |
856 | { |
857 | statement(ts: "#if defined(GL_AMD_gpu_shader_int16)" ); |
858 | statement(ts: "#extension GL_AMD_gpu_shader_int16 : require" ); |
859 | statement(ts: "#elif defined(GL_NV_gpu_shader5)" ); |
860 | statement(ts: "#extension GL_NV_gpu_shader5 : require" ); |
861 | statement(ts: "#else" ); |
862 | statement(ts: "#error No extension available for Int16." ); |
863 | statement(ts: "#endif" ); |
864 | } |
865 | } |
866 | else if (ext == "GL_ARB_post_depth_coverage" ) |
867 | { |
868 | if (options.es) |
869 | statement(ts: "#extension GL_EXT_post_depth_coverage : require" ); |
870 | else |
871 | { |
statement("#if defined(GL_ARB_post_depth_coverage)");
873 | statement(ts: "#extension GL_ARB_post_depth_coverage : require" ); |
874 | statement(ts: "#else" ); |
875 | statement(ts: "#extension GL_EXT_post_depth_coverage : require" ); |
876 | statement(ts: "#endif" ); |
877 | } |
878 | } |
879 | else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters" ) |
880 | { |
881 | // Soft-enable this extension on plain GLSL. |
882 | statement(ts: "#ifdef " , ts&: ext); |
883 | statement(ts: "#extension " , ts&: ext, ts: " : enable" ); |
884 | statement(ts: "#endif" ); |
885 | } |
886 | else if (ext == "GL_EXT_control_flow_attributes" ) |
887 | { |
888 | // These are just hints so we can conditionally enable and fallback in the shader. |
889 | statement(ts: "#if defined(GL_EXT_control_flow_attributes)" ); |
890 | statement(ts: "#extension GL_EXT_control_flow_attributes : require" ); |
891 | statement(ts: "#define SPIRV_CROSS_FLATTEN [[flatten]]" ); |
892 | statement(ts: "#define SPIRV_CROSS_BRANCH [[dont_flatten]]" ); |
893 | statement(ts: "#define SPIRV_CROSS_UNROLL [[unroll]]" ); |
894 | statement(ts: "#define SPIRV_CROSS_LOOP [[dont_unroll]]" ); |
895 | statement(ts: "#else" ); |
896 | statement(ts: "#define SPIRV_CROSS_FLATTEN" ); |
897 | statement(ts: "#define SPIRV_CROSS_BRANCH" ); |
898 | statement(ts: "#define SPIRV_CROSS_UNROLL" ); |
899 | statement(ts: "#define SPIRV_CROSS_LOOP" ); |
900 | statement(ts: "#endif" ); |
901 | } |
902 | else if (ext == "GL_NV_fragment_shader_interlock" ) |
903 | { |
904 | statement(ts: "#extension GL_NV_fragment_shader_interlock : require" ); |
905 | statement(ts: "#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockNV()" ); |
906 | statement(ts: "#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockNV()" ); |
907 | } |
908 | else if (ext == "GL_ARB_fragment_shader_interlock" ) |
909 | { |
910 | statement(ts: "#ifdef GL_ARB_fragment_shader_interlock" ); |
911 | statement(ts: "#extension GL_ARB_fragment_shader_interlock : enable" ); |
912 | statement(ts: "#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB()" ); |
913 | statement(ts: "#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB()" ); |
914 | statement(ts: "#elif defined(GL_INTEL_fragment_shader_ordering)" ); |
915 | statement(ts: "#extension GL_INTEL_fragment_shader_ordering : enable" ); |
916 | statement(ts: "#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL()" ); |
917 | statement(ts: "#define SPIRV_Cross_endInvocationInterlock()" ); |
918 | statement(ts: "#endif" ); |
919 | } |
920 | else |
921 | statement(ts: "#extension " , ts&: ext, ts: " : require" ); |
922 | } |
923 | |
924 | if (!options.vulkan_semantics) |
925 | { |
926 | using Supp = ShaderSubgroupSupportHelper; |
927 | auto result = shader_subgroup_supporter.resolve(); |
928 | |
929 | for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++) |
930 | { |
931 | auto feature = static_cast<Supp::Feature>(feature_index); |
932 | if (!shader_subgroup_supporter.is_feature_requested(feature)) |
933 | continue; |
934 | |
935 | auto exts = Supp::get_candidates_for_feature(ft: feature, r: result); |
936 | if (exts.empty()) |
937 | continue; |
938 | |
939 | statement(ts: "" ); |
940 | |
941 | for (auto &ext : exts) |
942 | { |
				const char *name = Supp::get_extension_name(ext);
				const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
				auto extra_names = Supp::get_extra_required_extension_names(ext);
				statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
				          (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
				for (const auto &e : extra_names)
					statement("#extension ", e, " : enable");
				statement("#extension ", name, " : require");
951 | } |
952 | |
953 | if (!Supp::can_feature_be_implemented_without_extensions(feature)) |
954 | { |
955 | statement(ts: "#else" ); |
956 | statement(ts: "#error No extensions available to emulate requested subgroup feature." ); |
957 | } |
958 | |
959 | statement(ts: "#endif" ); |
960 | } |
961 | } |
962 | |
	for (auto &header : header_lines)
		statement(header);
965 | |
966 | SmallVector<string> inputs; |
967 | SmallVector<string> outputs; |
968 | |
969 | switch (execution.model) |
970 | { |
971 | case ExecutionModelVertex: |
972 | if (options.ovr_multiview_view_count) |
973 | inputs.push_back(t: join(ts: "num_views = " , ts&: options.ovr_multiview_view_count)); |
974 | break; |
975 | case ExecutionModelGeometry: |
976 | if ((execution.flags.get(bit: ExecutionModeInvocations)) && execution.invocations != 1) |
977 | inputs.push_back(t: join(ts: "invocations = " , ts&: execution.invocations)); |
978 | if (execution.flags.get(bit: ExecutionModeInputPoints)) |
979 | inputs.push_back(t: "points" ); |
980 | if (execution.flags.get(bit: ExecutionModeInputLines)) |
981 | inputs.push_back(t: "lines" ); |
982 | if (execution.flags.get(bit: ExecutionModeInputLinesAdjacency)) |
983 | inputs.push_back(t: "lines_adjacency" ); |
984 | if (execution.flags.get(bit: ExecutionModeTriangles)) |
985 | inputs.push_back(t: "triangles" ); |
986 | if (execution.flags.get(bit: ExecutionModeInputTrianglesAdjacency)) |
987 | inputs.push_back(t: "triangles_adjacency" ); |
988 | |
989 | if (!execution.geometry_passthrough) |
990 | { |
// For passthrough, these are implied and cannot be declared in the shader.
992 | outputs.push_back(t: join(ts: "max_vertices = " , ts&: execution.output_vertices)); |
993 | if (execution.flags.get(bit: ExecutionModeOutputTriangleStrip)) |
994 | outputs.push_back(t: "triangle_strip" ); |
995 | if (execution.flags.get(bit: ExecutionModeOutputPoints)) |
996 | outputs.push_back(t: "points" ); |
997 | if (execution.flags.get(bit: ExecutionModeOutputLineStrip)) |
998 | outputs.push_back(t: "line_strip" ); |
999 | } |
1000 | break; |
1001 | |
1002 | case ExecutionModelTessellationControl: |
1003 | if (execution.flags.get(bit: ExecutionModeOutputVertices)) |
1004 | outputs.push_back(t: join(ts: "vertices = " , ts&: execution.output_vertices)); |
1005 | break; |
1006 | |
1007 | case ExecutionModelTessellationEvaluation: |
1008 | if (execution.flags.get(bit: ExecutionModeQuads)) |
1009 | inputs.push_back(t: "quads" ); |
1010 | if (execution.flags.get(bit: ExecutionModeTriangles)) |
1011 | inputs.push_back(t: "triangles" ); |
1012 | if (execution.flags.get(bit: ExecutionModeIsolines)) |
1013 | inputs.push_back(t: "isolines" ); |
1014 | if (execution.flags.get(bit: ExecutionModePointMode)) |
1015 | inputs.push_back(t: "point_mode" ); |
1016 | |
1017 | if (!execution.flags.get(bit: ExecutionModeIsolines)) |
1018 | { |
1019 | if (execution.flags.get(bit: ExecutionModeVertexOrderCw)) |
1020 | inputs.push_back(t: "cw" ); |
1021 | if (execution.flags.get(bit: ExecutionModeVertexOrderCcw)) |
1022 | inputs.push_back(t: "ccw" ); |
1023 | } |
1024 | |
1025 | if (execution.flags.get(bit: ExecutionModeSpacingFractionalEven)) |
1026 | inputs.push_back(t: "fractional_even_spacing" ); |
1027 | if (execution.flags.get(bit: ExecutionModeSpacingFractionalOdd)) |
1028 | inputs.push_back(t: "fractional_odd_spacing" ); |
1029 | if (execution.flags.get(bit: ExecutionModeSpacingEqual)) |
1030 | inputs.push_back(t: "equal_spacing" ); |
1031 | break; |
1032 | |
1033 | case ExecutionModelGLCompute: |
1034 | { |
1035 | if (execution.workgroup_size.constant != 0 || execution.flags.get(bit: ExecutionModeLocalSizeId)) |
1036 | { |
1037 | SpecializationConstant wg_x, wg_y, wg_z; |
1038 | get_work_group_size_specialization_constants(x&: wg_x, y&: wg_y, z&: wg_z); |
1039 | |
// If there are any spec constants on legacy GLSL, defer the declaration; we need to set up macro
// declarations before we can emit the work group size.
1042 | if (options.vulkan_semantics || |
1043 | ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0)))) |
1044 | build_workgroup_size(arguments&: inputs, wg_x, wg_y, wg_z); |
1045 | } |
1046 | else |
1047 | { |
1048 | inputs.push_back(t: join(ts: "local_size_x = " , ts&: execution.workgroup_size.x)); |
1049 | inputs.push_back(t: join(ts: "local_size_y = " , ts&: execution.workgroup_size.y)); |
1050 | inputs.push_back(t: join(ts: "local_size_z = " , ts&: execution.workgroup_size.z)); |
1051 | } |
1052 | break; |
1053 | } |
1054 | |
1055 | case ExecutionModelFragment: |
1056 | if (options.es) |
1057 | { |
1058 | switch (options.fragment.default_float_precision) |
1059 | { |
1060 | case Options::Lowp: |
1061 | statement(ts: "precision lowp float;" ); |
1062 | break; |
1063 | |
1064 | case Options::Mediump: |
1065 | statement(ts: "precision mediump float;" ); |
1066 | break; |
1067 | |
1068 | case Options::Highp: |
1069 | statement(ts: "precision highp float;" ); |
1070 | break; |
1071 | |
1072 | default: |
1073 | break; |
1074 | } |
1075 | |
1076 | switch (options.fragment.default_int_precision) |
1077 | { |
1078 | case Options::Lowp: |
1079 | statement(ts: "precision lowp int;" ); |
1080 | break; |
1081 | |
1082 | case Options::Mediump: |
1083 | statement(ts: "precision mediump int;" ); |
1084 | break; |
1085 | |
1086 | case Options::Highp: |
1087 | statement(ts: "precision highp int;" ); |
1088 | break; |
1089 | |
1090 | default: |
1091 | break; |
1092 | } |
1093 | } |
1094 | |
1095 | if (execution.flags.get(bit: ExecutionModeEarlyFragmentTests)) |
1096 | inputs.push_back(t: "early_fragment_tests" ); |
1097 | if (execution.flags.get(bit: ExecutionModePostDepthCoverage)) |
1098 | inputs.push_back(t: "post_depth_coverage" ); |
1099 | |
1100 | if (interlock_used) |
1101 | statement(ts: "#if defined(GL_ARB_fragment_shader_interlock)" ); |
1102 | |
1103 | if (execution.flags.get(bit: ExecutionModePixelInterlockOrderedEXT)) |
1104 | statement(ts: "layout(pixel_interlock_ordered) in;" ); |
1105 | else if (execution.flags.get(bit: ExecutionModePixelInterlockUnorderedEXT)) |
1106 | statement(ts: "layout(pixel_interlock_unordered) in;" ); |
1107 | else if (execution.flags.get(bit: ExecutionModeSampleInterlockOrderedEXT)) |
1108 | statement(ts: "layout(sample_interlock_ordered) in;" ); |
1109 | else if (execution.flags.get(bit: ExecutionModeSampleInterlockUnorderedEXT)) |
1110 | statement(ts: "layout(sample_interlock_unordered) in;" ); |
1111 | |
1112 | if (interlock_used) |
1113 | { |
1114 | statement(ts: "#elif !defined(GL_INTEL_fragment_shader_ordering)" ); |
1115 | statement(ts: "#error Fragment Shader Interlock/Ordering extension missing!" ); |
1116 | statement(ts: "#endif" ); |
1117 | } |
1118 | |
1119 | if (!options.es && execution.flags.get(bit: ExecutionModeDepthGreater)) |
1120 | statement(ts: "layout(depth_greater) out float gl_FragDepth;" ); |
1121 | else if (!options.es && execution.flags.get(bit: ExecutionModeDepthLess)) |
1122 | statement(ts: "layout(depth_less) out float gl_FragDepth;" ); |
1123 | |
1124 | break; |
1125 | |
1126 | default: |
1127 | break; |
1128 | } |
1129 | |
1130 | for (auto &cap : ir.declared_capabilities) |
1131 | if (cap == CapabilityRayTraversalPrimitiveCullingKHR) |
1132 | statement(ts: "layout(primitive_culling);" ); |
1133 | |
1134 | if (!inputs.empty()) |
1135 | statement(ts: "layout(" , ts: merge(list: inputs), ts: ") in;" ); |
1136 | if (!outputs.empty()) |
1137 | statement(ts: "layout(" , ts: merge(list: outputs), ts: ") out;" ); |
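// For example, a compute shader ends up with "layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;",
// and a geometry shader with "layout(triangles) in;" plus "layout(max_vertices = 3, triangle_strip) out;".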
1138 | |
1139 | statement(ts: "" ); |
1140 | } |
1141 | |
1142 | bool CompilerGLSL::type_is_empty(const SPIRType &type) |
1143 | { |
1144 | return type.basetype == SPIRType::Struct && type.member_types.empty(); |
1145 | } |
1146 | |
1147 | void CompilerGLSL::emit_struct(SPIRType &type) |
1148 | { |
1149 | // Struct types can be stamped out multiple times |
1150 | // with just different offsets, matrix layouts, etc ... |
1151 | // Type-punning with these types is legal, which complicates things |
1152 | // when we are storing struct and array types in an SSBO for example. |
// If the type master is packed, however, we can no longer assume that the struct declaration will be redundant.
1154 | if (type.type_alias != TypeID(0) && |
1155 | !has_extended_decoration(id: type.type_alias, decoration: SPIRVCrossDecorationBufferBlockRepacked)) |
1156 | return; |
1157 | |
1158 | add_resource_name(id: type.self); |
1159 | auto name = type_to_glsl(type); |
1160 | |
1161 | statement(ts: !backend.explicit_struct_type ? "struct " : "" , ts&: name); |
1162 | begin_scope(); |
1163 | |
1164 | type.member_name_cache.clear(); |
1165 | |
1166 | uint32_t i = 0; |
1167 | bool emitted = false; |
1168 | for (auto &member : type.member_types) |
1169 | { |
1170 | add_member_name(type, name: i); |
1171 | emit_struct_member(type, member_type_id: member, index: i); |
1172 | i++; |
1173 | emitted = true; |
1174 | } |
1175 | |
1176 | // Don't declare empty structs in GLSL, this is not allowed. |
1177 | if (type_is_empty(type) && !backend.supports_empty_struct) |
1178 | { |
1179 | statement(ts: "int empty_struct_member;" ); |
1180 | emitted = true; |
1181 | } |
1182 | |
1183 | if (has_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationPaddingTarget)) |
1184 | emit_struct_padding_target(type); |
1185 | |
1186 | end_scope_decl(); |
1187 | |
1188 | if (emitted) |
1189 | statement(ts: "" ); |
1190 | } |
1191 | |
1192 | string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags) |
1193 | { |
1194 | string res; |
1195 | //if (flags & (1ull << DecorationSmooth)) |
1196 | // res += "smooth "; |
1197 | if (flags.get(bit: DecorationFlat)) |
1198 | res += "flat " ; |
1199 | if (flags.get(bit: DecorationNoPerspective)) |
1200 | res += "noperspective " ; |
1201 | if (flags.get(bit: DecorationCentroid)) |
1202 | res += "centroid " ; |
1203 | if (flags.get(bit: DecorationPatch)) |
1204 | res += "patch " ; |
1205 | if (flags.get(bit: DecorationSample)) |
1206 | res += "sample " ; |
1207 | if (flags.get(bit: DecorationInvariant)) |
1208 | res += "invariant " ; |
1209 | |
1210 | if (flags.get(bit: DecorationExplicitInterpAMD)) |
1211 | { |
1212 | require_extension_internal(ext: "GL_AMD_shader_explicit_vertex_parameter" ); |
1213 | res += "__explicitInterpAMD " ; |
1214 | } |
1215 | |
1216 | if (flags.get(bit: DecorationPerVertexKHR)) |
1217 | { |
1218 | if (options.es && options.version < 320) |
1219 | SPIRV_CROSS_THROW("pervertexEXT requires ESSL 320." ); |
1220 | else if (!options.es && options.version < 450) |
1221 | SPIRV_CROSS_THROW("pervertexEXT requires GLSL 450." ); |
1222 | |
1223 | if (barycentric_is_nv) |
1224 | { |
1225 | require_extension_internal(ext: "GL_NV_fragment_shader_barycentric" ); |
1226 | res += "pervertexNV " ; |
1227 | } |
1228 | else |
1229 | { |
1230 | require_extension_internal(ext: "GL_EXT_fragment_shader_barycentric" ); |
1231 | res += "pervertexEXT " ; |
1232 | } |
1233 | } |
1234 | |
1235 | return res; |
1236 | } |
1237 | |
1238 | string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) |
1239 | { |
1240 | if (is_legacy()) |
1241 | return "" ; |
1242 | |
1243 | bool is_block = has_decoration(id: type.self, decoration: DecorationBlock) || has_decoration(id: type.self, decoration: DecorationBufferBlock); |
1244 | if (!is_block) |
1245 | return "" ; |
1246 | |
1247 | auto &memb = ir.meta[type.self].members; |
1248 | if (index >= memb.size()) |
1249 | return "" ; |
1250 | auto &dec = memb[index]; |
1251 | |
1252 | SmallVector<string> attr; |
1253 | |
1254 | if (has_member_decoration(id: type.self, index, decoration: DecorationPassthroughNV)) |
1255 | attr.push_back(t: "passthrough" ); |
1256 | |
1257 | // We can only apply layouts on members in block interfaces. |
1258 | // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly. |
1259 | // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct |
1260 | // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL. |
1261 | // |
1262 | // We would like to go from (SPIR-V style): |
1263 | // |
1264 | // struct Foo { layout(row_major) mat4 matrix; }; |
1265 | // buffer UBO { Foo foo; }; |
1266 | // |
1267 | // to |
1268 | // |
1269 | // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations. |
1270 | // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level. |
1271 | auto flags = combined_decoration_for_member(type, index); |
1272 | |
1273 | if (flags.get(bit: DecorationRowMajor)) |
1274 | attr.push_back(t: "row_major" ); |
1275 | // We don't emit any global layouts, so column_major is default. |
1276 | //if (flags & (1ull << DecorationColMajor)) |
1277 | // attr.push_back("column_major"); |
1278 | |
1279 | if (dec.decoration_flags.get(bit: DecorationLocation) && can_use_io_location(storage: type.storage, block: true)) |
1280 | attr.push_back(t: join(ts: "location = " , ts&: dec.location)); |
1281 | |
1282 | // Can only declare component if we can declare location. |
1283 | if (dec.decoration_flags.get(bit: DecorationComponent) && can_use_io_location(storage: type.storage, block: true)) |
1284 | { |
1285 | if (!options.es) |
1286 | { |
1287 | if (options.version < 440 && options.version >= 140) |
1288 | require_extension_internal(ext: "GL_ARB_enhanced_layouts" ); |
1289 | else if (options.version < 140) |
1290 | SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40." ); |
1291 | attr.push_back(t: join(ts: "component = " , ts&: dec.component)); |
1292 | } |
1293 | else |
1294 | SPIRV_CROSS_THROW("Component decoration is not supported in ES targets." ); |
1295 | } |
1296 | |
1297 | // SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers. |
1298 | // This is only done selectively in GLSL as needed. |
1299 | if (has_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset) && |
1300 | dec.decoration_flags.get(bit: DecorationOffset)) |
1301 | attr.push_back(t: join(ts: "offset = " , ts&: dec.offset)); |
1302 | else if (type.storage == StorageClassOutput && dec.decoration_flags.get(bit: DecorationOffset)) |
1303 | attr.push_back(t: join(ts: "xfb_offset = " , ts&: dec.offset)); |
1304 | |
1305 | if (attr.empty()) |
1306 | return "" ; |
1307 | |
1308 | string res = "layout(" ; |
1309 | res += merge(list: attr); |
1310 | res += ") " ; |
1311 | return res; |
1312 | } |
1313 | |
1314 | const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format) |
1315 | { |
1316 | if (options.es && is_desktop_only_format(format)) |
1317 | SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile." ); |
1318 | |
1319 | switch (format) |
1320 | { |
1321 | case ImageFormatRgba32f: |
1322 | return "rgba32f" ; |
1323 | case ImageFormatRgba16f: |
1324 | return "rgba16f" ; |
1325 | case ImageFormatR32f: |
1326 | return "r32f" ; |
1327 | case ImageFormatRgba8: |
1328 | return "rgba8" ; |
1329 | case ImageFormatRgba8Snorm: |
1330 | return "rgba8_snorm" ; |
1331 | case ImageFormatRg32f: |
1332 | return "rg32f" ; |
1333 | case ImageFormatRg16f: |
1334 | return "rg16f" ; |
1335 | case ImageFormatRgba32i: |
1336 | return "rgba32i" ; |
1337 | case ImageFormatRgba16i: |
1338 | return "rgba16i" ; |
1339 | case ImageFormatR32i: |
1340 | return "r32i" ; |
1341 | case ImageFormatRgba8i: |
1342 | return "rgba8i" ; |
1343 | case ImageFormatRg32i: |
1344 | return "rg32i" ; |
1345 | case ImageFormatRg16i: |
1346 | return "rg16i" ; |
1347 | case ImageFormatRgba32ui: |
1348 | return "rgba32ui" ; |
1349 | case ImageFormatRgba16ui: |
1350 | return "rgba16ui" ; |
1351 | case ImageFormatR32ui: |
1352 | return "r32ui" ; |
1353 | case ImageFormatRgba8ui: |
1354 | return "rgba8ui" ; |
1355 | case ImageFormatRg32ui: |
1356 | return "rg32ui" ; |
1357 | case ImageFormatRg16ui: |
1358 | return "rg16ui" ; |
1359 | case ImageFormatR11fG11fB10f: |
1360 | return "r11f_g11f_b10f" ; |
1361 | case ImageFormatR16f: |
1362 | return "r16f" ; |
1363 | case ImageFormatRgb10A2: |
1364 | return "rgb10_a2" ; |
1365 | case ImageFormatR8: |
1366 | return "r8" ; |
1367 | case ImageFormatRg8: |
1368 | return "rg8" ; |
1369 | case ImageFormatR16: |
1370 | return "r16" ; |
1371 | case ImageFormatRg16: |
1372 | return "rg16" ; |
1373 | case ImageFormatRgba16: |
1374 | return "rgba16" ; |
1375 | case ImageFormatR16Snorm: |
1376 | return "r16_snorm" ; |
1377 | case ImageFormatRg16Snorm: |
1378 | return "rg16_snorm" ; |
1379 | case ImageFormatRgba16Snorm: |
1380 | return "rgba16_snorm" ; |
1381 | case ImageFormatR8Snorm: |
1382 | return "r8_snorm" ; |
1383 | case ImageFormatRg8Snorm: |
1384 | return "rg8_snorm" ; |
1385 | case ImageFormatR8ui: |
1386 | return "r8ui" ; |
1387 | case ImageFormatRg8ui: |
1388 | return "rg8ui" ; |
1389 | case ImageFormatR16ui: |
1390 | return "r16ui" ; |
1391 | case ImageFormatRgb10a2ui: |
1392 | return "rgb10_a2ui" ; |
1393 | case ImageFormatR8i: |
1394 | return "r8i" ; |
1395 | case ImageFormatRg8i: |
1396 | return "rg8i" ; |
1397 | case ImageFormatR16i: |
1398 | return "r16i" ; |
1399 | default: |
1400 | case ImageFormatUnknown: |
1401 | return nullptr; |
1402 | } |
1403 | } |
1404 | |
1405 | uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard) |
1406 | { |
1407 | switch (type.basetype) |
1408 | { |
1409 | case SPIRType::Double: |
1410 | case SPIRType::Int64: |
1411 | case SPIRType::UInt64: |
1412 | return 8; |
1413 | case SPIRType::Float: |
1414 | case SPIRType::Int: |
1415 | case SPIRType::UInt: |
1416 | return 4; |
1417 | case SPIRType::Half: |
1418 | case SPIRType::Short: |
1419 | case SPIRType::UShort: |
1420 | return 2; |
1421 | case SPIRType::SByte: |
1422 | case SPIRType::UByte: |
1423 | return 1; |
1424 | |
1425 | default: |
1426 | SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size." ); |
1427 | } |
1428 | } |
1429 | |
1430 | uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags, |
1431 | BufferPackingStandard packing) |
1432 | { |
1433 | // If using PhysicalStorageBufferEXT storage class, this is a pointer, |
1434 | // and is 64-bit. |
1435 | if (type.storage == StorageClassPhysicalStorageBufferEXT) |
1436 | { |
1437 | if (!type.pointer) |
1438 | SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers." ); |
1439 | |
1440 | if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) |
1441 | { |
1442 | if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type)) |
1443 | return 16; |
1444 | else |
1445 | return 8; |
1446 | } |
1447 | else |
1448 | SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT." ); |
1449 | } |
1450 | |
1451 | if (!type.array.empty()) |
1452 | { |
1453 | uint32_t minimum_alignment = 1; |
1454 | if (packing_is_vec4_padded(packing)) |
1455 | minimum_alignment = 16; |
1456 | |
1457 | auto *tmp = &get<SPIRType>(id: type.parent_type); |
1458 | while (!tmp->array.empty()) |
1459 | tmp = &get<SPIRType>(id: tmp->parent_type); |
1460 | |
1461 | // Get the alignment of the base type, then maybe round up. |
1462 | return max(a: minimum_alignment, b: type_to_packed_alignment(type: *tmp, flags, packing)); |
1463 | } |
1464 | |
1465 | if (type.basetype == SPIRType::Struct) |
1466 | { |
1467 | // Rule 9. Structs alignments are maximum alignment of its members. |
1468 | uint32_t alignment = 1; |
1469 | for (uint32_t i = 0; i < type.member_types.size(); i++) |
1470 | { |
auto member_flags = ir.meta[type.self].members[i].decoration_flags;
alignment =
max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
}

// In std140, struct alignment is rounded up to 16.
if (packing_is_vec4_padded(packing))
alignment = max(alignment, 16u);
1479 | |
1480 | return alignment; |
1481 | } |
1482 | else |
1483 | { |
1484 | const uint32_t base_alignment = type_to_packed_base_size(type, packing); |
1485 | |
1486 | // Alignment requirement for scalar block layout is always the alignment for the most basic component. |
1487 | if (packing_is_scalar(packing)) |
1488 | return base_alignment; |
1489 | |
// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
// a vec4 boundary; that check is handled outside this function, since it needs to know our current offset.
1492 | if (type.columns == 1 && packing_is_hlsl(packing)) |
1493 | return base_alignment; |
1494 | |
1495 | // From 7.6.2.2 in GL 4.5 core spec. |
1496 | // Rule 1 |
1497 | if (type.vecsize == 1 && type.columns == 1) |
1498 | return base_alignment; |
1499 | |
1500 | // Rule 2 |
1501 | if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1) |
1502 | return type.vecsize * base_alignment; |
1503 | |
1504 | // Rule 3 |
1505 | if (type.vecsize == 3 && type.columns == 1) |
1506 | return 4 * base_alignment; |
1507 | |
1508 | // Rule 4 implied. Alignment does not change in std430. |
1509 | |
1510 | // Rule 5. Column-major matrices are stored as arrays of |
1511 | // vectors. |
if (flags.get(DecorationColMajor) && type.columns > 1)
1513 | { |
1514 | if (packing_is_vec4_padded(packing)) |
1515 | return 4 * base_alignment; |
1516 | else if (type.vecsize == 3) |
1517 | return 4 * base_alignment; |
1518 | else |
1519 | return type.vecsize * base_alignment; |
1520 | } |
1521 | |
1522 | // Rule 6 implied. |
1523 | |
1524 | // Rule 7. |
if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1526 | { |
1527 | if (packing_is_vec4_padded(packing)) |
1528 | return 4 * base_alignment; |
1529 | else if (type.columns == 3) |
1530 | return 4 * base_alignment; |
1531 | else |
1532 | return type.columns * base_alignment; |
1533 | } |
1534 | |
1535 | // Rule 8 implied. |
1536 | } |
1537 | |
1538 | SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?" ); |
1539 | } |
1540 | |
1541 | uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags, |
1542 | BufferPackingStandard packing) |
1543 | { |
// Array stride is equal to aligned size of the underlying type.
uint32_t parent = type.parent_type;
assert(parent);

auto &tmp = get<SPIRType>(parent);

uint32_t size = type_to_packed_size(tmp, flags, packing);
uint32_t alignment = type_to_packed_alignment(type, flags, packing);
return (size + alignment - 1) & ~(alignment - 1);
1553 | } |
1554 | |
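// Quick example for the size rules below: in both std140 and std430, a struct
// { vec3 a; float b; } packs to 16 bytes, since b fits in the padding after the vec3.
// The std140/std430 difference mostly shows up through the array/struct alignment
// rounding handled in type_to_packed_alignment and type_to_packed_array_stride.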
1555 | uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing) |
1556 | { |
1557 | if (!type.array.empty()) |
1558 | { |
1559 | uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing); |
1560 | |
1561 | // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size, |
1562 | // so that it is possible to pack other vectors into the last element. |
1563 | if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct) |
1564 | packed_size -= (4 - type.vecsize) * (type.width / 8); |
1565 | |
1566 | return packed_size; |
1567 | } |
1568 | |
1569 | // If using PhysicalStorageBufferEXT storage class, this is a pointer, |
1570 | // and is 64-bit. |
1571 | if (type.storage == StorageClassPhysicalStorageBufferEXT) |
1572 | { |
1573 | if (!type.pointer) |
1574 | SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers." ); |
1575 | |
1576 | if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) |
1577 | return 8; |
1578 | else |
1579 | SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT." ); |
1580 | } |
1581 | |
1582 | uint32_t size = 0; |
1583 | |
1584 | if (type.basetype == SPIRType::Struct) |
1585 | { |
1586 | uint32_t pad_alignment = 1; |
1587 | |
1588 | for (uint32_t i = 0; i < type.member_types.size(); i++) |
1589 | { |
auto member_flags = ir.meta[type.self].members[i].decoration_flags;
auto &member_type = get<SPIRType>(type.member_types[i]);

uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
uint32_t alignment = max(packed_alignment, pad_alignment);
1595 | |
1596 | // The next member following a struct member is aligned to the base alignment of the struct that came before. |
1597 | // GL 4.5 spec, 7.6.2.2. |
1598 | if (member_type.basetype == SPIRType::Struct) |
1599 | pad_alignment = packed_alignment; |
1600 | else |
1601 | pad_alignment = 1; |
1602 | |
1603 | size = (size + alignment - 1) & ~(alignment - 1); |
size += type_to_packed_size(member_type, member_flags, packing);
1605 | } |
1606 | } |
1607 | else |
1608 | { |
1609 | const uint32_t base_alignment = type_to_packed_base_size(type, packing); |
1610 | |
1611 | if (packing_is_scalar(packing)) |
1612 | { |
1613 | size = type.vecsize * type.columns * base_alignment; |
1614 | } |
1615 | else |
1616 | { |
1617 | if (type.columns == 1) |
1618 | size = type.vecsize * base_alignment; |
1619 | |
if (flags.get(DecorationColMajor) && type.columns > 1)
1621 | { |
1622 | if (packing_is_vec4_padded(packing)) |
1623 | size = type.columns * 4 * base_alignment; |
1624 | else if (type.vecsize == 3) |
1625 | size = type.columns * 4 * base_alignment; |
1626 | else |
1627 | size = type.columns * type.vecsize * base_alignment; |
1628 | } |
1629 | |
if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1631 | { |
1632 | if (packing_is_vec4_padded(packing)) |
1633 | size = type.vecsize * 4 * base_alignment; |
1634 | else if (type.columns == 3) |
1635 | size = type.vecsize * 4 * base_alignment; |
1636 | else |
1637 | size = type.vecsize * type.columns * base_alignment; |
1638 | } |
1639 | |
1640 | // For matrices in HLSL, the last element has a size which depends on its vector size, |
1641 | // so that it is possible to pack other vectors into the last element. |
1642 | if (packing_is_hlsl(packing) && type.columns > 1) |
1643 | size -= (4 - type.vecsize) * (type.width / 8); |
1644 | } |
1645 | } |
1646 | |
1647 | return size; |
1648 | } |
1649 | |
1650 | bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, |
1651 | uint32_t *failed_validation_index, uint32_t start_offset, |
1652 | uint32_t end_offset) |
1653 | { |
1654 | // This is very tricky and error prone, but try to be exhaustive and correct here. |
1655 | // SPIR-V doesn't directly say if we're using std430 or std140. |
1656 | // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters), |
1657 | // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information. |
// We do not have to consider shared or packed, since those layouts are not allowed in Vulkan SPIR-V
// (they are useless anyway, as custom offsets would achieve the same thing).
1659 | // |
1660 | // It is almost certain that we're using std430, but it gets tricky with arrays in particular. |
1661 | // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430. |
1662 | // |
1663 | // The only two differences between std140 and std430 are related to padding alignment/array stride |
1664 | // in arrays and structs. In std140 they take minimum vec4 alignment. |
1665 | // std430 only removes the vec4 requirement. |
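// Concretely, for an array of floats: ArrayStride = 16 validates as std140 but not std430,
// while ArrayStride = 4 rules out std140 and points at std430 (or scalar, in Vulkan).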
1666 | |
1667 | uint32_t offset = 0; |
1668 | uint32_t pad_alignment = 1; |
1669 | |
1670 | bool is_top_level_block = |
has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
1672 | |
1673 | for (uint32_t i = 0; i < type.member_types.size(); i++) |
1674 | { |
auto &memb_type = get<SPIRType>(type.member_types[i]);
auto member_flags = ir.meta[type.self].members[i].decoration_flags;

// Verify alignment rules.
uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
1680 | |
1681 | // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g: |
1682 | // layout(constant_id = 0) const int s = 10; |
1683 | // const int S = s + 5; // SpecConstantOp |
1684 | // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here, |
1685 | // we would need full implementation of compile-time constant folding. :( |
1686 | // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant |
1687 | // for our analysis (e.g. unsized arrays). |
1688 | // This lets us simply ignore that there are spec constant op sized arrays in our buffers. |
1689 | // Querying size of this member will fail, so just don't call it unless we have to. |
1690 | // |
1691 | // This is likely "best effort" we can support without going into unacceptably complicated workarounds. |
1692 | bool member_can_be_unsized = |
1693 | is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty(); |
1694 | |
1695 | uint32_t packed_size = 0; |
1696 | if (!member_can_be_unsized || packing_is_hlsl(packing)) |
packed_size = type_to_packed_size(memb_type, member_flags, packing);
1698 | |
1699 | // We only need to care about this if we have non-array types which can straddle the vec4 boundary. |
1700 | if (packing_is_hlsl(packing)) |
1701 | { |
1702 | // If a member straddles across a vec4 boundary, alignment is actually vec4. |
1703 | uint32_t begin_word = offset / 16; |
1704 | uint32_t end_word = (offset + packed_size - 1) / 16; |
1705 | if (begin_word != end_word) |
packed_alignment = max(packed_alignment, 16u);
1707 | } |
1708 | |
uint32_t actual_offset = type_struct_member_offset(type, i);
1710 | // Field is not in the specified range anymore and we can ignore any further fields. |
1711 | if (actual_offset >= end_offset) |
1712 | break; |
1713 | |
uint32_t alignment = max(packed_alignment, pad_alignment);
1715 | offset = (offset + alignment - 1) & ~(alignment - 1); |
1716 | |
1717 | // The next member following a struct member is aligned to the base alignment of the struct that came before. |
1718 | // GL 4.5 spec, 7.6.2.2. |
1719 | if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer) |
1720 | pad_alignment = packed_alignment; |
1721 | else |
1722 | pad_alignment = 1; |
1723 | |
1724 | // Only care about packing if we are in the given range |
1725 | if (actual_offset >= start_offset) |
1726 | { |
1727 | // We only care about offsets in std140, std430, etc ... |
1728 | // For EnhancedLayout variants, we have the flexibility to choose our own offsets. |
1729 | if (!packing_has_flexible_offset(packing)) |
1730 | { |
1731 | if (actual_offset != offset) // This cannot be the packing we're looking for. |
1732 | { |
1733 | if (failed_validation_index) |
1734 | *failed_validation_index = i; |
1735 | return false; |
1736 | } |
1737 | } |
1738 | else if ((actual_offset & (alignment - 1)) != 0) |
1739 | { |
1740 | // We still need to verify that alignment rules are observed, even if we have explicit offset. |
1741 | if (failed_validation_index) |
1742 | *failed_validation_index = i; |
1743 | return false; |
1744 | } |
1745 | |
1746 | // Verify array stride rules. |
if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
type_struct_member_array_stride(type, i))
1749 | { |
1750 | if (failed_validation_index) |
1751 | *failed_validation_index = i; |
1752 | return false; |
1753 | } |
1754 | |
1755 | // Verify that sub-structs also follow packing rules. |
1756 | // We cannot use enhanced layouts on substructs, so they better be up to spec. |
1757 | auto substruct_packing = packing_to_substruct_packing(packing); |
1758 | |
if (!memb_type.pointer && !memb_type.member_types.empty() &&
!buffer_is_packing_standard(memb_type, substruct_packing))
1761 | { |
1762 | if (failed_validation_index) |
1763 | *failed_validation_index = i; |
1764 | return false; |
1765 | } |
1766 | } |
1767 | |
1768 | // Bump size. |
1769 | offset = actual_offset + packed_size; |
1770 | } |
1771 | |
1772 | return true; |
1773 | } |
1774 | |
1775 | bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block) |
1776 | { |
// Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
1778 | // Be very explicit here about how to solve the issue. |
1779 | if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) || |
1780 | (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput)) |
1781 | { |
1782 | uint32_t minimum_desktop_version = block ? 440 : 410; |
1783 | // ARB_enhanced_layouts vs ARB_separate_shader_objects ... |
1784 | |
1785 | if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects) |
1786 | return false; |
1787 | else if (options.es && options.version < 310) |
1788 | return false; |
1789 | } |
1790 | |
1791 | if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) || |
1792 | (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput)) |
1793 | { |
1794 | if (options.es && options.version < 300) |
1795 | return false; |
1796 | else if (!options.es && options.version < 330) |
1797 | return false; |
1798 | } |
1799 | |
1800 | if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant) |
1801 | { |
1802 | if (options.es && options.version < 310) |
1803 | return false; |
1804 | else if (!options.es && options.version < 430) |
1805 | return false; |
1806 | } |
1807 | |
1808 | return true; |
1809 | } |
1810 | |
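// The string built here is the "layout(...) " prefix emitted in front of a declaration.
// For a typical Vulkan GLSL UBO this might look like, e.g.:
//   layout(set = 0, binding = 1, std140) uniform UBO { ... };
// where only the part inside layout(...) is produced by this function.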
1811 | string CompilerGLSL::layout_for_variable(const SPIRVariable &var) |
1812 | { |
// FIXME: Come up with a better solution for when to disable layouts.
// Whether layouts can be used depends on the available extensions as well as which
// kinds of layouts are needed. For now, the simple solution is to just disable
// layouts for legacy versions.
1817 | if (is_legacy()) |
1818 | return "" ; |
1819 | |
1820 | if (subpass_input_is_framebuffer_fetch(id: var.self)) |
1821 | return "" ; |
1822 | |
1823 | SmallVector<string> attr; |
1824 | |
1825 | auto &type = get<SPIRType>(id: var.basetype); |
1826 | auto &flags = get_decoration_bitset(id: var.self); |
1827 | auto &typeflags = get_decoration_bitset(id: type.self); |
1828 | |
1829 | if (flags.get(bit: DecorationPassthroughNV)) |
1830 | attr.push_back(t: "passthrough" ); |
1831 | |
1832 | if (options.vulkan_semantics && var.storage == StorageClassPushConstant) |
1833 | attr.push_back(t: "push_constant" ); |
1834 | else if (var.storage == StorageClassShaderRecordBufferKHR) |
1835 | attr.push_back(t: ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV" ); |
1836 | |
1837 | if (flags.get(bit: DecorationRowMajor)) |
1838 | attr.push_back(t: "row_major" ); |
1839 | if (flags.get(bit: DecorationColMajor)) |
1840 | attr.push_back(t: "column_major" ); |
1841 | |
1842 | if (options.vulkan_semantics) |
1843 | { |
1844 | if (flags.get(bit: DecorationInputAttachmentIndex)) |
1845 | attr.push_back(t: join(ts: "input_attachment_index = " , ts: get_decoration(id: var.self, decoration: DecorationInputAttachmentIndex))); |
1846 | } |
1847 | |
1848 | bool is_block = has_decoration(id: type.self, decoration: DecorationBlock); |
1849 | if (flags.get(bit: DecorationLocation) && can_use_io_location(storage: var.storage, block: is_block)) |
1850 | { |
1851 | Bitset combined_decoration; |
1852 | for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++) |
1853 | combined_decoration.merge_or(other: combined_decoration_for_member(type, index: i)); |
1854 | |
1855 | // If our members have location decorations, we don't need to |
1856 | // emit location decorations at the top as well (looks weird). |
1857 | if (!combined_decoration.get(bit: DecorationLocation)) |
1858 | attr.push_back(t: join(ts: "location = " , ts: get_decoration(id: var.self, decoration: DecorationLocation))); |
1859 | } |
1860 | |
1861 | if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput && |
1862 | location_is_non_coherent_framebuffer_fetch(location: get_decoration(id: var.self, decoration: DecorationLocation))) |
1863 | { |
1864 | attr.push_back(t: "noncoherent" ); |
1865 | } |
1866 | |
1867 | // Transform feedback |
1868 | bool uses_enhanced_layouts = false; |
1869 | if (is_block && var.storage == StorageClassOutput) |
1870 | { |
1871 | // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself, |
1872 | // since all members must match the same xfb_buffer. The only thing we will declare for members of the block |
1873 | // is the xfb_offset. |
1874 | uint32_t member_count = uint32_t(type.member_types.size()); |
1875 | bool have_xfb_buffer_stride = false; |
1876 | bool have_any_xfb_offset = false; |
1877 | bool have_geom_stream = false; |
1878 | uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; |
1879 | |
1880 | if (flags.get(bit: DecorationXfbBuffer) && flags.get(bit: DecorationXfbStride)) |
1881 | { |
1882 | have_xfb_buffer_stride = true; |
1883 | xfb_buffer = get_decoration(id: var.self, decoration: DecorationXfbBuffer); |
1884 | xfb_stride = get_decoration(id: var.self, decoration: DecorationXfbStride); |
1885 | } |
1886 | |
1887 | if (flags.get(bit: DecorationStream)) |
1888 | { |
1889 | have_geom_stream = true; |
1890 | geom_stream = get_decoration(id: var.self, decoration: DecorationStream); |
1891 | } |
1892 | |
1893 | // Verify that none of the members violate our assumption. |
1894 | for (uint32_t i = 0; i < member_count; i++) |
1895 | { |
1896 | if (has_member_decoration(id: type.self, index: i, decoration: DecorationStream)) |
1897 | { |
1898 | uint32_t member_geom_stream = get_member_decoration(id: type.self, index: i, decoration: DecorationStream); |
1899 | if (have_geom_stream && member_geom_stream != geom_stream) |
1900 | SPIRV_CROSS_THROW("IO block member Stream mismatch." ); |
1901 | have_geom_stream = true; |
1902 | geom_stream = member_geom_stream; |
1903 | } |
1904 | |
1905 | // Only members with an Offset decoration participate in XFB. |
1906 | if (!has_member_decoration(id: type.self, index: i, decoration: DecorationOffset)) |
1907 | continue; |
1908 | have_any_xfb_offset = true; |
1909 | |
1910 | if (has_member_decoration(id: type.self, index: i, decoration: DecorationXfbBuffer)) |
1911 | { |
1912 | uint32_t buffer_index = get_member_decoration(id: type.self, index: i, decoration: DecorationXfbBuffer); |
1913 | if (have_xfb_buffer_stride && buffer_index != xfb_buffer) |
1914 | SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch." ); |
1915 | have_xfb_buffer_stride = true; |
1916 | xfb_buffer = buffer_index; |
1917 | } |
1918 | |
1919 | if (has_member_decoration(id: type.self, index: i, decoration: DecorationXfbStride)) |
1920 | { |
1921 | uint32_t stride = get_member_decoration(id: type.self, index: i, decoration: DecorationXfbStride); |
1922 | if (have_xfb_buffer_stride && stride != xfb_stride) |
1923 | SPIRV_CROSS_THROW("IO block member XfbStride mismatch." ); |
1924 | have_xfb_buffer_stride = true; |
1925 | xfb_stride = stride; |
1926 | } |
1927 | } |
1928 | |
1929 | if (have_xfb_buffer_stride && have_any_xfb_offset) |
1930 | { |
1931 | attr.push_back(t: join(ts: "xfb_buffer = " , ts&: xfb_buffer)); |
1932 | attr.push_back(t: join(ts: "xfb_stride = " , ts&: xfb_stride)); |
1933 | uses_enhanced_layouts = true; |
1934 | } |
1935 | |
1936 | if (have_geom_stream) |
1937 | { |
1938 | if (get_execution_model() != ExecutionModelGeometry) |
1939 | SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders." ); |
1940 | if (options.es) |
1941 | SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL." ); |
1942 | if (options.version < 400) |
1943 | require_extension_internal(ext: "GL_ARB_transform_feedback3" ); |
1944 | attr.push_back(t: join(ts: "stream = " , ts: get_decoration(id: var.self, decoration: DecorationStream))); |
1945 | } |
1946 | } |
1947 | else if (var.storage == StorageClassOutput) |
1948 | { |
1949 | if (flags.get(bit: DecorationXfbBuffer) && flags.get(bit: DecorationXfbStride) && flags.get(bit: DecorationOffset)) |
1950 | { |
1951 | // XFB for standalone variables, we can emit all decorations. |
1952 | attr.push_back(t: join(ts: "xfb_buffer = " , ts: get_decoration(id: var.self, decoration: DecorationXfbBuffer))); |
1953 | attr.push_back(t: join(ts: "xfb_stride = " , ts: get_decoration(id: var.self, decoration: DecorationXfbStride))); |
1954 | attr.push_back(t: join(ts: "xfb_offset = " , ts: get_decoration(id: var.self, decoration: DecorationOffset))); |
1955 | uses_enhanced_layouts = true; |
1956 | } |
1957 | |
1958 | if (flags.get(bit: DecorationStream)) |
1959 | { |
1960 | if (get_execution_model() != ExecutionModelGeometry) |
1961 | SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders." ); |
1962 | if (options.es) |
1963 | SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL." ); |
1964 | if (options.version < 400) |
1965 | require_extension_internal(ext: "GL_ARB_transform_feedback3" ); |
1966 | attr.push_back(t: join(ts: "stream = " , ts: get_decoration(id: var.self, decoration: DecorationStream))); |
1967 | } |
1968 | } |
1969 | |
1970 | // Can only declare Component if we can declare location. |
1971 | if (flags.get(bit: DecorationComponent) && can_use_io_location(storage: var.storage, block: is_block)) |
1972 | { |
1973 | uses_enhanced_layouts = true; |
1974 | attr.push_back(t: join(ts: "component = " , ts: get_decoration(id: var.self, decoration: DecorationComponent))); |
1975 | } |
1976 | |
1977 | if (uses_enhanced_layouts) |
1978 | { |
1979 | if (!options.es) |
1980 | { |
1981 | if (options.version < 440 && options.version >= 140) |
1982 | require_extension_internal(ext: "GL_ARB_enhanced_layouts" ); |
1983 | else if (options.version < 140) |
1984 | SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40." ); |
1985 | if (!options.es && options.version < 440) |
1986 | require_extension_internal(ext: "GL_ARB_enhanced_layouts" ); |
1987 | } |
1988 | else if (options.es) |
1989 | SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL." ); |
1990 | } |
1991 | |
1992 | if (flags.get(bit: DecorationIndex)) |
1993 | attr.push_back(t: join(ts: "index = " , ts: get_decoration(id: var.self, decoration: DecorationIndex))); |
1994 | |
1995 | // Do not emit set = decoration in regular GLSL output, but |
1996 | // we need to preserve it in Vulkan GLSL mode. |
1997 | if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR) |
1998 | { |
1999 | if (flags.get(bit: DecorationDescriptorSet) && options.vulkan_semantics) |
2000 | attr.push_back(t: join(ts: "set = " , ts: get_decoration(id: var.self, decoration: DecorationDescriptorSet))); |
2001 | } |
2002 | |
2003 | bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant; |
2004 | bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR || |
2005 | (var.storage == StorageClassUniform && typeflags.get(bit: DecorationBufferBlock)); |
2006 | bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer; |
2007 | bool ubo_block = var.storage == StorageClassUniform && typeflags.get(bit: DecorationBlock); |
2008 | |
2009 | // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ... |
2010 | bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140); |
2011 | |
// Pretend there are no UBOs when the options say so.
2013 | if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms) |
2014 | can_use_buffer_blocks = false; |
2015 | |
2016 | bool can_use_binding; |
2017 | if (options.es) |
2018 | can_use_binding = options.version >= 310; |
2019 | else |
2020 | can_use_binding = options.enable_420pack_extension || (options.version >= 420); |
2021 | |
2022 | // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30. |
2023 | if (!can_use_buffer_blocks && var.storage == StorageClassUniform) |
2024 | can_use_binding = false; |
2025 | |
2026 | if (var.storage == StorageClassShaderRecordBufferKHR) |
2027 | can_use_binding = false; |
2028 | |
2029 | if (can_use_binding && flags.get(bit: DecorationBinding)) |
2030 | attr.push_back(t: join(ts: "binding = " , ts: get_decoration(id: var.self, decoration: DecorationBinding))); |
2031 | |
2032 | if (var.storage != StorageClassOutput && flags.get(bit: DecorationOffset)) |
2033 | attr.push_back(t: join(ts: "offset = " , ts: get_decoration(id: var.self, decoration: DecorationOffset))); |
2034 | |
2035 | // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430. |
2036 | // If SPIR-V does not comply with either layout, we cannot really work around it. |
2037 | if (can_use_buffer_blocks && (ubo_block || emulated_ubo)) |
2038 | { |
2039 | attr.push_back(t: buffer_to_packing_standard(type, support_std430_without_scalar_layout: false)); |
2040 | } |
2041 | else if (can_use_buffer_blocks && (push_constant_block || ssbo_block)) |
2042 | { |
2043 | attr.push_back(t: buffer_to_packing_standard(type, support_std430_without_scalar_layout: true)); |
2044 | } |
2045 | |
// For images, the type itself adds a layout qualifier.
2047 | // Only emit the format for storage images. |
2048 | if (type.basetype == SPIRType::Image && type.image.sampled == 2) |
2049 | { |
2050 | const char *fmt = format_to_glsl(format: type.image.format); |
2051 | if (fmt) |
2052 | attr.push_back(t: fmt); |
2053 | } |
2054 | |
2055 | if (attr.empty()) |
2056 | return "" ; |
2057 | |
2058 | string res = "layout(" ; |
2059 | res += merge(list: attr); |
2060 | res += ") " ; |
2061 | return res; |
2062 | } |
2063 | |
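// Roughly, the checks below prefer the least surprising layout that validates:
// std430 (when allowed) -> std140 -> scalar (Vulkan only) -> the same layouts again
// with explicit offsets via enhanced layouts, before giving up.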
2064 | string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout) |
2065 | { |
2066 | if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, packing: BufferPackingStd430)) |
2067 | return "std430" ; |
2068 | else if (buffer_is_packing_standard(type, packing: BufferPackingStd140)) |
2069 | return "std140" ; |
2070 | else if (options.vulkan_semantics && buffer_is_packing_standard(type, packing: BufferPackingScalar)) |
2071 | { |
2072 | require_extension_internal(ext: "GL_EXT_scalar_block_layout" ); |
2073 | return "scalar" ; |
2074 | } |
2075 | else if (support_std430_without_scalar_layout && |
2076 | buffer_is_packing_standard(type, packing: BufferPackingStd430EnhancedLayout)) |
2077 | { |
2078 | if (options.es && !options.vulkan_semantics) |
2079 | SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " |
2080 | "not support GL_ARB_enhanced_layouts." ); |
2081 | if (!options.es && !options.vulkan_semantics && options.version < 440) |
2082 | require_extension_internal(ext: "GL_ARB_enhanced_layouts" ); |
2083 | |
2084 | set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset); |
2085 | return "std430" ; |
2086 | } |
2087 | else if (buffer_is_packing_standard(type, packing: BufferPackingStd140EnhancedLayout)) |
2088 | { |
2089 | // Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference, |
2090 | // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout. |
2091 | // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there. |
2092 | if (options.es && !options.vulkan_semantics) |
2093 | SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " |
2094 | "not support GL_ARB_enhanced_layouts." ); |
2095 | if (!options.es && !options.vulkan_semantics && options.version < 440) |
2096 | require_extension_internal(ext: "GL_ARB_enhanced_layouts" ); |
2097 | |
2098 | set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset); |
2099 | return "std140" ; |
2100 | } |
2101 | else if (options.vulkan_semantics && buffer_is_packing_standard(type, packing: BufferPackingScalarEnhancedLayout)) |
2102 | { |
2103 | set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset); |
2104 | require_extension_internal(ext: "GL_EXT_scalar_block_layout" ); |
2105 | return "scalar" ; |
2106 | } |
2107 | else if (!support_std430_without_scalar_layout && options.vulkan_semantics && |
2108 | buffer_is_packing_standard(type, packing: BufferPackingStd430)) |
2109 | { |
2110 | // UBOs can support std430 with GL_EXT_scalar_block_layout. |
2111 | require_extension_internal(ext: "GL_EXT_scalar_block_layout" ); |
2112 | return "std430" ; |
2113 | } |
2114 | else if (!support_std430_without_scalar_layout && options.vulkan_semantics && |
2115 | buffer_is_packing_standard(type, packing: BufferPackingStd430EnhancedLayout)) |
2116 | { |
2117 | // UBOs can support std430 with GL_EXT_scalar_block_layout. |
2118 | set_extended_decoration(id: type.self, decoration: SPIRVCrossDecorationExplicitOffset); |
2119 | require_extension_internal(ext: "GL_EXT_scalar_block_layout" ); |
2120 | return "std430" ; |
2121 | } |
2122 | else |
2123 | { |
2124 | SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced " |
2125 | "layouts. You can try flattening this block to support a more flexible layout." ); |
2126 | } |
2127 | } |
2128 | |
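// Push constants have no direct equivalent outside Vulkan GLSL, so the paths below emit either a real
// push_constant block, a plain UBO, or (for OpenGL GLSL) a struct plus a plain uniform of that struct,
// e.g. "struct PushConstants { ... }; uniform PushConstants pc;" (names illustrative).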
2129 | void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var) |
2130 | { |
2131 | if (flattened_buffer_blocks.count(x: var.self)) |
2132 | emit_buffer_block_flattened(type: var); |
2133 | else if (options.vulkan_semantics) |
2134 | emit_push_constant_block_vulkan(var); |
2135 | else if (options.emit_push_constant_as_uniform_buffer) |
2136 | emit_buffer_block_native(var); |
2137 | else |
2138 | emit_push_constant_block_glsl(var); |
2139 | } |
2140 | |
2141 | void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var) |
2142 | { |
2143 | emit_buffer_block(type: var); |
2144 | } |
2145 | |
2146 | void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var) |
2147 | { |
2148 | // OpenGL has no concept of push constant blocks, implement it as a uniform struct. |
2149 | auto &type = get<SPIRType>(id: var.basetype); |
2150 | |
2151 | unset_decoration(id: var.self, decoration: DecorationBinding); |
2152 | unset_decoration(id: var.self, decoration: DecorationDescriptorSet); |
2153 | |
2154 | #if 0 |
2155 | if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet))) |
2156 | SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. " |
2157 | "Remap to location with reflection API first or disable these decorations." ); |
2158 | #endif |
2159 | |
2160 | // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. |
2161 | // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. |
2162 | bool block_flag = has_decoration(id: type.self, decoration: DecorationBlock); |
2163 | unset_decoration(id: type.self, decoration: DecorationBlock); |
2164 | |
2165 | emit_struct(type); |
2166 | |
2167 | if (block_flag) |
2168 | set_decoration(id: type.self, decoration: DecorationBlock); |
2169 | |
2170 | emit_uniform(var); |
2171 | statement(ts: "" ); |
2172 | } |
2173 | |
2174 | void CompilerGLSL::emit_buffer_block(const SPIRVariable &var) |
2175 | { |
2176 | auto &type = get<SPIRType>(id: var.basetype); |
2177 | bool ubo_block = var.storage == StorageClassUniform && has_decoration(id: type.self, decoration: DecorationBlock); |
2178 | |
2179 | if (flattened_buffer_blocks.count(x: var.self)) |
2180 | emit_buffer_block_flattened(type: var); |
2181 | else if (is_legacy() || (!options.es && options.version == 130) || |
2182 | (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)) |
2183 | emit_buffer_block_legacy(var); |
2184 | else |
2185 | emit_buffer_block_native(var); |
2186 | } |
2187 | |
2188 | void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var) |
2189 | { |
2190 | auto &type = get<SPIRType>(id: var.basetype); |
2191 | bool ssbo = var.storage == StorageClassStorageBuffer || |
2192 | ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock); |
2193 | if (ssbo) |
2194 | SPIRV_CROSS_THROW("SSBOs not supported in legacy targets." ); |
2195 | |
2196 | // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. |
2197 | // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. |
2198 | auto &block_flags = ir.meta[type.self].decoration.decoration_flags; |
2199 | bool block_flag = block_flags.get(bit: DecorationBlock); |
2200 | block_flags.clear(bit: DecorationBlock); |
2201 | emit_struct(type); |
2202 | if (block_flag) |
2203 | block_flags.set(DecorationBlock); |
2204 | emit_uniform(var); |
2205 | statement(ts: "" ); |
2206 | } |
2207 | |
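// GL_EXT_buffer_reference blocks are emitted in two passes: first a forward declaration
// ("layout(buffer_reference) buffer Foo;"), then the full definition, e.g.
// "layout(buffer_reference, buffer_reference_align = 16, std430) buffer Foo { ... };"
// (block name and alignment here are illustrative).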
2208 | void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_declaration) |
2209 | { |
2210 | auto &type = get<SPIRType>(id: type_id); |
2211 | string buffer_name; |
2212 | |
2213 | if (forward_declaration) |
2214 | { |
// Block names should never alias, but from HLSL input they kind of can, because block types are reused for UAVs ...
// Allow an aliased name here, since we might be declaring the block twice: once as a forward-declared buffer
// reference and once as the proper declaration. The names must match up.
2218 | buffer_name = to_name(id: type.self, allow_alias: false); |
2219 | |
2220 | // Shaders never use the block by interface name, so we don't |
2221 | // have to track this other than updating name caches. |
2222 | // If we have a collision for any reason, just fallback immediately. |
2223 | if (ir.meta[type.self].decoration.alias.empty() || |
2224 | block_ssbo_names.find(x: buffer_name) != end(cont&: block_ssbo_names) || |
2225 | resource_names.find(x: buffer_name) != end(cont&: resource_names)) |
2226 | { |
2227 | buffer_name = join(ts: "_" , ts&: type.self); |
2228 | } |
2229 | |
2230 | // Make sure we get something unique for both global name scope and block name scope. |
2231 | // See GLSL 4.5 spec: section 4.3.9 for details. |
2232 | add_variable(variables_primary&: block_ssbo_names, variables_secondary: resource_names, name&: buffer_name); |
2233 | |
2234 | // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. |
2235 | // This cannot conflict with anything else, so we're safe now. |
// We cannot reuse this fallback name in either the global scope (blocked by block_names) or the block name scope.
2237 | if (buffer_name.empty()) |
2238 | buffer_name = join(ts: "_" , ts&: type.self); |
2239 | |
2240 | block_names.insert(x: buffer_name); |
2241 | block_ssbo_names.insert(x: buffer_name); |
2242 | |
2243 | // Ensure we emit the correct name when emitting non-forward pointer type. |
2244 | ir.meta[type.self].decoration.alias = buffer_name; |
2245 | } |
2246 | else if (type.basetype != SPIRType::Struct) |
2247 | buffer_name = type_to_glsl(type); |
2248 | else |
2249 | buffer_name = to_name(id: type.self, allow_alias: false); |
2250 | |
2251 | if (!forward_declaration) |
2252 | { |
2253 | auto itr = physical_storage_type_to_alignment.find(x: type_id); |
2254 | uint32_t alignment = 0; |
2255 | if (itr != physical_storage_type_to_alignment.end()) |
2256 | alignment = itr->second.alignment; |
2257 | |
2258 | if (type.basetype == SPIRType::Struct) |
2259 | { |
2260 | SmallVector<std::string> attributes; |
2261 | attributes.push_back(t: "buffer_reference" ); |
2262 | if (alignment) |
2263 | attributes.push_back(t: join(ts: "buffer_reference_align = " , ts&: alignment)); |
2264 | attributes.push_back(t: buffer_to_packing_standard(type, support_std430_without_scalar_layout: true)); |
2265 | |
2266 | auto flags = ir.get_buffer_block_type_flags(type); |
2267 | string decorations; |
2268 | if (flags.get(bit: DecorationRestrict)) |
2269 | decorations += " restrict" ; |
2270 | if (flags.get(bit: DecorationCoherent)) |
2271 | decorations += " coherent" ; |
2272 | if (flags.get(bit: DecorationNonReadable)) |
2273 | decorations += " writeonly" ; |
2274 | if (flags.get(bit: DecorationNonWritable)) |
2275 | decorations += " readonly" ; |
2276 | |
2277 | statement(ts: "layout(" , ts: merge(list: attributes), ts: ")" , ts&: decorations, ts: " buffer " , ts&: buffer_name); |
2278 | } |
2279 | else if (alignment) |
2280 | statement(ts: "layout(buffer_reference, buffer_reference_align = " , ts&: alignment, ts: ") buffer " , ts&: buffer_name); |
2281 | else |
2282 | statement(ts: "layout(buffer_reference) buffer " , ts&: buffer_name); |
2283 | |
2284 | begin_scope(); |
2285 | |
2286 | if (type.basetype == SPIRType::Struct) |
2287 | { |
2288 | type.member_name_cache.clear(); |
2289 | |
2290 | uint32_t i = 0; |
2291 | for (auto &member : type.member_types) |
2292 | { |
2293 | add_member_name(type, name: i); |
2294 | emit_struct_member(type, member_type_id: member, index: i); |
2295 | i++; |
2296 | } |
2297 | } |
2298 | else |
2299 | { |
2300 | auto &pointee_type = get_pointee_type(type); |
2301 | statement(ts: type_to_glsl(type: pointee_type), ts: " value" , ts: type_to_array_glsl(type: pointee_type), ts: ";" ); |
2302 | } |
2303 | |
2304 | end_scope_decl(); |
2305 | statement(ts: "" ); |
2306 | } |
2307 | else |
2308 | { |
2309 | statement(ts: "layout(buffer_reference) buffer " , ts&: buffer_name, ts: ";" ); |
2310 | } |
2311 | } |
2312 | |
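// Typical output of this function (names illustrative; set = ... only appears with Vulkan semantics):
//   layout(std430, set = 0, binding = 0) restrict readonly buffer SSBO { vec4 data[]; } ssbo;
// with the layout(...) part coming from layout_for_variable() above.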
2313 | void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var) |
2314 | { |
2315 | auto &type = get<SPIRType>(id: var.basetype); |
2316 | |
2317 | Bitset flags = ir.get_buffer_block_flags(var); |
2318 | bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR || |
2319 | ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock); |
2320 | bool is_restrict = ssbo && flags.get(bit: DecorationRestrict); |
2321 | bool is_writeonly = ssbo && flags.get(bit: DecorationNonReadable); |
2322 | bool is_readonly = ssbo && flags.get(bit: DecorationNonWritable); |
2323 | bool is_coherent = ssbo && flags.get(bit: DecorationCoherent); |
2324 | |
2325 | // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ... |
2326 | auto buffer_name = to_name(id: type.self, allow_alias: false); |
2327 | |
2328 | auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names; |
2329 | |
2330 | // Shaders never use the block by interface name, so we don't |
2331 | // have to track this other than updating name caches. |
2332 | // If we have a collision for any reason, just fallback immediately. |
2333 | if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(x: buffer_name) != end(cont&: block_namespace) || |
2334 | resource_names.find(x: buffer_name) != end(cont&: resource_names)) |
2335 | { |
2336 | buffer_name = get_block_fallback_name(id: var.self); |
2337 | } |
2338 | |
2339 | // Make sure we get something unique for both global name scope and block name scope. |
2340 | // See GLSL 4.5 spec: section 4.3.9 for details. |
2341 | add_variable(variables_primary&: block_namespace, variables_secondary: resource_names, name&: buffer_name); |
2342 | |
2343 | // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. |
2344 | // This cannot conflict with anything else, so we're safe now. |
// We cannot reuse this fallback name in either the global scope (blocked by block_names) or the block name scope.
2346 | if (buffer_name.empty()) |
2347 | buffer_name = join(ts: "_" , ts&: get<SPIRType>(id: var.basetype).self, ts: "_" , ts: var.self); |
2348 | |
2349 | block_names.insert(x: buffer_name); |
2350 | block_namespace.insert(x: buffer_name); |
2351 | |
2352 | // Save for post-reflection later. |
2353 | declared_block_names[var.self] = buffer_name; |
2354 | |
2355 | statement(ts: layout_for_variable(var), ts: is_coherent ? "coherent " : "" , ts: is_restrict ? "restrict " : "" , |
2356 | ts: is_writeonly ? "writeonly " : "" , ts: is_readonly ? "readonly " : "" , ts: ssbo ? "buffer " : "uniform " , |
2357 | ts&: buffer_name); |
2358 | |
2359 | begin_scope(); |
2360 | |
2361 | type.member_name_cache.clear(); |
2362 | |
2363 | uint32_t i = 0; |
2364 | for (auto &member : type.member_types) |
2365 | { |
2366 | add_member_name(type, name: i); |
2367 | emit_struct_member(type, member_type_id: member, index: i); |
2368 | i++; |
2369 | } |
2370 | |
2371 | // var.self can be used as a backup name for the block name, |
2372 | // so we need to make sure we don't disturb the name here on a recompile. |
2373 | // It will need to be reset if we have to recompile. |
2374 | preserve_alias_on_reset(id: var.self); |
2375 | add_resource_name(id: var.self); |
2376 | end_scope_decl(decl: to_name(id: var.self) + type_to_array_glsl(type)); |
2377 | statement(ts: "" ); |
2378 | } |
2379 | |
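// Flattened blocks are emitted as one big array of vec4/ivec4/uvec4, e.g. a 64-byte UBO of floats
// becomes "uniform vec4 UBO[4];" (name and size illustrative); accesses into the block are rewritten
// elsewhere to index this array.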
2380 | void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var) |
2381 | { |
2382 | auto &type = get<SPIRType>(id: var.basetype); |
2383 | |
2384 | // Block names should never alias. |
2385 | auto buffer_name = to_name(id: type.self, allow_alias: false); |
2386 | size_t buffer_size = (get_declared_struct_size(struct_type: type) + 15) / 16; |
2387 | |
2388 | SPIRType::BaseType basic_type; |
2389 | if (get_common_basic_type(type, base_type&: basic_type)) |
2390 | { |
2391 | SPIRType tmp; |
2392 | tmp.basetype = basic_type; |
2393 | tmp.vecsize = 4; |
2394 | if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt) |
2395 | SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint." ); |
2396 | |
2397 | auto flags = ir.get_buffer_block_flags(var); |
2398 | statement(ts: "uniform " , ts: flags_to_qualifiers_glsl(type: tmp, flags), ts: type_to_glsl(type: tmp), ts: " " , ts&: buffer_name, ts: "[" , |
2399 | ts&: buffer_size, ts: "];" ); |
2400 | } |
2401 | else |
2402 | SPIRV_CROSS_THROW("All basic types in a flattened block must be the same." ); |
2403 | } |
2404 | |
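// Maps SPIR-V storage classes to GLSL qualifiers, e.g. Input/Output become "in "/"out " on modern
// targets but "attribute "/"varying " on legacy GLSL/ESSL, and the ray tracing storage classes pick
// the EXT or NV spelling depending on which extension is in use.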
2405 | const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) |
2406 | { |
2407 | auto &execution = get_entry_point(); |
2408 | |
2409 | if (subpass_input_is_framebuffer_fetch(id: var.self)) |
2410 | return "" ; |
2411 | |
2412 | if (var.storage == StorageClassInput || var.storage == StorageClassOutput) |
2413 | { |
2414 | if (is_legacy() && execution.model == ExecutionModelVertex) |
2415 | return var.storage == StorageClassInput ? "attribute " : "varying " ; |
2416 | else if (is_legacy() && execution.model == ExecutionModelFragment) |
2417 | return "varying " ; // Fragment outputs are renamed so they never hit this case. |
2418 | else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput) |
2419 | { |
2420 | uint32_t loc = get_decoration(id: var.self, decoration: DecorationLocation); |
2421 | bool is_inout = location_is_framebuffer_fetch(location: loc); |
2422 | if (is_inout) |
2423 | return "inout " ; |
2424 | else |
2425 | return "out " ; |
2426 | } |
2427 | else |
2428 | return var.storage == StorageClassInput ? "in " : "out " ; |
2429 | } |
2430 | else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform || |
2431 | var.storage == StorageClassPushConstant) |
2432 | { |
2433 | return "uniform " ; |
2434 | } |
2435 | else if (var.storage == StorageClassRayPayloadKHR) |
2436 | { |
2437 | return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV " ; |
2438 | } |
2439 | else if (var.storage == StorageClassIncomingRayPayloadKHR) |
2440 | { |
2441 | return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV " ; |
2442 | } |
2443 | else if (var.storage == StorageClassHitAttributeKHR) |
2444 | { |
2445 | return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV " ; |
2446 | } |
2447 | else if (var.storage == StorageClassCallableDataKHR) |
2448 | { |
2449 | return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV " ; |
2450 | } |
2451 | else if (var.storage == StorageClassIncomingCallableDataKHR) |
2452 | { |
2453 | return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV " ; |
2454 | } |
2455 | |
2456 | return "" ; |
2457 | } |
2458 | |
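// When flattening, a nested member such as block.inner.color is emitted as a standalone varying whose
// name joins the path with underscores (e.g. "block_inner_color"; example name illustrative), with
// sanitize_underscores() collapsing any accidental "__" runs.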
2459 | void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual, |
2460 | const SmallVector<uint32_t> &indices) |
2461 | { |
2462 | uint32_t member_type_id = type.self; |
2463 | const SPIRType *member_type = &type; |
2464 | const SPIRType *parent_type = nullptr; |
2465 | auto flattened_name = basename; |
2466 | for (auto &index : indices) |
2467 | { |
2468 | flattened_name += "_" ; |
2469 | flattened_name += to_member_name(type: *member_type, index); |
2470 | parent_type = member_type; |
2471 | member_type_id = member_type->member_types[index]; |
2472 | member_type = &get<SPIRType>(id: member_type_id); |
2473 | } |
2474 | |
2475 | assert(member_type->basetype != SPIRType::Struct); |
2476 | |
2477 | // We're overriding struct member names, so ensure we do so on the primary type. |
2478 | if (parent_type->type_alias) |
2479 | parent_type = &get<SPIRType>(id: parent_type->type_alias); |
2480 | |
2481 | // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row, |
2482 | // which is not allowed. |
2483 | ParsedIR::sanitize_underscores(str&: flattened_name); |
2484 | |
2485 | uint32_t last_index = indices.back(); |
2486 | |
2487 | // Pass in the varying qualifier here so it will appear in the correct declaration order. |
2488 | // Replace member name while emitting it so it encodes both struct name and member name. |
2489 | auto backup_name = get_member_name(id: parent_type->self, index: last_index); |
2490 | auto member_name = to_member_name(type: *parent_type, index: last_index); |
2491 | set_member_name(id: parent_type->self, index: last_index, name: flattened_name); |
2492 | emit_struct_member(type: *parent_type, member_type_id, index: last_index, qualifier: qual); |
2493 | // Restore member name. |
2494 | set_member_name(id: parent_type->self, index: last_index, name: member_name); |
2495 | } |
2496 | |
2497 | void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual, |
2498 | const SmallVector<uint32_t> &indices) |
2499 | { |
2500 | auto sub_indices = indices; |
2501 | sub_indices.push_back(t: 0); |
2502 | |
2503 | const SPIRType *member_type = &type; |
2504 | for (auto &index : indices) |
2505 | member_type = &get<SPIRType>(id: member_type->member_types[index]); |
2506 | |
2507 | assert(member_type->basetype == SPIRType::Struct); |
2508 | |
2509 | if (!member_type->array.empty()) |
2510 | SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks." ); |
2511 | |
2512 | for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++) |
2513 | { |
2514 | sub_indices.back() = i; |
2515 | if (get<SPIRType>(id: member_type->member_types[i]).basetype == SPIRType::Struct) |
2516 | emit_flattened_io_block_struct(basename, type, qual, indices: sub_indices); |
2517 | else |
2518 | emit_flattened_io_block_member(basename, type, qual, indices: sub_indices); |
2519 | } |
2520 | } |
2521 | |
2522 | void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual) |
2523 | { |
2524 | auto &var_type = get<SPIRType>(id: var.basetype); |
2525 | if (!var_type.array.empty()) |
2526 | SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings." ); |
2527 | |
2528 | // Emit flattened types based on the type alias. Normally, we are never supposed to emit |
2529 | // struct declarations for aliased types. |
2530 | auto &type = var_type.type_alias ? get<SPIRType>(id: var_type.type_alias) : var_type; |
2531 | |
2532 | auto old_flags = ir.meta[type.self].decoration.decoration_flags; |
2533 | // Emit the members as if they are part of a block to get all qualifiers. |
2534 | ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock); |
2535 | |
2536 | type.member_name_cache.clear(); |
2537 | |
2538 | SmallVector<uint32_t> member_indices; |
2539 | member_indices.push_back(t: 0); |
2540 | auto basename = to_name(id: var.self); |
2541 | |
2542 | uint32_t i = 0; |
2543 | for (auto &member : type.member_types) |
2544 | { |
2545 | add_member_name(type, name: i); |
2546 | auto &membertype = get<SPIRType>(id: member); |
2547 | |
2548 | member_indices.back() = i; |
2549 | if (membertype.basetype == SPIRType::Struct) |
2550 | emit_flattened_io_block_struct(basename, type, qual, indices: member_indices); |
2551 | else |
2552 | emit_flattened_io_block_member(basename, type, qual, indices: member_indices); |
2553 | i++; |
2554 | } |
2555 | |
2556 | ir.meta[type.self].decoration.decoration_flags = old_flags; |
2557 | |
2558 | // Treat this variable as fully flattened from now on. |
2559 | flattened_structs[var.self] = true; |
2560 | } |
2561 | |
2562 | void CompilerGLSL::emit_interface_block(const SPIRVariable &var) |
2563 | { |
2564 | auto &type = get<SPIRType>(id: var.basetype); |
2565 | |
2566 | if (var.storage == StorageClassInput && type.basetype == SPIRType::Double && |
2567 | !options.es && options.version < 410) |
2568 | { |
2569 | require_extension_internal(ext: "GL_ARB_vertex_attrib_64bit" ); |
2570 | } |
2571 | |
2572 | // Either make it plain in/out or in/out blocks depending on what shader is doing ... |
2573 | bool block = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock); |
2574 | const char *qual = to_storage_qualifiers_glsl(var); |
2575 | |
2576 | if (block) |
2577 | { |
2578 | // ESSL earlier than 310 and GLSL earlier than 150 did not support |
2579 | // I/O variables which are struct types. |
2580 | // To support this, flatten the struct into separate varyings instead. |
2581 | if (options.force_flattened_io_blocks || (options.es && options.version < 310) || |
2582 | (!options.es && options.version < 150)) |
2583 | { |
2584 | // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320. |
2585 | // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150). |
2586 | emit_flattened_io_block(var, qual); |
2587 | } |
2588 | else |
2589 | { |
2590 | if (options.es && options.version < 320) |
2591 | { |
2592 | // Geometry and tessellation extensions imply this extension. |
2593 | if (!has_extension(ext: "GL_EXT_geometry_shader" ) && !has_extension(ext: "GL_EXT_tessellation_shader" )) |
2594 | require_extension_internal(ext: "GL_EXT_shader_io_blocks" ); |
2595 | } |
2596 | |
2597 | // Workaround to make sure we can emit "patch in/out" correctly. |
2598 | fixup_io_block_patch_qualifiers(var); |
2599 | |
2600 | // Block names should never alias. |
2601 | auto block_name = to_name(id: type.self, allow_alias: false); |
2602 | |
2603 | // The namespace for I/O blocks is separate from other variables in GLSL. |
2604 | auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names; |
2605 | |
2606 | // Shaders never use the block by interface name, so we don't |
2607 | // have to track this other than updating name caches. |
2608 | if (block_name.empty() || block_namespace.find(x: block_name) != end(cont&: block_namespace)) |
2609 | block_name = get_fallback_name(id: type.self); |
2610 | else |
2611 | block_namespace.insert(x: block_name); |
2612 | |
2613 | // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. |
2614 | // This cannot conflict with anything else, so we're safe now. |
2615 | if (block_name.empty()) |
2616 | block_name = join(ts: "_" , ts&: get<SPIRType>(id: var.basetype).self, ts: "_" , ts: var.self); |
2617 | |
2618 | // Instance names cannot alias block names. |
2619 | resource_names.insert(x: block_name); |
2620 | |
2621 | bool is_patch = has_decoration(id: var.self, decoration: DecorationPatch); |
2622 | statement(ts: layout_for_variable(var), ts: (is_patch ? "patch " : "" ), ts&: qual, ts&: block_name); |
2623 | begin_scope(); |
2624 | |
2625 | type.member_name_cache.clear(); |
2626 | |
2627 | uint32_t i = 0; |
2628 | for (auto &member : type.member_types) |
2629 | { |
2630 | add_member_name(type, name: i); |
2631 | emit_struct_member(type, member_type_id: member, index: i); |
2632 | i++; |
2633 | } |
2634 | |
2635 | add_resource_name(id: var.self); |
2636 | end_scope_decl(decl: join(ts: to_name(id: var.self), ts: type_to_array_glsl(type))); |
2637 | statement(ts: "" ); |
2638 | } |
2639 | } |
2640 | else |
2641 | { |
2642 | // ESSL earlier than 310 and GLSL earlier than 150 did not support |
2643 | // I/O variables which are struct types. |
2644 | // To support this, flatten the struct into separate varyings instead. |
2645 | if (type.basetype == SPIRType::Struct && |
2646 | (options.force_flattened_io_blocks || (options.es && options.version < 310) || |
2647 | (!options.es && options.version < 150))) |
2648 | { |
2649 | emit_flattened_io_block(var, qual); |
2650 | } |
2651 | else |
2652 | { |
2653 | add_resource_name(id: var.self); |
2654 | |
2655 | // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays. |
2656 | // Opt for unsized as it's the more "correct" variant to use. |
2657 | bool control_point_input_array = type.storage == StorageClassInput && !type.array.empty() && |
2658 | !has_decoration(id: var.self, decoration: DecorationPatch) && |
2659 | (get_entry_point().model == ExecutionModelTessellationControl || |
2660 | get_entry_point().model == ExecutionModelTessellationEvaluation); |
2661 | |
2662 | uint32_t old_array_size = 0; |
2663 | bool old_array_size_literal = true; |
2664 | |
2665 | if (control_point_input_array) |
2666 | { |
2667 | swap(a&: type.array.back(), b&: old_array_size); |
2668 | swap(a&: type.array_size_literal.back(), b&: old_array_size_literal); |
2669 | } |
2670 | |
2671 | statement(ts: layout_for_variable(var), ts: to_qualifiers_glsl(id: var.self), |
2672 | ts: variable_decl(type, name: to_name(id: var.self), id: var.self), ts: ";" ); |
2673 | |
2674 | if (control_point_input_array) |
2675 | { |
2676 | swap(a&: type.array.back(), b&: old_array_size); |
2677 | swap(a&: type.array_size_literal.back(), b&: old_array_size_literal); |
2678 | } |
2679 | } |
2680 | } |
2681 | } |
2682 | |
2683 | void CompilerGLSL::emit_uniform(const SPIRVariable &var) |
2684 | { |
2685 | auto &type = get<SPIRType>(id: var.basetype); |
2686 | if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData) |
2687 | { |
2688 | if (!options.es && options.version < 420) |
2689 | require_extension_internal(ext: "GL_ARB_shader_image_load_store" ); |
2690 | else if (options.es && options.version < 310) |
2691 | SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store." ); |
2692 | } |
2693 | |
2694 | add_resource_name(id: var.self); |
2695 | statement(ts: layout_for_variable(var), ts: variable_decl(variable: var), ts: ";" ); |
2696 | } |
2697 | |
2698 | string CompilerGLSL::constant_value_macro_name(uint32_t id) |
2699 | { |
2700 | return join(ts: "SPIRV_CROSS_CONSTANT_ID_" , ts&: id); |
2701 | } |
2702 | |
2703 | void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant) |
2704 | { |
2705 | auto &type = get<SPIRType>(id: constant.basetype); |
2706 | add_resource_name(id: constant.self); |
2707 | auto name = to_name(id: constant.self); |
2708 | statement(ts: "const " , ts: variable_decl(type, name), ts: " = " , ts: constant_op_expression(cop: constant), ts: ";" ); |
2709 | } |
2710 | |
2711 | int CompilerGLSL::get_constant_mapping_to_workgroup_component(const SPIRConstant &c) const |
2712 | { |
2713 | auto &entry_point = get_entry_point(); |
2714 | int index = -1; |
2715 | |
	// Need to redirect specialization constants which are used as WorkGroupSize to the builtin,
	// since these spec constants are never explicitly declared on their own.
2718 | if (entry_point.workgroup_size.constant == 0 && entry_point.flags.get(bit: ExecutionModeLocalSizeId)) |
2719 | { |
2720 | if (c.self == entry_point.workgroup_size.id_x) |
2721 | index = 0; |
2722 | else if (c.self == entry_point.workgroup_size.id_y) |
2723 | index = 1; |
2724 | else if (c.self == entry_point.workgroup_size.id_z) |
2725 | index = 2; |
2726 | } |
2727 | |
2728 | return index; |
2729 | } |
2730 | |
2731 | void CompilerGLSL::emit_constant(const SPIRConstant &constant) |
2732 | { |
2733 | auto &type = get<SPIRType>(id: constant.constant_type); |
2734 | |
2735 | SpecializationConstant wg_x, wg_y, wg_z; |
2736 | ID workgroup_size_id = get_work_group_size_specialization_constants(x&: wg_x, y&: wg_y, z&: wg_z); |
2737 | |
2738 | // This specialization constant is implicitly declared by emitting layout() in; |
2739 | if (constant.self == workgroup_size_id) |
2740 | return; |
2741 | |
2742 | // These specialization constants are implicitly declared by emitting layout() in; |
2743 | // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration |
2744 | // later can use macro overrides for work group size. |
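	// E.g. for a constant decorated with SpecId 3, plain GLSL output looks like:
	//     #ifndef SPIRV_CROSS_CONSTANT_ID_3
	//     #define SPIRV_CROSS_CONSTANT_ID_3 <default value>
	//     #endif
	// followed by "const <type> <name> = SPIRV_CROSS_CONSTANT_ID_3;" for everything
	// except the work group size constants, which only get the macros.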
2745 | bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id || |
2746 | ConstantID(constant.self) == wg_z.id; |
2747 | |
2748 | if (options.vulkan_semantics && is_workgroup_size_constant) |
2749 | { |
		// Vulkan GLSL does not need to declare workgroup spec constants explicitly; they are handled in layout().
2751 | return; |
2752 | } |
2753 | else if (!options.vulkan_semantics && is_workgroup_size_constant && |
2754 | !has_decoration(id: constant.self, decoration: DecorationSpecId)) |
2755 | { |
2756 | // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros. |
2757 | return; |
2758 | } |
2759 | |
2760 | add_resource_name(id: constant.self); |
2761 | auto name = to_name(id: constant.self); |
2762 | |
2763 | // Only scalars have constant IDs. |
2764 | if (has_decoration(id: constant.self, decoration: DecorationSpecId)) |
2765 | { |
2766 | if (options.vulkan_semantics) |
2767 | { |
2768 | statement(ts: "layout(constant_id = " , ts: get_decoration(id: constant.self, decoration: DecorationSpecId), ts: ") const " , |
2769 | ts: variable_decl(type, name), ts: " = " , ts: constant_expression(c: constant), ts: ";" ); |
2770 | } |
2771 | else |
2772 | { |
			const string &macro_name = constant.specialization_constant_macro_name;
2774 | statement(ts: "#ifndef " , ts: macro_name); |
2775 | statement(ts: "#define " , ts: macro_name, ts: " " , ts: constant_expression(c: constant)); |
2776 | statement(ts: "#endif" ); |
2777 | |
2778 | // For workgroup size constants, only emit the macros. |
2779 | if (!is_workgroup_size_constant) |
2780 | statement(ts: "const " , ts: variable_decl(type, name), ts: " = " , ts: macro_name, ts: ";" ); |
2781 | } |
2782 | } |
2783 | else |
2784 | { |
2785 | statement(ts: "const " , ts: variable_decl(type, name), ts: " = " , ts: constant_expression(c: constant), ts: ";" ); |
2786 | } |
2787 | } |
2788 | |
2789 | void CompilerGLSL::emit_entry_point_declarations() |
2790 | { |
2791 | } |
2792 | |
2793 | void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords) |
2794 | { |
2795 | ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) { |
2796 | if (is_hidden_variable(var)) |
2797 | return; |
2798 | |
2799 | auto *meta = ir.find_meta(id: var.self); |
2800 | if (!meta) |
2801 | return; |
2802 | |
2803 | auto &m = meta->decoration; |
2804 | if (keywords.find(x: m.alias) != end(cont: keywords)) |
2805 | m.alias = join(ts: "_" , ts&: m.alias); |
2806 | }); |
2807 | |
2808 | ir.for_each_typed_id<SPIRFunction>(op: [&](uint32_t, const SPIRFunction &func) { |
2809 | auto *meta = ir.find_meta(id: func.self); |
2810 | if (!meta) |
2811 | return; |
2812 | |
2813 | auto &m = meta->decoration; |
2814 | if (keywords.find(x: m.alias) != end(cont: keywords)) |
2815 | m.alias = join(ts: "_" , ts&: m.alias); |
2816 | }); |
2817 | |
2818 | ir.for_each_typed_id<SPIRType>(op: [&](uint32_t, const SPIRType &type) { |
2819 | auto *meta = ir.find_meta(id: type.self); |
2820 | if (!meta) |
2821 | return; |
2822 | |
2823 | auto &m = meta->decoration; |
2824 | if (keywords.find(x: m.alias) != end(cont: keywords)) |
2825 | m.alias = join(ts: "_" , ts&: m.alias); |
2826 | |
2827 | for (auto &memb : meta->members) |
2828 | if (keywords.find(x: memb.alias) != end(cont: keywords)) |
2829 | memb.alias = join(ts: "_" , ts&: memb.alias); |
2830 | }); |
2831 | } |
2832 | |
2833 | void CompilerGLSL::replace_illegal_names() |
2834 | { |
2835 | // clang-format off |
2836 | static const unordered_set<string> keywords = { |
2837 | "abs" , "acos" , "acosh" , "all" , "any" , "asin" , "asinh" , "atan" , "atanh" , |
2838 | "atomicAdd" , "atomicCompSwap" , "atomicCounter" , "atomicCounterDecrement" , "atomicCounterIncrement" , |
2839 | "atomicExchange" , "atomicMax" , "atomicMin" , "atomicOr" , "atomicXor" , |
2840 | "bitCount" , "bitfieldExtract" , "bitfieldInsert" , "bitfieldReverse" , |
2841 | "ceil" , "cos" , "cosh" , "cross" , "degrees" , |
2842 | "dFdx" , "dFdxCoarse" , "dFdxFine" , |
2843 | "dFdy" , "dFdyCoarse" , "dFdyFine" , |
2844 | "distance" , "dot" , "EmitStreamVertex" , "EmitVertex" , "EndPrimitive" , "EndStreamPrimitive" , "equal" , "exp" , "exp2" , |
2845 | "faceforward" , "findLSB" , "findMSB" , "float16BitsToInt16" , "float16BitsToUint16" , "floatBitsToInt" , "floatBitsToUint" , "floor" , "fma" , "fract" , |
2846 | "frexp" , "fwidth" , "fwidthCoarse" , "fwidthFine" , |
2847 | "greaterThan" , "greaterThanEqual" , "groupMemoryBarrier" , |
2848 | "imageAtomicAdd" , "imageAtomicAnd" , "imageAtomicCompSwap" , "imageAtomicExchange" , "imageAtomicMax" , "imageAtomicMin" , "imageAtomicOr" , "imageAtomicXor" , |
2849 | "imageLoad" , "imageSamples" , "imageSize" , "imageStore" , "imulExtended" , "int16BitsToFloat16" , "intBitsToFloat" , "interpolateAtOffset" , "interpolateAtCentroid" , "interpolateAtSample" , |
2850 | "inverse" , "inversesqrt" , "isinf" , "isnan" , "ldexp" , "length" , "lessThan" , "lessThanEqual" , "log" , "log2" , |
2851 | "matrixCompMult" , "max" , "memoryBarrier" , "memoryBarrierAtomicCounter" , "memoryBarrierBuffer" , "memoryBarrierImage" , "memoryBarrierShared" , |
2852 | "min" , "mix" , "mod" , "modf" , "noise" , "noise1" , "noise2" , "noise3" , "noise4" , "normalize" , "not" , "notEqual" , |
2853 | "outerProduct" , "packDouble2x32" , "packHalf2x16" , "packInt2x16" , "packInt4x16" , "packSnorm2x16" , "packSnorm4x8" , |
2854 | "packUint2x16" , "packUint4x16" , "packUnorm2x16" , "packUnorm4x8" , "pow" , |
2855 | "radians" , "reflect" , "refract" , "round" , "roundEven" , "sign" , "sin" , "sinh" , "smoothstep" , "sqrt" , "step" , |
2856 | "tan" , "tanh" , "texelFetch" , "texelFetchOffset" , "texture" , "textureGather" , "textureGatherOffset" , "textureGatherOffsets" , |
2857 | "textureGrad" , "textureGradOffset" , "textureLod" , "textureLodOffset" , "textureOffset" , "textureProj" , "textureProjGrad" , |
2858 | "textureProjGradOffset" , "textureProjLod" , "textureProjLodOffset" , "textureProjOffset" , "textureQueryLevels" , "textureQueryLod" , "textureSamples" , "textureSize" , |
2859 | "transpose" , "trunc" , "uaddCarry" , "uint16BitsToFloat16" , "uintBitsToFloat" , "umulExtended" , "unpackDouble2x32" , "unpackHalf2x16" , "unpackInt2x16" , "unpackInt4x16" , |
2860 | "unpackSnorm2x16" , "unpackSnorm4x8" , "unpackUint2x16" , "unpackUint4x16" , "unpackUnorm2x16" , "unpackUnorm4x8" , "usubBorrow" , |
2861 | |
2862 | "active" , "asm" , "atomic_uint" , "attribute" , "bool" , "break" , "buffer" , |
2863 | "bvec2" , "bvec3" , "bvec4" , "case" , "cast" , "centroid" , "class" , "coherent" , "common" , "const" , "continue" , "default" , "discard" , |
2864 | "dmat2" , "dmat2x2" , "dmat2x3" , "dmat2x4" , "dmat3" , "dmat3x2" , "dmat3x3" , "dmat3x4" , "dmat4" , "dmat4x2" , "dmat4x3" , "dmat4x4" , |
2865 | "do" , "double" , "dvec2" , "dvec3" , "dvec4" , "else" , "enum" , "extern" , "external" , "false" , "filter" , "fixed" , "flat" , "float" , |
2866 | "for" , "fvec2" , "fvec3" , "fvec4" , "goto" , "half" , "highp" , "hvec2" , "hvec3" , "hvec4" , "if" , "iimage1D" , "iimage1DArray" , |
2867 | "iimage2D" , "iimage2DArray" , "iimage2DMS" , "iimage2DMSArray" , "iimage2DRect" , "iimage3D" , "iimageBuffer" , "iimageCube" , |
2868 | "iimageCubeArray" , "image1D" , "image1DArray" , "image2D" , "image2DArray" , "image2DMS" , "image2DMSArray" , "image2DRect" , |
2869 | "image3D" , "imageBuffer" , "imageCube" , "imageCubeArray" , "in" , "inline" , "inout" , "input" , "int" , "interface" , "invariant" , |
2870 | "isampler1D" , "isampler1DArray" , "isampler2D" , "isampler2DArray" , "isampler2DMS" , "isampler2DMSArray" , "isampler2DRect" , |
2871 | "isampler3D" , "isamplerBuffer" , "isamplerCube" , "isamplerCubeArray" , "ivec2" , "ivec3" , "ivec4" , "layout" , "long" , "lowp" , |
2872 | "mat2" , "mat2x2" , "mat2x3" , "mat2x4" , "mat3" , "mat3x2" , "mat3x3" , "mat3x4" , "mat4" , "mat4x2" , "mat4x3" , "mat4x4" , "mediump" , |
2873 | "namespace" , "noinline" , "noperspective" , "out" , "output" , "packed" , "partition" , "patch" , "precise" , "precision" , "public" , "readonly" , |
2874 | "resource" , "restrict" , "return" , "sample" , "sampler1D" , "sampler1DArray" , "sampler1DArrayShadow" , |
2875 | "sampler1DShadow" , "sampler2D" , "sampler2DArray" , "sampler2DArrayShadow" , "sampler2DMS" , "sampler2DMSArray" , |
2876 | "sampler2DRect" , "sampler2DRectShadow" , "sampler2DShadow" , "sampler3D" , "sampler3DRect" , "samplerBuffer" , |
2877 | "samplerCube" , "samplerCubeArray" , "samplerCubeArrayShadow" , "samplerCubeShadow" , "shared" , "short" , "sizeof" , "smooth" , "static" , |
2878 | "struct" , "subroutine" , "superp" , "switch" , "template" , "this" , "true" , "typedef" , "uimage1D" , "uimage1DArray" , "uimage2D" , |
2879 | "uimage2DArray" , "uimage2DMS" , "uimage2DMSArray" , "uimage2DRect" , "uimage3D" , "uimageBuffer" , "uimageCube" , |
2880 | "uimageCubeArray" , "uint" , "uniform" , "union" , "unsigned" , "usampler1D" , "usampler1DArray" , "usampler2D" , "usampler2DArray" , |
2881 | "usampler2DMS" , "usampler2DMSArray" , "usampler2DRect" , "usampler3D" , "usamplerBuffer" , "usamplerCube" , |
2882 | "usamplerCubeArray" , "using" , "uvec2" , "uvec3" , "uvec4" , "varying" , "vec2" , "vec3" , "vec4" , "void" , "volatile" , |
2883 | "while" , "writeonly" , |
2884 | }; |
2885 | // clang-format on |
2886 | |
2887 | replace_illegal_names(keywords); |
2888 | } |
2889 | |
2890 | void CompilerGLSL::replace_fragment_output(SPIRVariable &var) |
2891 | { |
2892 | auto &m = ir.meta[var.self].decoration; |
2893 | uint32_t location = 0; |
2894 | if (m.decoration_flags.get(bit: DecorationLocation)) |
2895 | location = m.location; |
2896 | |
	// If our variable is arrayed, we must not emit the array part of the declaration;
	// the SPIR-V access chain already provides the indexing for us.
2899 | auto &type = get<SPIRType>(id: var.basetype); |
2900 | |
2901 | if (type.array.empty()) |
2902 | { |
2903 | // Redirect the write to a specific render target in legacy GLSL. |
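		// E.g. an output decorated with Location = 1 is simply renamed so that
		// writes end up in gl_FragData[1].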
2904 | m.alias = join(ts: "gl_FragData[" , ts&: location, ts: "]" ); |
2905 | |
2906 | if (is_legacy_es() && location != 0) |
2907 | require_extension_internal(ext: "GL_EXT_draw_buffers" ); |
2908 | } |
2909 | else if (type.array.size() == 1) |
2910 | { |
2911 | // If location is non-zero, we probably have to add an offset. |
2912 | // This gets really tricky since we'd have to inject an offset in the access chain. |
2913 | // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now. |
2914 | m.alias = "gl_FragData" ; |
2915 | if (location != 0) |
2916 | SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. " |
2917 | "This is unimplemented in SPIRV-Cross." ); |
2918 | |
2919 | if (is_legacy_es()) |
2920 | require_extension_internal(ext: "GL_EXT_draw_buffers" ); |
2921 | } |
2922 | else |
2923 | SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL." ); |
2924 | |
2925 | var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is. |
2926 | } |
2927 | |
2928 | void CompilerGLSL::replace_fragment_outputs() |
2929 | { |
2930 | ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) { |
2931 | auto &type = this->get<SPIRType>(id: var.basetype); |
2932 | |
2933 | if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput) |
2934 | replace_fragment_output(var); |
2935 | }); |
2936 | } |
2937 | |
2938 | string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr) |
2939 | { |
2940 | if (out_type.vecsize == input_components) |
2941 | return expr; |
2942 | else if (input_components == 1 && !backend.can_swizzle_scalar) |
2943 | return join(ts: type_to_glsl(type: out_type), ts: "(" , ts: expr, ts: ")" ); |
2944 | else |
2945 | { |
2946 | // FIXME: This will not work with packed expressions. |
2947 | auto e = enclose_expression(expr) + "." ; |
2948 | // Just clamp the swizzle index if we have more outputs than inputs. |
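		// E.g. remapping a 2-component input to a vec4 output yields roughly "expr.xyyy";
		// trailing components repeat the last input component.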
2949 | for (uint32_t c = 0; c < out_type.vecsize; c++) |
2950 | e += index_to_swizzle(index: min(a: c, b: input_components - 1)); |
2951 | if (backend.swizzle_is_function && out_type.vecsize > 1) |
2952 | e += "()" ; |
2953 | |
2954 | remove_duplicate_swizzle(op&: e); |
2955 | return e; |
2956 | } |
2957 | } |
2958 | |
2959 | void CompilerGLSL::emit_pls() |
2960 | { |
2961 | auto &execution = get_entry_point(); |
2962 | if (execution.model != ExecutionModelFragment) |
2963 | SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders." ); |
2964 | |
2965 | if (!options.es) |
2966 | SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES." ); |
2967 | |
2968 | if (options.version < 300) |
2969 | SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above." ); |
2970 | |
2971 | if (!pls_inputs.empty()) |
2972 | { |
2973 | statement(ts: "__pixel_local_inEXT _PLSIn" ); |
2974 | begin_scope(); |
2975 | for (auto &input : pls_inputs) |
2976 | statement(ts: pls_decl(variable: input), ts: ";" ); |
2977 | end_scope_decl(); |
2978 | statement(ts: "" ); |
2979 | } |
2980 | |
2981 | if (!pls_outputs.empty()) |
2982 | { |
2983 | statement(ts: "__pixel_local_outEXT _PLSOut" ); |
2984 | begin_scope(); |
2985 | for (auto &output : pls_outputs) |
2986 | statement(ts: pls_decl(variable: output), ts: ";" ); |
2987 | end_scope_decl(); |
2988 | statement(ts: "" ); |
2989 | } |
2990 | } |
2991 | |
2992 | void CompilerGLSL::fixup_image_load_store_access() |
2993 | { |
2994 | if (!options.enable_storage_image_qualifier_deduction) |
2995 | return; |
2996 | |
2997 | ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t var, const SPIRVariable &) { |
2998 | auto &vartype = expression_type(id: var); |
2999 | if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2) |
3000 | { |
3001 | // Very old glslangValidator and HLSL compilers do not emit required qualifiers here. |
3002 | // Solve this by making the image access as restricted as possible and loosen up if we need to. |
3003 | // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing. |
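			// (NonWritable and NonReadable later surface as readonly/writeonly
			// qualifiers on the emitted image declaration.)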
3004 | |
3005 | if (!has_decoration(id: var, decoration: DecorationNonWritable) && !has_decoration(id: var, decoration: DecorationNonReadable)) |
3006 | { |
3007 | set_decoration(id: var, decoration: DecorationNonWritable); |
3008 | set_decoration(id: var, decoration: DecorationNonReadable); |
3009 | } |
3010 | } |
3011 | }); |
3012 | } |
3013 | |
3014 | static bool is_block_builtin(BuiltIn builtin) |
3015 | { |
3016 | return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance || |
3017 | builtin == BuiltInCullDistance; |
3018 | } |
3019 | |
3020 | bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage) |
3021 | { |
3022 | // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block. |
3023 | |
3024 | if (storage != StorageClassOutput) |
3025 | return false; |
3026 | bool should_force = false; |
3027 | |
3028 | ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) { |
3029 | if (should_force) |
3030 | return; |
3031 | |
3032 | auto &type = this->get<SPIRType>(id: var.basetype); |
3033 | bool block = has_decoration(id: type.self, decoration: DecorationBlock); |
3034 | if (var.storage == storage && block && is_builtin_variable(var)) |
3035 | { |
3036 | uint32_t member_count = uint32_t(type.member_types.size()); |
3037 | for (uint32_t i = 0; i < member_count; i++) |
3038 | { |
3039 | if (has_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn) && |
3040 | is_block_builtin(builtin: BuiltIn(get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn))) && |
3041 | has_member_decoration(id: type.self, index: i, decoration: DecorationOffset)) |
3042 | { |
3043 | should_force = true; |
3044 | } |
3045 | } |
3046 | } |
3047 | else if (var.storage == storage && !block && is_builtin_variable(var)) |
3048 | { |
3049 | if (is_block_builtin(builtin: BuiltIn(get_decoration(id: type.self, decoration: DecorationBuiltIn))) && |
3050 | has_decoration(id: var.self, decoration: DecorationOffset)) |
3051 | { |
3052 | should_force = true; |
3053 | } |
3054 | } |
3055 | }); |
3056 | |
3057 | // If we're declaring clip/cull planes with control points we need to force block declaration. |
3058 | if (get_execution_model() == ExecutionModelTessellationControl && |
3059 | (clip_distance_count || cull_distance_count)) |
3060 | { |
3061 | should_force = true; |
3062 | } |
3063 | |
3064 | return should_force; |
3065 | } |
3066 | |
3067 | void CompilerGLSL::fixup_implicit_builtin_block_names() |
3068 | { |
3069 | ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) { |
3070 | auto &type = this->get<SPIRType>(id: var.basetype); |
3071 | bool block = has_decoration(id: type.self, decoration: DecorationBlock); |
3072 | if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block && |
3073 | is_builtin_variable(var)) |
3074 | { |
3075 | // Make sure the array has a supported name in the code. |
3076 | if (var.storage == StorageClassOutput) |
3077 | set_name(id: var.self, name: "gl_out" ); |
3078 | else if (var.storage == StorageClassInput) |
3079 | set_name(id: var.self, name: "gl_in" ); |
3080 | } |
3081 | }); |
3082 | } |
3083 | |
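// Emits an explicit gl_PerVertex redeclaration, e.g. for the output side:
//
//     out gl_PerVertex
//     {
//         vec4 gl_Position;
//         float gl_PointSize;
//         float gl_ClipDistance[N];
//         float gl_CullDistance[N];
//     };
//
// with layout(xfb_*) / layout(stream = ...) qualifiers added when transform
// feedback or geometry streams are in use, and only the builtins that are
// actually referenced being declared.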
3084 | void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model) |
3085 | { |
3086 | Bitset emitted_builtins; |
3087 | Bitset global_builtins; |
3088 | const SPIRVariable *block_var = nullptr; |
3089 | bool emitted_block = false; |
3090 | bool builtin_array = false; |
3091 | |
3092 | // Need to use declared size in the type. |
3093 | // These variables might have been declared, but not statically used, so we haven't deduced their size yet. |
3094 | uint32_t cull_distance_size = 0; |
3095 | uint32_t clip_distance_size = 0; |
3096 | |
3097 | bool have_xfb_buffer_stride = false; |
3098 | bool have_geom_stream = false; |
3099 | bool have_any_xfb_offset = false; |
3100 | uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; |
3101 | std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets; |
3102 | |
3103 | ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) { |
3104 | auto &type = this->get<SPIRType>(id: var.basetype); |
3105 | bool block = has_decoration(id: type.self, decoration: DecorationBlock); |
3106 | Bitset builtins; |
3107 | |
3108 | if (var.storage == storage && block && is_builtin_variable(var)) |
3109 | { |
3110 | uint32_t index = 0; |
3111 | for (auto &m : ir.meta[type.self].members) |
3112 | { |
3113 | if (m.builtin) |
3114 | { |
3115 | builtins.set(m.builtin_type); |
3116 | if (m.builtin_type == BuiltInCullDistance) |
3117 | cull_distance_size = to_array_size_literal(type: this->get<SPIRType>(id: type.member_types[index])); |
3118 | else if (m.builtin_type == BuiltInClipDistance) |
3119 | clip_distance_size = to_array_size_literal(type: this->get<SPIRType>(id: type.member_types[index])); |
3120 | |
3121 | if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationOffset)) |
3122 | { |
3123 | have_any_xfb_offset = true; |
3124 | builtin_xfb_offsets[m.builtin_type] = m.offset; |
3125 | } |
3126 | |
3127 | if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationStream)) |
3128 | { |
3129 | uint32_t stream = m.stream; |
3130 | if (have_geom_stream && geom_stream != stream) |
3131 | SPIRV_CROSS_THROW("IO block member Stream mismatch." ); |
3132 | have_geom_stream = true; |
3133 | geom_stream = stream; |
3134 | } |
3135 | } |
3136 | index++; |
3137 | } |
3138 | |
3139 | if (storage == StorageClassOutput && has_decoration(id: var.self, decoration: DecorationXfbBuffer) && |
3140 | has_decoration(id: var.self, decoration: DecorationXfbStride)) |
3141 | { |
3142 | uint32_t buffer_index = get_decoration(id: var.self, decoration: DecorationXfbBuffer); |
3143 | uint32_t stride = get_decoration(id: var.self, decoration: DecorationXfbStride); |
3144 | if (have_xfb_buffer_stride && buffer_index != xfb_buffer) |
3145 | SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch." ); |
3146 | if (have_xfb_buffer_stride && stride != xfb_stride) |
3147 | SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch." ); |
3148 | have_xfb_buffer_stride = true; |
3149 | xfb_buffer = buffer_index; |
3150 | xfb_stride = stride; |
3151 | } |
3152 | |
3153 | if (storage == StorageClassOutput && has_decoration(id: var.self, decoration: DecorationStream)) |
3154 | { |
3155 | uint32_t stream = get_decoration(id: var.self, decoration: DecorationStream); |
3156 | if (have_geom_stream && geom_stream != stream) |
3157 | SPIRV_CROSS_THROW("IO block member Stream mismatch." ); |
3158 | have_geom_stream = true; |
3159 | geom_stream = stream; |
3160 | } |
3161 | } |
3162 | else if (var.storage == storage && !block && is_builtin_variable(var)) |
3163 | { |
3164 | // While we're at it, collect all declared global builtins (HLSL mostly ...). |
3165 | auto &m = ir.meta[var.self].decoration; |
3166 | if (m.builtin) |
3167 | { |
3168 | global_builtins.set(m.builtin_type); |
3169 | if (m.builtin_type == BuiltInCullDistance) |
3170 | cull_distance_size = to_array_size_literal(type); |
3171 | else if (m.builtin_type == BuiltInClipDistance) |
3172 | clip_distance_size = to_array_size_literal(type); |
3173 | |
3174 | if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationXfbStride) && |
3175 | m.decoration_flags.get(bit: DecorationXfbBuffer) && m.decoration_flags.get(bit: DecorationOffset)) |
3176 | { |
3177 | have_any_xfb_offset = true; |
3178 | builtin_xfb_offsets[m.builtin_type] = m.offset; |
3179 | uint32_t buffer_index = m.xfb_buffer; |
3180 | uint32_t stride = m.xfb_stride; |
3181 | if (have_xfb_buffer_stride && buffer_index != xfb_buffer) |
3182 | SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch." ); |
3183 | if (have_xfb_buffer_stride && stride != xfb_stride) |
3184 | SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch." ); |
3185 | have_xfb_buffer_stride = true; |
3186 | xfb_buffer = buffer_index; |
3187 | xfb_stride = stride; |
3188 | } |
3189 | |
3190 | if (is_block_builtin(builtin: m.builtin_type) && m.decoration_flags.get(bit: DecorationStream)) |
3191 | { |
3192 | uint32_t stream = get_decoration(id: var.self, decoration: DecorationStream); |
3193 | if (have_geom_stream && geom_stream != stream) |
3194 | SPIRV_CROSS_THROW("IO block member Stream mismatch." ); |
3195 | have_geom_stream = true; |
3196 | geom_stream = stream; |
3197 | } |
3198 | } |
3199 | } |
3200 | |
3201 | if (builtins.empty()) |
3202 | return; |
3203 | |
3204 | if (emitted_block) |
3205 | SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block." ); |
3206 | |
3207 | emitted_builtins = builtins; |
3208 | emitted_block = true; |
3209 | builtin_array = !type.array.empty(); |
3210 | block_var = &var; |
3211 | }); |
3212 | |
3213 | global_builtins = |
3214 | Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) | |
3215 | (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance))); |
3216 | |
3217 | // Try to collect all other declared builtins. |
3218 | if (!emitted_block) |
3219 | emitted_builtins = global_builtins; |
3220 | |
3221 | // Can't declare an empty interface block. |
3222 | if (emitted_builtins.empty()) |
3223 | return; |
3224 | |
3225 | if (storage == StorageClassOutput) |
3226 | { |
3227 | SmallVector<string> attr; |
3228 | if (have_xfb_buffer_stride && have_any_xfb_offset) |
3229 | { |
3230 | if (!options.es) |
3231 | { |
				if (options.version < 140)
					SPIRV_CROSS_THROW("xfb_stride or xfb_buffer requires at least GLSL 1.40.");
				else if (options.version < 440)
					require_extension_internal("GL_ARB_enhanced_layouts");
3238 | } |
3239 | else if (options.es) |
3240 | SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer." ); |
3241 | attr.push_back(t: join(ts: "xfb_buffer = " , ts&: xfb_buffer, ts: ", xfb_stride = " , ts&: xfb_stride)); |
3242 | } |
3243 | |
3244 | if (have_geom_stream) |
3245 | { |
3246 | if (get_execution_model() != ExecutionModelGeometry) |
3247 | SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders." ); |
3248 | if (options.es) |
3249 | SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL." ); |
3250 | if (options.version < 400) |
3251 | require_extension_internal(ext: "GL_ARB_transform_feedback3" ); |
3252 | attr.push_back(t: join(ts: "stream = " , ts&: geom_stream)); |
3253 | } |
3254 | |
3255 | if (!attr.empty()) |
3256 | statement(ts: "layout(" , ts: merge(list: attr), ts: ") out gl_PerVertex" ); |
3257 | else |
3258 | statement(ts: "out gl_PerVertex" ); |
3259 | } |
3260 | else |
3261 | { |
		// With geometry passthrough enabled, the gl_PerVertex input block must also be passthrough.
3263 | if (get_entry_point().geometry_passthrough) |
3264 | statement(ts: "layout(passthrough) in gl_PerVertex" ); |
3265 | else |
3266 | statement(ts: "in gl_PerVertex" ); |
3267 | } |
3268 | |
3269 | begin_scope(); |
3270 | if (emitted_builtins.get(bit: BuiltInPosition)) |
3271 | { |
3272 | auto itr = builtin_xfb_offsets.find(x: BuiltInPosition); |
3273 | if (itr != end(cont&: builtin_xfb_offsets)) |
3274 | statement(ts: "layout(xfb_offset = " , ts&: itr->second, ts: ") vec4 gl_Position;" ); |
3275 | else |
3276 | statement(ts: "vec4 gl_Position;" ); |
3277 | } |
3278 | |
3279 | if (emitted_builtins.get(bit: BuiltInPointSize)) |
3280 | { |
3281 | auto itr = builtin_xfb_offsets.find(x: BuiltInPointSize); |
3282 | if (itr != end(cont&: builtin_xfb_offsets)) |
3283 | statement(ts: "layout(xfb_offset = " , ts&: itr->second, ts: ") float gl_PointSize;" ); |
3284 | else |
3285 | statement(ts: "float gl_PointSize;" ); |
3286 | } |
3287 | |
3288 | if (emitted_builtins.get(bit: BuiltInClipDistance)) |
3289 | { |
3290 | auto itr = builtin_xfb_offsets.find(x: BuiltInClipDistance); |
3291 | if (itr != end(cont&: builtin_xfb_offsets)) |
3292 | statement(ts: "layout(xfb_offset = " , ts&: itr->second, ts: ") float gl_ClipDistance[" , ts&: clip_distance_size, ts: "];" ); |
3293 | else |
3294 | statement(ts: "float gl_ClipDistance[" , ts&: clip_distance_size, ts: "];" ); |
3295 | } |
3296 | |
3297 | if (emitted_builtins.get(bit: BuiltInCullDistance)) |
3298 | { |
3299 | auto itr = builtin_xfb_offsets.find(x: BuiltInCullDistance); |
3300 | if (itr != end(cont&: builtin_xfb_offsets)) |
3301 | statement(ts: "layout(xfb_offset = " , ts&: itr->second, ts: ") float gl_CullDistance[" , ts&: cull_distance_size, ts: "];" ); |
3302 | else |
3303 | statement(ts: "float gl_CullDistance[" , ts&: cull_distance_size, ts: "];" ); |
3304 | } |
3305 | |
3306 | if (builtin_array) |
3307 | { |
3308 | if (model == ExecutionModelTessellationControl && storage == StorageClassOutput) |
3309 | end_scope_decl(decl: join(ts: to_name(id: block_var->self), ts: "[" , ts&: get_entry_point().output_vertices, ts: "]" )); |
3310 | else |
3311 | end_scope_decl(decl: join(ts: to_name(id: block_var->self), ts: "[]" )); |
3312 | } |
3313 | else |
3314 | end_scope_decl(); |
3315 | statement(ts: "" ); |
3316 | } |
3317 | |
3318 | void CompilerGLSL::declare_undefined_values() |
3319 | { |
3320 | bool emitted = false; |
3321 | ir.for_each_typed_id<SPIRUndef>(op: [&](uint32_t, const SPIRUndef &undef) { |
3322 | auto &type = this->get<SPIRType>(id: undef.basetype); |
3323 | // OpUndef can be void for some reason ... |
3324 | if (type.basetype == SPIRType::Void) |
3325 | return; |
3326 | |
3327 | string initializer; |
3328 | if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) |
3329 | initializer = join(ts: " = " , ts: to_zero_initialized_expression(type_id: undef.basetype)); |
3330 | |
3331 | statement(ts: variable_decl(type, name: to_name(id: undef.self), id: undef.self), ts&: initializer, ts: ";" ); |
3332 | emitted = true; |
3333 | }); |
3334 | |
3335 | if (emitted) |
3336 | statement(ts: "" ); |
3337 | } |
3338 | |
3339 | bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const |
3340 | { |
3341 | bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable; |
3342 | |
3343 | if (statically_assigned) |
3344 | { |
3345 | auto *constant = maybe_get<SPIRConstant>(id: var.static_expression); |
3346 | if (constant && constant->is_used_as_lut) |
3347 | return true; |
3348 | } |
3349 | |
3350 | return false; |
3351 | } |
3352 | |
3353 | void CompilerGLSL::emit_resources() |
3354 | { |
3355 | auto &execution = get_entry_point(); |
3356 | |
3357 | replace_illegal_names(); |
3358 | |
	// Legacy GL uses gl_FragData[]; redeclare all fragment outputs
	// as builtins.
3361 | if (execution.model == ExecutionModelFragment && is_legacy()) |
3362 | replace_fragment_outputs(); |
3363 | |
3364 | // Emit PLS blocks if we have such variables. |
3365 | if (!pls_inputs.empty() || !pls_outputs.empty()) |
3366 | emit_pls(); |
3367 | |
3368 | switch (execution.model) |
3369 | { |
3370 | case ExecutionModelGeometry: |
3371 | case ExecutionModelTessellationControl: |
3372 | case ExecutionModelTessellationEvaluation: |
3373 | fixup_implicit_builtin_block_names(); |
3374 | break; |
3375 | |
3376 | default: |
3377 | break; |
3378 | } |
3379 | |
3380 | // Emit custom gl_PerVertex for SSO compatibility. |
3381 | if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment) |
3382 | { |
3383 | switch (execution.model) |
3384 | { |
3385 | case ExecutionModelGeometry: |
3386 | case ExecutionModelTessellationControl: |
3387 | case ExecutionModelTessellationEvaluation: |
3388 | emit_declared_builtin_block(storage: StorageClassInput, model: execution.model); |
3389 | emit_declared_builtin_block(storage: StorageClassOutput, model: execution.model); |
3390 | break; |
3391 | |
3392 | case ExecutionModelVertex: |
3393 | emit_declared_builtin_block(storage: StorageClassOutput, model: execution.model); |
3394 | break; |
3395 | |
3396 | default: |
3397 | break; |
3398 | } |
3399 | } |
3400 | else if (should_force_emit_builtin_block(storage: StorageClassOutput)) |
3401 | { |
3402 | emit_declared_builtin_block(storage: StorageClassOutput, model: execution.model); |
3403 | } |
3404 | else if (execution.geometry_passthrough) |
3405 | { |
3406 | // Need to declare gl_in with Passthrough. |
3407 | // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass. |
3408 | emit_declared_builtin_block(storage: StorageClassInput, model: execution.model); |
3409 | } |
3410 | else |
3411 | { |
3412 | // Need to redeclare clip/cull distance with explicit size to use them. |
3413 | // SPIR-V mandates these builtins have a size declared. |
3414 | const char *storage = execution.model == ExecutionModelFragment ? "in" : "out" ; |
3415 | if (clip_distance_count != 0) |
3416 | statement(ts&: storage, ts: " float gl_ClipDistance[" , ts&: clip_distance_count, ts: "];" ); |
3417 | if (cull_distance_count != 0) |
3418 | statement(ts&: storage, ts: " float gl_CullDistance[" , ts&: cull_distance_count, ts: "];" ); |
3419 | if (clip_distance_count != 0 || cull_distance_count != 0) |
3420 | statement(ts: "" ); |
3421 | } |
3422 | |
3423 | if (position_invariant) |
3424 | { |
3425 | statement(ts: "invariant gl_Position;" ); |
3426 | statement(ts: "" ); |
3427 | } |
3428 | |
3429 | bool emitted = false; |
3430 | |
	// Emit specialization constants (and constants used as LUTs).
	// In Vulkan GLSL these become real specialization constants; in plain GLSL they are
	// backed by SPIRV_CROSS_CONSTANT_ID_* macros, and spec op expressions redirect to the
	// constant name.
	//
3435 | { |
3436 | auto loop_lock = ir.create_loop_hard_lock(); |
3437 | for (auto &id_ : ir.ids_for_constant_or_type) |
3438 | { |
3439 | auto &id = ir.ids[id_]; |
3440 | |
3441 | if (id.get_type() == TypeConstant) |
3442 | { |
3443 | auto &c = id.get<SPIRConstant>(); |
3444 | |
3445 | bool needs_declaration = c.specialization || c.is_used_as_lut; |
3446 | |
3447 | if (needs_declaration) |
3448 | { |
3449 | if (!options.vulkan_semantics && c.specialization) |
3450 | { |
3451 | c.specialization_constant_macro_name = |
3452 | constant_value_macro_name(id: get_decoration(id: c.self, decoration: DecorationSpecId)); |
3453 | } |
3454 | emit_constant(constant: c); |
3455 | emitted = true; |
3456 | } |
3457 | } |
3458 | else if (id.get_type() == TypeConstantOp) |
3459 | { |
3460 | emit_specialization_constant_op(constant: id.get<SPIRConstantOp>()); |
3461 | emitted = true; |
3462 | } |
3463 | else if (id.get_type() == TypeType) |
3464 | { |
3465 | auto *type = &id.get<SPIRType>(); |
3466 | |
3467 | bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer && |
3468 | (!has_decoration(id: type->self, decoration: DecorationBlock) && |
3469 | !has_decoration(id: type->self, decoration: DecorationBufferBlock)); |
3470 | |
3471 | // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs. |
3472 | if (type->basetype == SPIRType::Struct && type->pointer && |
3473 | has_decoration(id: type->self, decoration: DecorationBlock) && |
3474 | (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR || |
3475 | type->storage == StorageClassHitAttributeKHR)) |
3476 | { |
3477 | type = &get<SPIRType>(id: type->parent_type); |
3478 | is_natural_struct = true; |
3479 | } |
3480 | |
3481 | if (is_natural_struct) |
3482 | { |
3483 | if (emitted) |
3484 | statement(ts: "" ); |
3485 | emitted = false; |
3486 | |
3487 | emit_struct(type&: *type); |
3488 | } |
3489 | } |
3490 | } |
3491 | } |
3492 | |
3493 | if (emitted) |
3494 | statement(ts: "" ); |
3495 | |
3496 | // If we needed to declare work group size late, check here. |
3497 | // If the work group size depends on a specialization constant, we need to declare the layout() block |
3498 | // after constants (and their macros) have been declared. |
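	// The result is roughly:
	//     layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_0, local_size_y = 1, local_size_z = 1) in;
	// with the exact contents produced by build_workgroup_size() below.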
3499 | if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics && |
3500 | (execution.workgroup_size.constant != 0 || execution.flags.get(bit: ExecutionModeLocalSizeId))) |
3501 | { |
3502 | SpecializationConstant wg_x, wg_y, wg_z; |
3503 | get_work_group_size_specialization_constants(x&: wg_x, y&: wg_y, z&: wg_z); |
3504 | |
3505 | if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0))) |
3506 | { |
3507 | SmallVector<string> inputs; |
3508 | build_workgroup_size(arguments&: inputs, wg_x, wg_y, wg_z); |
3509 | statement(ts: "layout(" , ts: merge(list: inputs), ts: ") in;" ); |
3510 | statement(ts: "" ); |
3511 | } |
3512 | } |
3513 | |
3514 | emitted = false; |
3515 | |
3516 | if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) |
3517 | { |
3518 | for (auto type : physical_storage_non_block_pointer_types) |
3519 | { |
3520 | emit_buffer_reference_block(type_id: type, forward_declaration: false); |
3521 | } |
3522 | |
3523 | // Output buffer reference blocks. |
3524 | // Do this in two stages, one with forward declaration, |
3525 | // and one without. Buffer reference blocks can reference themselves |
3526 | // to support things like linked lists. |
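		// The two passes emit something like:
		//     layout(buffer_reference) buffer Node;                  // forward declaration
		//     layout(buffer_reference, std430) buffer Node { ... };  // full definition
		// ("Node" is an illustrative name; see emit_buffer_reference_block()).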
3527 | ir.for_each_typed_id<SPIRType>(op: [&](uint32_t self, SPIRType &type) { |
3528 | if (type.basetype == SPIRType::Struct && type.pointer && |
3529 | type.pointer_depth == 1 && !type_is_array_of_pointers(type) && |
3530 | type.storage == StorageClassPhysicalStorageBufferEXT) |
3531 | { |
3532 | emit_buffer_reference_block(type_id: self, forward_declaration: true); |
3533 | } |
3534 | }); |
3535 | |
3536 | ir.for_each_typed_id<SPIRType>(op: [&](uint32_t self, SPIRType &type) { |
3537 | if (type.basetype == SPIRType::Struct && |
3538 | type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) && |
3539 | type.storage == StorageClassPhysicalStorageBufferEXT) |
3540 | { |
3541 | emit_buffer_reference_block(type_id: self, forward_declaration: false); |
3542 | } |
3543 | }); |
3544 | } |
3545 | |
3546 | // Output UBOs and SSBOs |
3547 | ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) { |
3548 | auto &type = this->get<SPIRType>(id: var.basetype); |
3549 | |
3550 | bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform || |
3551 | type.storage == StorageClassShaderRecordBufferKHR; |
3552 | bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock) || |
3553 | ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock); |
3554 | |
3555 | if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) && |
3556 | has_block_flags) |
3557 | { |
3558 | emit_buffer_block(var); |
3559 | } |
3560 | }); |
3561 | |
3562 | // Output push constant blocks |
3563 | ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) { |
3564 | auto &type = this->get<SPIRType>(id: var.basetype); |
3565 | if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant && |
3566 | !is_hidden_variable(var)) |
3567 | { |
3568 | emit_push_constant_block(var); |
3569 | } |
3570 | }); |
3571 | |
3572 | bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics; |
3573 | |
3574 | // Output Uniform Constants (values, samplers, images, etc). |
3575 | ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) { |
3576 | auto &type = this->get<SPIRType>(id: var.basetype); |
3577 | |
3578 | // If we're remapping separate samplers and images, only emit the combined samplers. |
3579 | if (skip_separate_image_sampler) |
3580 | { |
3581 | // Sampler buffers are always used without a sampler, and they will also work in regular GL. |
3582 | bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer; |
3583 | bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; |
3584 | bool separate_sampler = type.basetype == SPIRType::Sampler; |
3585 | if (!sampler_buffer && (separate_image || separate_sampler)) |
3586 | return; |
3587 | } |
3588 | |
3589 | if (var.storage != StorageClassFunction && type.pointer && |
3590 | (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter || |
3591 | type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR || |
3592 | type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR || |
3593 | type.storage == StorageClassHitAttributeKHR) && |
3594 | !is_hidden_variable(var)) |
3595 | { |
3596 | emit_uniform(var); |
3597 | emitted = true; |
3598 | } |
3599 | }); |
3600 | |
3601 | if (emitted) |
3602 | statement(ts: "" ); |
3603 | emitted = false; |
3604 | |
3605 | bool emitted_base_instance = false; |
3606 | |
3607 | // Output in/out interfaces. |
3608 | ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, SPIRVariable &var) { |
3609 | auto &type = this->get<SPIRType>(id: var.basetype); |
3610 | |
3611 | bool is_hidden = is_hidden_variable(var); |
3612 | |
3613 | // Unused output I/O variables might still be required to implement framebuffer fetch. |
3614 | if (var.storage == StorageClassOutput && !is_legacy() && |
3615 | location_is_framebuffer_fetch(location: get_decoration(id: var.self, decoration: DecorationLocation)) != 0) |
3616 | { |
3617 | is_hidden = false; |
3618 | } |
3619 | |
3620 | if (var.storage != StorageClassFunction && type.pointer && |
3621 | (var.storage == StorageClassInput || var.storage == StorageClassOutput) && |
3622 | interface_variable_exists_in_entry_point(id: var.self) && !is_hidden) |
3623 | { |
3624 | if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput && |
3625 | type.array.size() == 1) |
3626 | { |
3627 | SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader." ); |
3628 | } |
3629 | emit_interface_block(var); |
3630 | emitted = true; |
3631 | } |
3632 | else if (is_builtin_variable(var)) |
3633 | { |
3634 | auto builtin = BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn)); |
3635 | // For gl_InstanceIndex emulation on GLES, the API user needs to |
3636 | // supply this uniform. |
3637 | |
3638 | // The draw parameter extension is soft-enabled on GL with some fallbacks. |
3639 | if (!options.vulkan_semantics) |
3640 | { |
3641 | if (!emitted_base_instance && |
3642 | ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) || |
3643 | (builtin == BuiltInBaseInstance))) |
3644 | { |
3645 | statement(ts: "#ifdef GL_ARB_shader_draw_parameters" ); |
3646 | statement(ts: "#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB" ); |
3647 | statement(ts: "#else" ); |
					// A crude but simple workaround that should be good enough for non-indirect draws.
3649 | statement(ts: "uniform int SPIRV_Cross_BaseInstance;" ); |
3650 | statement(ts: "#endif" ); |
3651 | emitted = true; |
3652 | emitted_base_instance = true; |
3653 | } |
3654 | else if (builtin == BuiltInBaseVertex) |
3655 | { |
3656 | statement(ts: "#ifdef GL_ARB_shader_draw_parameters" ); |
3657 | statement(ts: "#define SPIRV_Cross_BaseVertex gl_BaseVertexARB" ); |
3658 | statement(ts: "#else" ); |
					// A crude but simple workaround that should be good enough for non-indirect draws.
3660 | statement(ts: "uniform int SPIRV_Cross_BaseVertex;" ); |
3661 | statement(ts: "#endif" ); |
3662 | } |
3663 | else if (builtin == BuiltInDrawIndex) |
3664 | { |
3665 | statement(ts: "#ifndef GL_ARB_shader_draw_parameters" ); |
3666 | // Cannot really be worked around. |
3667 | statement(ts: "#error GL_ARB_shader_draw_parameters is not supported." ); |
3668 | statement(ts: "#endif" ); |
3669 | } |
3670 | } |
3671 | } |
3672 | }); |
3673 | |
3674 | // Global variables. |
3675 | for (auto global : global_variables) |
3676 | { |
3677 | auto &var = get<SPIRVariable>(id: global); |
3678 | if (is_hidden_variable(var, include_builtins: true)) |
3679 | continue; |
3680 | |
3681 | if (var.storage != StorageClassOutput) |
3682 | { |
3683 | if (!variable_is_lut(var)) |
3684 | { |
3685 | add_resource_name(id: var.self); |
3686 | |
3687 | string initializer; |
3688 | if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate && |
3689 | !var.initializer && !var.static_expression && type_can_zero_initialize(type: get_variable_data_type(var))) |
3690 | { |
3691 | initializer = join(ts: " = " , ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var))); |
3692 | } |
3693 | |
3694 | statement(ts: variable_decl(variable: var), ts&: initializer, ts: ";" ); |
3695 | emitted = true; |
3696 | } |
3697 | } |
3698 | else if (var.initializer && maybe_get<SPIRConstant>(id: var.initializer) != nullptr) |
3699 | { |
3700 | emit_output_variable_initializer(var); |
3701 | } |
3702 | } |
3703 | |
3704 | if (emitted) |
3705 | statement(ts: "" ); |
3706 | |
3707 | declare_undefined_values(); |
3708 | } |
3709 | |
3710 | void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var) |
3711 | { |
3712 | // If a StorageClassOutput variable has an initializer, we need to initialize it in main(). |
3713 | auto &entry_func = this->get<SPIRFunction>(id: ir.default_entry_point); |
3714 | auto &type = get<SPIRType>(id: var.basetype); |
3715 | bool is_patch = has_decoration(id: var.self, decoration: DecorationPatch); |
3716 | bool is_block = has_decoration(id: type.self, decoration: DecorationBlock); |
3717 | bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch; |
3718 | |
3719 | if (is_block) |
3720 | { |
3721 | uint32_t member_count = uint32_t(type.member_types.size()); |
3722 | bool type_is_array = type.array.size() == 1; |
3723 | uint32_t array_size = 1; |
3724 | if (type_is_array) |
3725 | array_size = to_array_size_literal(type); |
3726 | uint32_t iteration_count = is_control_point ? 1 : array_size; |
3727 | |
3728 | // If the initializer is a block, we must initialize each block member one at a time. |
3729 | for (uint32_t i = 0; i < member_count; i++) |
3730 | { |
3731 | // These outputs might not have been properly declared, so don't initialize them in that case. |
3732 | if (has_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn)) |
3733 | { |
3734 | if (get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn) == BuiltInCullDistance && |
3735 | !cull_distance_count) |
3736 | continue; |
3737 | |
3738 | if (get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn) == BuiltInClipDistance && |
3739 | !clip_distance_count) |
3740 | continue; |
3741 | } |
3742 | |
3743 | // We need to build a per-member array first, essentially transposing from AoS to SoA. |
3744 | // This code path hits when we have an array of blocks. |
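			// Roughly: for member i we first emit
			//     const MemberType _<var>_<i>_init[N] = MemberType[N](expr_0, ..., expr_N-1);
			// and the per-element assignments below then copy from that LUT.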
3745 | string lut_name; |
3746 | if (type_is_array) |
3747 | { |
3748 | lut_name = join(ts: "_" , ts: var.self, ts: "_" , ts&: i, ts: "_init" ); |
3749 | uint32_t member_type_id = get<SPIRType>(id: var.basetype).member_types[i]; |
3750 | auto &member_type = get<SPIRType>(id: member_type_id); |
3751 | auto array_type = member_type; |
3752 | array_type.parent_type = member_type_id; |
3753 | array_type.array.push_back(t: array_size); |
3754 | array_type.array_size_literal.push_back(t: true); |
3755 | |
3756 | SmallVector<string> exprs; |
3757 | exprs.reserve(count: array_size); |
3758 | auto &c = get<SPIRConstant>(id: var.initializer); |
3759 | for (uint32_t j = 0; j < array_size; j++) |
3760 | exprs.push_back(t: to_expression(id: get<SPIRConstant>(id: c.subconstants[j]).subconstants[i])); |
3761 | statement(ts: "const " , ts: type_to_glsl(type: array_type), ts: " " , ts&: lut_name, ts: type_to_array_glsl(type: array_type), ts: " = " , |
3762 | ts: type_to_glsl_constructor(type: array_type), ts: "(" , ts: merge(list: exprs, between: ", " ), ts: ");" ); |
3763 | } |
3764 | |
3765 | for (uint32_t j = 0; j < iteration_count; j++) |
3766 | { |
3767 | entry_func.fixup_hooks_in.push_back(t: [=, &var]() { |
3768 | AccessChainMeta meta; |
3769 | auto &c = this->get<SPIRConstant>(id: var.initializer); |
3770 | |
3771 | uint32_t invocation_id = 0; |
3772 | uint32_t member_index_id = 0; |
3773 | if (is_control_point) |
3774 | { |
3775 | uint32_t ids = ir.increase_bound_by(count: 3); |
3776 | SPIRType uint_type; |
3777 | uint_type.basetype = SPIRType::UInt; |
3778 | uint_type.width = 32; |
3779 | set<SPIRType>(id: ids, args&: uint_type); |
3780 | set<SPIRExpression>(id: ids + 1, args: builtin_to_glsl(builtin: BuiltInInvocationId, storage: StorageClassInput), args&: ids, args: true); |
3781 | set<SPIRConstant>(id: ids + 2, args&: ids, args: i, args: false); |
3782 | invocation_id = ids + 1; |
3783 | member_index_id = ids + 2; |
3784 | } |
3785 | |
3786 | if (is_patch) |
3787 | { |
3788 | statement(ts: "if (gl_InvocationID == 0)" ); |
3789 | begin_scope(); |
3790 | } |
3791 | |
3792 | if (type_is_array && !is_control_point) |
3793 | { |
3794 | uint32_t indices[2] = { j, i }; |
3795 | auto chain = access_chain_internal(base: var.self, indices, count: 2, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &meta); |
3796 | statement(ts&: chain, ts: " = " , ts: lut_name, ts: "[" , ts: j, ts: "];" ); |
3797 | } |
3798 | else if (is_control_point) |
3799 | { |
3800 | uint32_t indices[2] = { invocation_id, member_index_id }; |
3801 | auto chain = access_chain_internal(base: var.self, indices, count: 2, flags: 0, meta: &meta); |
3802 | statement(ts&: chain, ts: " = " , ts: lut_name, ts: "[" , ts: builtin_to_glsl(builtin: BuiltInInvocationId, storage: StorageClassInput), ts: "];" ); |
3803 | } |
3804 | else |
3805 | { |
3806 | auto chain = |
3807 | access_chain_internal(base: var.self, indices: &i, count: 1, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &meta); |
3808 | statement(ts&: chain, ts: " = " , ts: to_expression(id: c.subconstants[i]), ts: ";" ); |
3809 | } |
3810 | |
3811 | if (is_patch) |
3812 | end_scope(); |
3813 | }); |
3814 | } |
3815 | } |
3816 | } |
3817 | else if (is_control_point) |
3818 | { |
3819 | auto lut_name = join(ts: "_" , ts: var.self, ts: "_init" ); |
3820 | statement(ts: "const " , ts: type_to_glsl(type), ts: " " , ts&: lut_name, ts: type_to_array_glsl(type), |
3821 | ts: " = " , ts: to_expression(id: var.initializer), ts: ";" ); |
3822 | entry_func.fixup_hooks_in.push_back(t: [&, lut_name]() { |
3823 | statement(ts: to_expression(id: var.self), ts: "[gl_InvocationID] = " , ts: lut_name, ts: "[gl_InvocationID];" ); |
3824 | }); |
3825 | } |
3826 | else if (has_decoration(id: var.self, decoration: DecorationBuiltIn) && |
3827 | BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn)) == BuiltInSampleMask) |
3828 | { |
3829 | // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_< |
3830 | entry_func.fixup_hooks_in.push_back(t: [&] { |
3831 | auto &c = this->get<SPIRConstant>(id: var.initializer); |
3832 | uint32_t num_constants = uint32_t(c.subconstants.size()); |
3833 | for (uint32_t i = 0; i < num_constants; i++) |
3834 | { |
3835 | // Don't use to_expression on constant since it might be uint, just fish out the raw int. |
3836 | statement(ts: to_expression(id: var.self), ts: "[" , ts&: i, ts: "] = " , |
3837 | ts: convert_to_string(value: this->get<SPIRConstant>(id: c.subconstants[i]).scalar_i32()), ts: ";" ); |
3838 | } |
3839 | }); |
3840 | } |
3841 | else |
3842 | { |
3843 | auto lut_name = join(ts: "_" , ts: var.self, ts: "_init" ); |
3844 | statement(ts: "const " , ts: type_to_glsl(type), ts: " " , ts&: lut_name, |
3845 | ts: type_to_array_glsl(type), ts: " = " , ts: to_expression(id: var.initializer), ts: ";" ); |
3846 | entry_func.fixup_hooks_in.push_back(t: [&, lut_name, is_patch]() { |
3847 | if (is_patch) |
3848 | { |
3849 | statement(ts: "if (gl_InvocationID == 0)" ); |
3850 | begin_scope(); |
3851 | } |
3852 | statement(ts: to_expression(id: var.self), ts: " = " , ts: lut_name, ts: ";" ); |
3853 | if (is_patch) |
3854 | end_scope(); |
3855 | }); |
3856 | } |
3857 | } |
3858 | |
3859 | void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) |
3860 | { |
3861 | static const char *workaround_types[] = { "int" , "ivec2" , "ivec3" , "ivec4" , "uint" , "uvec2" , "uvec3" , "uvec4" , |
3862 | "float" , "vec2" , "vec3" , "vec4" , "double" , "dvec2" , "dvec3" , "dvec4" }; |
3863 | |
3864 | if (!options.vulkan_semantics) |
3865 | { |
3866 | using Supp = ShaderSubgroupSupportHelper; |
3867 | auto result = shader_subgroup_supporter.resolve(); |
3868 | |
3869 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupMask)) |
3870 | { |
3871 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupMask, r: result); |
3872 | |
3873 | for (auto &e : exts) |
3874 | { |
3875 | const char *name = Supp::get_extension_name(c: e); |
3876 | statement(ts: &e == &exts.front() ? "#if" : "#elif" , ts: " defined(" , ts&: name, ts: ")" ); |
3877 | |
3878 | switch (e) |
3879 | { |
3880 | case Supp::NV_shader_thread_group: |
3881 | statement(ts: "#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)" ); |
3882 | statement(ts: "#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)" ); |
3883 | statement(ts: "#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)" ); |
3884 | statement(ts: "#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)" ); |
3885 | statement(ts: "#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)" ); |
3886 | break; |
3887 | case Supp::ARB_shader_ballot: |
3888 | statement(ts: "#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)" ); |
3889 | statement(ts: "#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)" ); |
3890 | statement(ts: "#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)" ); |
3891 | statement(ts: "#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)" ); |
3892 | statement(ts: "#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)" ); |
3893 | break; |
3894 | default: |
3895 | break; |
3896 | } |
3897 | } |
3898 | statement(ts: "#endif" ); |
3899 | statement(ts: "" ); |
3900 | } |
3901 | |
3902 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupSize)) |
3903 | { |
3904 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupSize, r: result); |
3905 | |
3906 | for (auto &e : exts) |
3907 | { |
3908 | const char *name = Supp::get_extension_name(c: e); |
3909 | statement(ts: &e == &exts.front() ? "#if" : "#elif" , ts: " defined(" , ts&: name, ts: ")" ); |
3910 | |
3911 | switch (e) |
3912 | { |
3913 | case Supp::NV_shader_thread_group: |
3914 | statement(ts: "#define gl_SubgroupSize gl_WarpSizeNV" ); |
3915 | break; |
3916 | case Supp::ARB_shader_ballot: |
3917 | statement(ts: "#define gl_SubgroupSize gl_SubGroupSizeARB" ); |
3918 | break; |
3919 | case Supp::AMD_gcn_shader: |
3920 | statement(ts: "#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)" ); |
3921 | break; |
3922 | default: |
3923 | break; |
3924 | } |
3925 | } |
3926 | statement(ts: "#endif" ); |
3927 | statement(ts: "" ); |
3928 | } |
3929 | |
3930 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupInvocationID)) |
3931 | { |
3932 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupInvocationID, r: result); |
3933 | |
3934 | for (auto &e : exts) |
3935 | { |
3936 | const char *name = Supp::get_extension_name(c: e); |
3937 | statement(ts: &e == &exts.front() ? "#if" : "#elif" , ts: " defined(" , ts&: name, ts: ")" ); |
3938 | |
3939 | switch (e) |
3940 | { |
3941 | case Supp::NV_shader_thread_group: |
3942 | statement(ts: "#define gl_SubgroupInvocationID gl_ThreadInWarpNV" ); |
3943 | break; |
3944 | case Supp::ARB_shader_ballot: |
3945 | statement(ts: "#define gl_SubgroupInvocationID gl_SubGroupInvocationARB" ); |
3946 | break; |
3947 | default: |
3948 | break; |
3949 | } |
3950 | } |
3951 | statement(ts: "#endif" ); |
3952 | statement(ts: "" ); |
3953 | } |
3954 | |
3955 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupID)) |
3956 | { |
3957 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupID, r: result); |
3958 | |
3959 | for (auto &e : exts) |
3960 | { |
3961 | const char *name = Supp::get_extension_name(c: e); |
3962 | statement(ts: &e == &exts.front() ? "#if" : "#elif" , ts: " defined(" , ts&: name, ts: ")" ); |
3963 | |
3964 | switch (e) |
3965 | { |
3966 | case Supp::NV_shader_thread_group: |
3967 | statement(ts: "#define gl_SubgroupID gl_WarpIDNV" ); |
3968 | break; |
3969 | default: |
3970 | break; |
3971 | } |
3972 | } |
3973 | statement(ts: "#endif" ); |
3974 | statement(ts: "" ); |
3975 | } |
3976 | |
3977 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::NumSubgroups)) |
3978 | { |
3979 | auto exts = Supp::get_candidates_for_feature(ft: Supp::NumSubgroups, r: result); |
3980 | |
3981 | for (auto &e : exts) |
3982 | { |
3983 | const char *name = Supp::get_extension_name(c: e); |
3984 | statement(ts: &e == &exts.front() ? "#if" : "#elif" , ts: " defined(" , ts&: name, ts: ")" ); |
3985 | |
3986 | switch (e) |
3987 | { |
3988 | case Supp::NV_shader_thread_group: |
3989 | statement(ts: "#define gl_NumSubgroups gl_WarpsPerSMNV" ); |
3990 | break; |
3991 | default: |
3992 | break; |
3993 | } |
3994 | } |
3995 | statement(ts: "#endif" ); |
3996 | statement(ts: "" ); |
3997 | } |
3998 | |
3999 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBroadcast_First)) |
4000 | { |
4001 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBroadcast_First, r: result); |
4002 | |
4003 | for (auto &e : exts) |
4004 | { |
4005 | const char *name = Supp::get_extension_name(c: e); |
4006 | statement(ts: &e == &exts.front() ? "#if" : "#elif" , ts: " defined(" , ts&: name, ts: ")" ); |
4007 | |
4008 | switch (e) |
4009 | { |
4010 | case Supp::NV_shader_thread_shuffle: |
4011 | for (const char *t : workaround_types) |
4012 | { |
4013 | statement(ts&: t, ts: " subgroupBroadcastFirst(" , ts&: t, |
4014 | ts: " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }" ); |
4015 | } |
4016 | for (const char *t : workaround_types) |
4017 | { |
4018 | statement(ts&: t, ts: " subgroupBroadcast(" , ts&: t, |
4019 | ts: " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }" ); |
4020 | } |
4021 | break; |
4022 | case Supp::ARB_shader_ballot: |
4023 | for (const char *t : workaround_types) |
4024 | { |
4025 | statement(ts&: t, ts: " subgroupBroadcastFirst(" , ts&: t, |
4026 | ts: " value) { return readFirstInvocationARB(value); }" ); |
4027 | } |
4028 | for (const char *t : workaround_types) |
4029 | { |
4030 | statement(ts&: t, ts: " subgroupBroadcast(" , ts&: t, |
4031 | ts: " value, uint id) { return readInvocationARB(value, id); }" ); |
4032 | } |
4033 | break; |
4034 | default: |
4035 | break; |
4036 | } |
4037 | } |
4038 | statement(ts: "#endif" ); |
4039 | statement(ts: "" ); |
4040 | } |
4041 | |
4042 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotFindLSB_MSB)) |
4043 | { |
4044 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBallotFindLSB_MSB, r: result); |
4045 | |
4046 | for (auto &e : exts) |
4047 | { |
4048 | const char *name = Supp::get_extension_name(c: e); |
4049 | statement(ts: &e == &exts.front() ? "#if" : "#elif" , ts: " defined(" , ts&: name, ts: ")" ); |
4050 | |
4051 | switch (e) |
4052 | { |
4053 | case Supp::NV_shader_thread_group: |
4054 | statement(ts: "uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }" ); |
4055 | statement(ts: "uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }" ); |
4056 | break; |
4057 | default: |
4058 | break; |
4059 | } |
4060 | } |
4061 | statement(ts: "#else" ); |
4062 | statement(ts: "uint subgroupBallotFindLSB(uvec4 value)" ); |
4063 | begin_scope(); |
4064 | statement(ts: "int firstLive = findLSB(value.x);" ); |
4065 | statement(ts: "return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));" ); |
4066 | end_scope(); |
4067 | statement(ts: "uint subgroupBallotFindMSB(uvec4 value)" ); |
4068 | begin_scope(); |
4069 | statement(ts: "int firstLive = findMSB(value.y);" ); |
4070 | statement(ts: "return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));" ); |
4071 | end_scope(); |
4072 | statement(ts: "#endif" ); |
4073 | statement(ts: "" ); |
4074 | } |
4075 | |
4076 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupAll_Any_AllEqualBool)) |
4077 | { |
4078 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupAll_Any_AllEqualBool, r: result); |
4079 | |
4080 | for (auto &e : exts) |
4081 | { |
4082 | const char *name = Supp::get_extension_name(c: e); |
4083 | statement(ts: &e == &exts.front() ? "#if" : "#elif" , ts: " defined(" , ts&: name, ts: ")" ); |
4084 | |
4085 | switch (e) |
4086 | { |
4087 | case Supp::NV_gpu_shader_5: |
4088 | statement(ts: "bool subgroupAll(bool value) { return allThreadsNV(value); }" ); |
4089 | statement(ts: "bool subgroupAny(bool value) { return anyThreadNV(value); }" ); |
4090 | statement(ts: "bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }" ); |
4091 | break; |
4092 | case Supp::ARB_shader_group_vote: |
4093 | statement(ts: "bool subgroupAll(bool v) { return allInvocationsARB(v); }" ); |
4094 | statement(ts: "bool subgroupAny(bool v) { return anyInvocationARB(v); }" ); |
4095 | statement(ts: "bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }" ); |
4096 | break; |
4097 | case Supp::AMD_gcn_shader: |
4098 | statement(ts: "bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }" ); |
4099 | statement(ts: "bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }" ); |
4100 | statement(ts: "bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || " |
4101 | "b == ballotAMD(true); }" ); |
4102 | break; |
4103 | default: |
4104 | break; |
4105 | } |
4106 | } |
4107 | statement(ts: "#endif" ); |
4108 | statement(ts: "" ); |
4109 | } |
4110 | |
4111 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupAllEqualT)) |
4112 | { |
4113 | statement(ts: "#ifndef GL_KHR_shader_subgroup_vote" ); |
4114 | statement( |
4115 | ts: "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return " |
4116 | "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }" ); |
4117 | for (const char *t : workaround_types) |
4118 | statement(ts: "_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(" , ts&: t, ts: ")" ); |
4119 | statement(ts: "#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND" ); |
4120 | statement(ts: "#endif" ); |
4121 | statement(ts: "" ); |
4122 | } |
4123 | |
4124 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallot)) |
4125 | { |
4126 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBallot, r: result); |
4127 | |
4128 | for (auto &e : exts) |
4129 | { |
4130 | const char *name = Supp::get_extension_name(c: e); |
4131 | statement(ts: &e == &exts.front() ? "#if" : "#elif" , ts: " defined(" , ts&: name, ts: ")" ); |
4132 | |
4133 | switch (e) |
4134 | { |
4135 | case Supp::NV_shader_thread_group: |
4136 | statement(ts: "uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }" ); |
4137 | break; |
4138 | case Supp::ARB_shader_ballot: |
4139 | statement(ts: "uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }" ); |
4140 | break; |
4141 | default: |
4142 | break; |
4143 | } |
4144 | } |
4145 | statement(ts: "#endif" ); |
4146 | statement(ts: "" ); |
4147 | } |
4148 | |
4149 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupElect)) |
4150 | { |
4151 | statement(ts: "#ifndef GL_KHR_shader_subgroup_basic" ); |
4152 | statement(ts: "bool subgroupElect()" ); |
4153 | begin_scope(); |
4154 | statement(ts: "uvec4 activeMask = subgroupBallot(true);" ); |
4155 | statement(ts: "uint firstLive = subgroupBallotFindLSB(activeMask);" ); |
4156 | statement(ts: "return gl_SubgroupInvocationID == firstLive;" ); |
4157 | end_scope(); |
4158 | statement(ts: "#endif" ); |
4159 | statement(ts: "" ); |
4160 | } |
4161 | |
4162 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBarrier)) |
4163 | { |
4164 | // Extensions we're using in place of GL_KHR_shader_subgroup_basic state
4165 | // that subgroups execute in lockstep, so this barrier is implicit.
4166 | // However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
4167 | // and a specific test of optimizing scans by leveraging lock-step invocation execution
4168 | // has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
4169 | // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19 |
4170 | statement(ts: "#ifndef GL_KHR_shader_subgroup_basic" ); |
4171 | statement(ts: "void subgroupBarrier() { memoryBarrierShared(); }" ); |
4172 | statement(ts: "#endif" ); |
4173 | statement(ts: "" ); |
4174 | } |
4175 | |
4176 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupMemBarrier)) |
4177 | { |
4178 | if (model == spv::ExecutionModelGLCompute) |
4179 | { |
4180 | statement(ts: "#ifndef GL_KHR_shader_subgroup_basic" ); |
4181 | statement(ts: "void subgroupMemoryBarrier() { groupMemoryBarrier(); }" ); |
4182 | statement(ts: "void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }" ); |
4183 | statement(ts: "void subgroupMemoryBarrierShared() { memoryBarrierShared(); }" ); |
4184 | statement(ts: "void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }" ); |
4185 | statement(ts: "#endif" ); |
4186 | } |
4187 | else |
4188 | { |
4189 | statement(ts: "#ifndef GL_KHR_shader_subgroup_basic" ); |
4190 | statement(ts: "void subgroupMemoryBarrier() { memoryBarrier(); }" ); |
4191 | statement(ts: "void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }" ); |
4192 | statement(ts: "void subgroupMemoryBarrierImage() { memoryBarrierImage(); }" ); |
4193 | statement(ts: "#endif" ); |
4194 | } |
4195 | statement(ts: "" ); |
4196 | } |
4197 | |
4198 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout)) |
4199 | { |
4200 | statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot" ); |
4201 | statement(ts: "bool subgroupInverseBallot(uvec4 value)" ); |
4202 | begin_scope(); |
4203 | statement(ts: "return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));" ); |
4204 | end_scope(); |
4205 | |
4206 | statement(ts: "uint subgroupBallotInclusiveBitCount(uvec4 value)" ); |
4207 | begin_scope(); |
4208 | statement(ts: "uvec2 v = value.xy & gl_SubgroupLeMask.xy;" ); |
4209 | statement(ts: "ivec2 c = bitCount(v);" ); |
4210 | statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group" ); |
4211 | statement(ts: "return uint(c.x);" ); |
4212 | statement_no_indent(ts: "#else" ); |
4213 | statement(ts: "return uint(c.x + c.y);" ); |
4214 | statement_no_indent(ts: "#endif" ); |
4215 | end_scope(); |
4216 | |
4217 | statement(ts: "uint subgroupBallotExclusiveBitCount(uvec4 value)" ); |
4218 | begin_scope(); |
4219 | statement(ts: "uvec2 v = value.xy & gl_SubgroupLtMask.xy;" ); |
4220 | statement(ts: "ivec2 c = bitCount(v);" ); |
4221 | statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group" ); |
4222 | statement(ts: "return uint(c.x);" ); |
4223 | statement_no_indent(ts: "#else" ); |
4224 | statement(ts: "return uint(c.x + c.y);" ); |
4225 | statement_no_indent(ts: "#endif" ); |
4226 | end_scope(); |
4227 | statement(ts: "#endif" ); |
4228 | statement(ts: "" ); |
4229 | } |
4230 | |
4231 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotBitCount)) |
4232 | { |
4233 | statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot" ); |
4234 | statement(ts: "uint subgroupBallotBitCount(uvec4 value)" ); |
4235 | begin_scope(); |
4236 | statement(ts: "ivec2 c = bitCount(value.xy);" ); |
4237 | statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group" ); |
4238 | statement(ts: "return uint(c.x);" ); |
4239 | statement_no_indent(ts: "#else" ); |
4240 | statement(ts: "return uint(c.x + c.y);" ); |
4241 | statement_no_indent(ts: "#endif" ); |
4242 | end_scope(); |
4243 | statement(ts: "#endif" ); |
4244 | statement(ts: "" ); |
4245 | } |
4246 | |
4247 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotBitExtract)) |
4248 | { |
4249 | statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot" ); |
4250 | statement(ts: "bool subgroupBallotBitExtract(uvec4 value, uint index)" ); |
4251 | begin_scope(); |
4252 | statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group" ); |
4253 | statement(ts: "uint shifted = value.x >> index;" ); |
4254 | statement_no_indent(ts: "#else" ); |
4255 | statement(ts: "uint shifted = value[index >> 5u] >> (index & 0x1fu);" ); |
4256 | statement_no_indent(ts: "#endif" ); |
4257 | statement(ts: "return (shifted & 1u) != 0u;" ); |
4258 | end_scope(); |
4259 | statement(ts: "#endif" ); |
4260 | statement(ts: "" ); |
4261 | } |
4262 | } |
4263 | |
4264 | if (!workaround_ubo_load_overload_types.empty()) |
4265 | { |
4266 | for (auto &type_id : workaround_ubo_load_overload_types) |
4267 | { |
4268 | auto &type = get<SPIRType>(id: type_id); |
4269 | statement(ts: type_to_glsl(type), ts: " spvWorkaroundRowMajor(" , ts: type_to_glsl(type), |
4270 | ts: " wrap) { return wrap; }" ); |
4271 | } |
4272 | statement(ts: "" ); |
4273 | } |
4274 | |
4275 | if (requires_transpose_2x2) |
4276 | { |
4277 | statement(ts: "mat2 spvTranspose(mat2 m)" ); |
4278 | begin_scope(); |
4279 | statement(ts: "return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);" ); |
4280 | end_scope(); |
4281 | statement(ts: "" ); |
4282 | } |
4283 | |
4284 | if (requires_transpose_3x3) |
4285 | { |
4286 | statement(ts: "mat3 spvTranspose(mat3 m)" ); |
4287 | begin_scope(); |
4288 | statement(ts: "return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);" ); |
4289 | end_scope(); |
4290 | statement(ts: "" ); |
4291 | } |
4292 | |
4293 | if (requires_transpose_4x4) |
4294 | { |
4295 | statement(ts: "mat4 spvTranspose(mat4 m)" ); |
4296 | begin_scope(); |
4297 | statement(ts: "return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], " |
4298 | "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);" ); |
4299 | end_scope(); |
4300 | statement(ts: "" ); |
4301 | } |
4302 | } |
4303 | |
4304 | // Returns a string representation of the ID, usable as a function arg. |
4305 | // Default is to simply return the expression representation of the arg ID.
4306 | // Subclasses may override to modify the return value. |
4307 | string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id) |
4308 | { |
4309 | // Make sure that we use the name of the original variable, and not the parameter alias. |
4310 | uint32_t name_id = id; |
4311 | auto *var = maybe_get<SPIRVariable>(id); |
4312 | if (var && var->basevariable) |
4313 | name_id = var->basevariable; |
4314 | return to_expression(id: name_id); |
4315 | } |
4316 | |
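     | // Forces the given ID to be materialized as an explicit temporary and triggers
     | // another compilation pass so the new temporary takes effect.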
4317 | void CompilerGLSL::force_temporary_and_recompile(uint32_t id) |
4318 | { |
4319 | auto res = forced_temporaries.insert(x: id); |
4320 | |
4321 | // Forcing new temporaries guarantees forward progress. |
4322 | if (res.second) |
4323 | force_recompile_guarantee_forward_progress(); |
4324 | else |
4325 | force_recompile(); |
4326 | } |
4327 | |
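     | // Ensures an expression is read with the requested precision by creating a mirror
     | // temporary with the opposite precision qualifier when needed.
     | // Returns the ID to read from, which is either the original ID or its precision alias.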
4328 | uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision) |
4329 | { |
4330 | // Constants do not have innate precision. |
4331 | auto handle_type = ir.ids[id].get_type(); |
4332 | if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef) |
4333 | return id; |
4334 | |
4335 | // Ignore anything that isn't 32-bit values. |
4336 | auto &type = get<SPIRType>(id: type_id); |
4337 | if (type.pointer) |
4338 | return id; |
4339 | if (type.basetype != SPIRType::Float && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int) |
4340 | return id; |
4341 | |
4342 | if (precision == Options::DontCare) |
4343 | { |
4344 | // If precision is consumed as don't care (operations only consisting of constants), |
4345 | // we need to bind the expression to a temporary, |
4346 | // otherwise we have no way of controlling the precision later. |
4347 | auto itr = forced_temporaries.insert(x: id); |
4348 | if (itr.second) |
4349 | force_recompile_guarantee_forward_progress(); |
4350 | return id; |
4351 | } |
4352 | |
4353 | auto current_precision = has_decoration(id, decoration: DecorationRelaxedPrecision) ? Options::Mediump : Options::Highp; |
4354 | if (current_precision == precision) |
4355 | return id; |
4356 | |
4357 | auto itr = temporary_to_mirror_precision_alias.find(x: id); |
4358 | if (itr == temporary_to_mirror_precision_alias.end()) |
4359 | { |
4360 | uint32_t alias_id = ir.increase_bound_by(count: 1); |
4361 | auto &m = ir.meta[alias_id]; |
4362 | if (auto *input_m = ir.find_meta(id)) |
4363 | m = *input_m; |
4364 | |
4365 | const char *prefix; |
4366 | if (precision == Options::Mediump) |
4367 | { |
4368 | set_decoration(id: alias_id, decoration: DecorationRelaxedPrecision); |
4369 | prefix = "mp_copy_" ; |
4370 | } |
4371 | else |
4372 | { |
4373 | unset_decoration(id: alias_id, decoration: DecorationRelaxedPrecision); |
4374 | prefix = "hp_copy_" ; |
4375 | } |
4376 | |
4377 | auto alias_name = join(ts&: prefix, ts: to_name(id)); |
4378 | ParsedIR::sanitize_underscores(str&: alias_name); |
4379 | set_name(id: alias_id, name: alias_name); |
4380 | |
4381 | emit_op(result_type: type_id, result_id: alias_id, rhs: to_expression(id), forward_rhs: true); |
4382 | temporary_to_mirror_precision_alias[id] = alias_id; |
4383 | forced_temporaries.insert(x: id); |
4384 | forced_temporaries.insert(x: alias_id); |
4385 | force_recompile_guarantee_forward_progress(); |
4386 | id = alias_id; |
4387 | } |
4388 | else |
4389 | { |
4390 | id = itr->second; |
4391 | } |
4392 | |
4393 | return id; |
4394 | } |
4395 | |
4396 | void CompilerGLSL::handle_invalid_expression(uint32_t id) |
4397 | { |
4398 | // We tried to read an invalidated expression. |
4399 | // This means we need another pass at compilation, but next time, |
4400 | // force temporary variables so that they cannot be invalidated. |
4401 | force_temporary_and_recompile(id); |
4402 | |
4403 | // If the invalid expression happened as a result of a CompositeInsert |
4404 | // overwrite, we must block this from happening next iteration. |
4405 | if (composite_insert_overwritten.count(x: id)) |
4406 | block_composite_insert_overwrite.insert(x: id); |
4407 | } |
4408 | |
4409 | // Converts the format of the current expression from packed to unpacked,
4410 | // by wrapping the expression in a constructor of the appropriate type.
4411 | // GLSL does not support packed formats, so this simply returns the expression as-is.
4412 | // Subclasses that support packed types will override this.
4413 | string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool) |
4414 | { |
4415 | return expr_str; |
4416 | } |
4417 | |
4418 | // Sometimes we proactively enclose an expression only to find that we did not need the parentheses after all.
4419 | void CompilerGLSL::strip_enclosed_expression(string &expr) |
4420 | { |
4421 | if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')') |
4422 | return; |
4423 | |
4424 | // Have to make sure that our first and last parens actually enclose everything inside it. |
4425 | uint32_t paren_count = 0; |
4426 | for (auto &c : expr) |
4427 | { |
4428 | if (c == '(') |
4429 | paren_count++; |
4430 | else if (c == ')') |
4431 | { |
4432 | paren_count--; |
4433 | |
4434 | // If we hit 0 and this is not the final char, our first and final parens actually don't |
4435 | // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d). |
4436 | if (paren_count == 0 && &c != &expr.back()) |
4437 | return; |
4438 | } |
4439 | } |
4440 | expr.erase(pos: expr.size() - 1, n: 1); |
4441 | expr.erase(position: begin(cont&: expr)); |
4442 | } |
4443 | |
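     | // Wraps an expression in parentheses when it could otherwise bind incorrectly
     | // if composed into a larger expression (leading unary operators or top-level spaces).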
4444 | string CompilerGLSL::enclose_expression(const string &expr) |
4445 | { |
4446 | bool need_parens = false; |
4447 | |
4448 | // If the expression starts with a unary operator, we need to enclose it to deal with cases where we have
4449 | // back-to-back unary expressions.
4450 | if (!expr.empty()) |
4451 | { |
4452 | auto c = expr.front(); |
4453 | if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*') |
4454 | need_parens = true; |
4455 | } |
4456 | |
4457 | if (!need_parens) |
4458 | { |
4459 | uint32_t paren_count = 0; |
4460 | for (auto c : expr) |
4461 | { |
4462 | if (c == '(' || c == '[') |
4463 | paren_count++; |
4464 | else if (c == ')' || c == ']') |
4465 | { |
4466 | assert(paren_count); |
4467 | paren_count--; |
4468 | } |
4469 | else if (c == ' ' && paren_count == 0) |
4470 | { |
4471 | need_parens = true; |
4472 | break; |
4473 | } |
4474 | } |
4475 | assert(paren_count == 0); |
4476 | } |
4477 | |
4478 | // If this expression contains any spaces which are not enclosed by parentheses, |
4479 | // we need to enclose it so we can treat the whole string as an expression. |
4480 | // This happens when two expressions have been part of a binary op earlier. |
4481 | if (need_parens) |
4482 | return join(ts: '(', ts: expr, ts: ')'); |
4483 | else |
4484 | return expr; |
4485 | } |
4486 | |
4487 | string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr) |
4488 | { |
4489 | // If this expression starts with an address-of operator ('&'), then |
4490 | // just return the part after the operator. |
4491 | // TODO: Strip parens if unnecessary? |
4492 | if (expr.front() == '&') |
4493 | return expr.substr(pos: 1); |
4494 | else if (backend.native_pointers) |
4495 | return join(ts: '*', ts: expr); |
4496 | else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct && |
4497 | expr_type.pointer_depth == 1) |
4498 | { |
4499 | return join(ts: enclose_expression(expr), ts: ".value" ); |
4500 | } |
4501 | else |
4502 | return expr; |
4503 | } |
4504 | |
4505 | string CompilerGLSL::address_of_expression(const std::string &expr) |
4506 | { |
4507 | if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')') |
4508 | { |
4509 | // If we have an expression which looks like (*foo), taking the address of it is the same as stripping |
4510 | // the first two and last characters. We might have to enclose the expression. |
4511 | // This doesn't work for cases like (*foo + 10), |
4512 | // but that is an r-value expression which we cannot take the address of anyway.
4513 | return enclose_expression(expr: expr.substr(pos: 2, n: expr.size() - 3)); |
4514 | } |
4515 | else if (expr.front() == '*') |
4516 | { |
4517 | // If this expression starts with a dereference operator ('*'), then |
4518 | // just return the part after the operator. |
4519 | return expr.substr(pos: 1); |
4520 | } |
4521 | else |
4522 | return join(ts: '&', ts: enclose_expression(expr)); |
4523 | } |
4524 | |
4525 | // Just like to_expression except that we enclose the expression inside parentheses if needed. |
4526 | string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read) |
4527 | { |
4528 | return enclose_expression(expr: to_expression(id, register_expression_read)); |
4529 | } |
4530 | |
4531 | // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans. |
4532 | // need_transpose must be forced to false. |
4533 | string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id) |
4534 | { |
4535 | return unpack_expression_type(expr_str: to_expression(id), expression_type(id), |
4536 | get_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID), |
4537 | has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked), true); |
4538 | } |
4539 | |
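     | // Like to_expression, but unpacks physical-type/packed expressions first.
     | // Transposed expressions are returned as-is, since the transpose path handles unpacking itself.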
4540 | string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read) |
4541 | { |
4542 | // If we need to transpose, it will also take care of unpacking rules. |
4543 | auto *e = maybe_get<SPIRExpression>(id); |
4544 | bool need_transpose = e && e->need_transpose; |
4545 | bool is_remapped = has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID); |
4546 | bool is_packed = has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
4547 | |
4548 | if (!need_transpose && (is_remapped || is_packed)) |
4549 | { |
4550 | return unpack_expression_type(expr_str: to_expression(id, register_expression_read), |
4551 | get_pointee_type(type_id: expression_type_id(id)), |
4552 | get_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID), |
4553 | has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked), false); |
4554 | } |
4555 | else |
4556 | return to_expression(id, register_expression_read); |
4557 | } |
4558 | |
4559 | string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read) |
4560 | { |
4561 | return enclose_expression(expr: to_unpacked_expression(id, register_expression_read)); |
4562 | } |
4563 | |
4564 | string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read) |
4565 | { |
4566 | auto &type = expression_type(id); |
4567 | if (type.pointer && should_dereference(id)) |
4568 | return dereference_expression(expr_type: type, expr: to_enclosed_expression(id, register_expression_read)); |
4569 | else |
4570 | return to_expression(id, register_expression_read); |
4571 | } |
4572 | |
4573 | string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read) |
4574 | { |
4575 | auto &type = expression_type(id); |
4576 | if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) |
4577 | return address_of_expression(expr: to_enclosed_expression(id, register_expression_read)); |
4578 | else |
4579 | return to_unpacked_expression(id, register_expression_read); |
4580 | } |
4581 | |
4582 | string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read) |
4583 | { |
4584 | auto &type = expression_type(id); |
4585 | if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) |
4586 | return address_of_expression(expr: to_enclosed_expression(id, register_expression_read)); |
4587 | else |
4588 | return to_enclosed_unpacked_expression(id, register_expression_read); |
4589 | } |
4590 | |
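     | // Extracts a single component from a vector expression, using array indexing for
     | // packed physical types and swizzles otherwise.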
4591 | string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
4592 | { |
4593 | auto expr = to_enclosed_expression(id); |
4594 | if (has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked)) |
4595 | return join(ts&: expr, ts: "[" , ts&: index, ts: "]" ); |
4596 | else |
4597 | return join(ts&: expr, ts: "." , ts: index_to_swizzle(index)); |
4598 | } |
4599 | |
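     | // Builds a plain constant expression for a scalar or vector extracted from a
     | // composite constant via a literal index chain.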
4600 | string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c,
4601 | const uint32_t *chain, uint32_t length) |
4602 | { |
4603 | // It is kind of silly if an application actually enters this path, since it knows the constant up front.
4604 | // It is useful here to extract the plain constant directly. |
4605 | SPIRConstant tmp; |
4606 | tmp.constant_type = result_type; |
4607 | auto &composite_type = get<SPIRType>(id: c.constant_type); |
4608 | assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty()); |
4609 | assert(!c.specialization); |
4610 | |
4611 | if (is_matrix(type: composite_type)) |
4612 | { |
4613 | if (length == 2) |
4614 | { |
4615 | tmp.m.c[0].vecsize = 1; |
4616 | tmp.m.columns = 1; |
4617 | tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]]; |
4618 | } |
4619 | else |
4620 | { |
4621 | assert(length == 1); |
4622 | tmp.m.c[0].vecsize = composite_type.vecsize; |
4623 | tmp.m.columns = 1; |
4624 | tmp.m.c[0] = c.m.c[chain[0]]; |
4625 | } |
4626 | } |
4627 | else |
4628 | { |
4629 | assert(length == 1); |
4630 | tmp.m.c[0].vecsize = 1; |
4631 | tmp.m.columns = 1; |
4632 | tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]]; |
4633 | } |
4634 | |
4635 | return constant_expression(c: tmp); |
4636 | } |
4637 | |
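     | // Expands an array expression into an explicit element-by-element initializer list,
     | // recursing into nested arrays.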
4638 | string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type) |
4639 | { |
4640 | uint32_t size = to_array_size_literal(type); |
4641 | auto &parent = get<SPIRType>(id: type.parent_type); |
4642 | string expr = "{ " ; |
4643 | |
4644 | for (uint32_t i = 0; i < size; i++) |
4645 | { |
4646 | auto subexpr = join(ts: base_expr, ts: "[" , ts: convert_to_string(t: i), ts: "]" ); |
4647 | if (parent.array.empty()) |
4648 | expr += subexpr; |
4649 | else |
4650 | expr += to_rerolled_array_expression(base_expr: subexpr, type: parent); |
4651 | |
4652 | if (i + 1 < size) |
4653 | expr += ", " ; |
4654 | } |
4655 | |
4656 | expr += " }" ; |
4657 | return expr; |
4658 | } |
4659 | |
4660 | string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool block_like_type) |
4661 | { |
4662 | auto &type = expression_type(id); |
4663 | |
4664 | bool reroll_array = !type.array.empty() && |
4665 | (!backend.array_is_value_type || |
4666 | (block_like_type && !backend.array_is_value_type_in_buffer_blocks)); |
4667 | |
4668 | if (reroll_array) |
4669 | { |
4670 | // For this case, we need to "re-roll" an array initializer from a temporary. |
4671 | // We cannot simply pass the array directly, since it decays to a pointer and it cannot |
4672 | // participate in a struct initializer. E.g. |
4673 | // float arr[2] = { 1.0, 2.0 }; |
4674 | // Foo foo = { arr }; must be transformed to |
4675 | // Foo foo = { { arr[0], arr[1] } }; |
4676 | // The array sizes cannot be deduced from specialization constants since we cannot use any loops. |
4677 | |
4678 | // We're only triggering one read of the array expression, but this is fine since arrays have to be declared |
4679 | // as temporaries anyway.
4680 | return to_rerolled_array_expression(base_expr: to_enclosed_expression(id), type); |
4681 | } |
4682 | else |
4683 | return to_unpacked_expression(id); |
4684 | } |
4685 | |
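     | // Like to_expression, but lets the backend decorate the expression for non-uniform
     | // indexing when the ID carries the NonUniform decoration.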
4686 | string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id) |
4687 | { |
4688 | string expr = to_expression(id); |
4689 | |
4690 | if (has_decoration(id, decoration: DecorationNonUniform)) |
4691 | convert_non_uniform_expression(expr, ptr_id: id); |
4692 | |
4693 | return expr; |
4694 | } |
4695 | |
4696 | string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) |
4697 | { |
4698 | auto itr = invalid_expressions.find(x: id); |
4699 | if (itr != end(cont&: invalid_expressions)) |
4700 | handle_invalid_expression(id); |
4701 | |
4702 | if (ir.ids[id].get_type() == TypeExpression) |
4703 | { |
4704 | // We might have a more complex chain of dependencies. |
4705 | // A possible scenario is that we |
4706 | // |
4707 | // %1 = OpLoad |
4708 | // %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
4709 | // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that. |
4710 | // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions. |
4711 | // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before. |
4712 | // |
4713 | // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store, |
4714 | // and see that we should not forward reads of the original variable. |
4715 | auto &expr = get<SPIRExpression>(id); |
4716 | for (uint32_t dep : expr.expression_dependencies) |
4717 | if (invalid_expressions.find(x: dep) != end(cont&: invalid_expressions)) |
4718 | handle_invalid_expression(id: dep); |
4719 | } |
4720 | |
4721 | if (register_expression_read) |
4722 | track_expression_read(id); |
4723 | |
4724 | switch (ir.ids[id].get_type()) |
4725 | { |
4726 | case TypeExpression: |
4727 | { |
4728 | auto &e = get<SPIRExpression>(id); |
4729 | if (e.base_expression) |
4730 | return to_enclosed_expression(id: e.base_expression) + e.expression; |
4731 | else if (e.need_transpose) |
4732 | { |
4733 | // This should not be reached for access chains, since we always deal explicitly with transpose state |
4734 | // when consuming an access chain expression. |
4735 | uint32_t physical_type_id = get_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID); |
4736 | bool is_packed = has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
4737 | return convert_row_major_matrix(exp_str: e.expression, exp_type: get<SPIRType>(id: e.expression_type), physical_type_id, |
4738 | is_packed); |
4739 | } |
4740 | else if (flattened_structs.count(x: id)) |
4741 | { |
4742 | return load_flattened_struct(basename: e.expression, type: get<SPIRType>(id: e.expression_type)); |
4743 | } |
4744 | else |
4745 | { |
4746 | if (is_forcing_recompilation()) |
4747 | { |
4748 | // During first compilation phase, certain expression patterns can trigger exponential growth of memory. |
4749 | // Avoid this by returning dummy expressions during this phase. |
4750 | // Do not use empty expressions here, because those are sentinels for other cases. |
4751 | return "_" ; |
4752 | } |
4753 | else |
4754 | return e.expression; |
4755 | } |
4756 | } |
4757 | |
4758 | case TypeConstant: |
4759 | { |
4760 | auto &c = get<SPIRConstant>(id); |
4761 | auto &type = get<SPIRType>(id: c.constant_type); |
4762 | |
4763 | // WorkGroupSize may be a constant. |
4764 | if (has_decoration(id: c.self, decoration: DecorationBuiltIn)) |
4765 | return builtin_to_glsl(builtin: BuiltIn(get_decoration(id: c.self, decoration: DecorationBuiltIn)), storage: StorageClassGeneric); |
4766 | else if (c.specialization) |
4767 | { |
4768 | if (backend.workgroup_size_is_hidden) |
4769 | { |
4770 | int wg_index = get_constant_mapping_to_workgroup_component(c); |
4771 | if (wg_index >= 0) |
4772 | { |
4773 | auto wg_size = join(ts: builtin_to_glsl(builtin: BuiltInWorkgroupSize, storage: StorageClassInput), ts: vector_swizzle(vecsize: 1, index: wg_index)); |
4774 | if (type.basetype != SPIRType::UInt) |
4775 | wg_size = bitcast_expression(target_type: type, expr_type: SPIRType::UInt, expr: wg_size); |
4776 | return wg_size; |
4777 | } |
4778 | } |
4779 | |
4780 | return to_name(id); |
4781 | } |
4782 | else if (c.is_used_as_lut) |
4783 | return to_name(id); |
4784 | else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) |
4785 | return to_name(id); |
4786 | else if (!type.array.empty() && !backend.can_declare_arrays_inline) |
4787 | return to_name(id); |
4788 | else |
4789 | return constant_expression(c); |
4790 | } |
4791 | |
4792 | case TypeConstantOp: |
4793 | return to_name(id); |
4794 | |
4795 | case TypeVariable: |
4796 | { |
4797 | auto &var = get<SPIRVariable>(id); |
4798 | // If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
4799 | // since the variable has not been declared yet.
4800 | if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable)) |
4801 | { |
4802 | // We might try to load from a loop variable before it has been initialized. |
4803 | // Prefer static expression and fallback to initializer. |
4804 | if (var.static_expression) |
4805 | return to_expression(id: var.static_expression); |
4806 | else if (var.initializer) |
4807 | return to_expression(id: var.initializer); |
4808 | else |
4809 | { |
4810 | // We cannot declare the variable yet, so have to fake it. |
4811 | uint32_t undef_id = ir.increase_bound_by(count: 1); |
4812 | return emit_uninitialized_temporary_expression(type: get_variable_data_type_id(var), id: undef_id).expression; |
4813 | } |
4814 | } |
4815 | else if (var.deferred_declaration) |
4816 | { |
4817 | var.deferred_declaration = false; |
4818 | return variable_decl(variable: var); |
4819 | } |
4820 | else if (flattened_structs.count(x: id)) |
4821 | { |
4822 | return load_flattened_struct(basename: to_name(id), type: get<SPIRType>(id: var.basetype)); |
4823 | } |
4824 | else |
4825 | { |
4826 | auto &dec = ir.meta[var.self].decoration; |
4827 | if (dec.builtin) |
4828 | return builtin_to_glsl(builtin: dec.builtin_type, storage: var.storage); |
4829 | else |
4830 | return to_name(id); |
4831 | } |
4832 | } |
4833 | |
4834 | case TypeCombinedImageSampler: |
4835 | // We should never take the expression of this type directly.
4836 | // The intention is that texture sampling functions will extract the image and samplers |
4837 | // separately and take their expressions as needed. |
4838 | // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler |
4839 | // expression ala sampler2D(texture, sampler). |
4840 | SPIRV_CROSS_THROW("Combined image samplers have no default expression representation." ); |
4841 | |
4842 | case TypeAccessChain: |
4843 | // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad. |
4844 | SPIRV_CROSS_THROW("Access chains have no default expression representation." ); |
4845 | |
4846 | default: |
4847 | return to_name(id); |
4848 | } |
4849 | } |
4850 | |
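     | // Emits a constant expression for an OpSpecConstantOp by mapping the opcode to the
     | // equivalent GLSL operator or constructor, bitcasting operands where signedness differs.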
4851 | string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) |
4852 | { |
4853 | auto &type = get<SPIRType>(id: cop.basetype); |
4854 | bool binary = false; |
4855 | bool unary = false; |
4856 | string op; |
4857 | |
4858 | if (is_legacy() && is_unsigned_opcode(op: cop.opcode)) |
4859 | SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets." ); |
4860 | |
4861 | // TODO: Find a clean way to reuse emit_instruction. |
4862 | switch (cop.opcode) |
4863 | { |
4864 | case OpSConvert: |
4865 | case OpUConvert: |
4866 | case OpFConvert: |
4867 | op = type_to_glsl_constructor(type); |
4868 | break; |
4869 | |
4870 | #define GLSL_BOP(opname, x) \ |
4871 | case Op##opname: \ |
4872 | binary = true; \ |
4873 | op = x; \ |
4874 | break |
4875 | |
4876 | #define GLSL_UOP(opname, x) \ |
4877 | case Op##opname: \ |
4878 | unary = true; \ |
4879 | op = x; \ |
4880 | break |
4881 | |
4882 | GLSL_UOP(SNegate, "-" ); |
4883 | GLSL_UOP(Not, "~" ); |
4884 | GLSL_BOP(IAdd, "+" ); |
4885 | GLSL_BOP(ISub, "-" ); |
4886 | GLSL_BOP(IMul, "*" ); |
4887 | GLSL_BOP(SDiv, "/" ); |
4888 | GLSL_BOP(UDiv, "/" ); |
4889 | GLSL_BOP(UMod, "%" ); |
4890 | GLSL_BOP(SMod, "%" ); |
4891 | GLSL_BOP(ShiftRightLogical, ">>" ); |
4892 | GLSL_BOP(ShiftRightArithmetic, ">>" ); |
4893 | GLSL_BOP(ShiftLeftLogical, "<<" ); |
4894 | GLSL_BOP(BitwiseOr, "|" ); |
4895 | GLSL_BOP(BitwiseXor, "^" ); |
4896 | GLSL_BOP(BitwiseAnd, "&" ); |
4897 | GLSL_BOP(LogicalOr, "||" ); |
4898 | GLSL_BOP(LogicalAnd, "&&" ); |
4899 | GLSL_UOP(LogicalNot, "!" ); |
4900 | GLSL_BOP(LogicalEqual, "==" ); |
4901 | GLSL_BOP(LogicalNotEqual, "!=" ); |
4902 | GLSL_BOP(IEqual, "==" ); |
4903 | GLSL_BOP(INotEqual, "!=" ); |
4904 | GLSL_BOP(ULessThan, "<" ); |
4905 | GLSL_BOP(SLessThan, "<" ); |
4906 | GLSL_BOP(ULessThanEqual, "<=" ); |
4907 | GLSL_BOP(SLessThanEqual, "<=" ); |
4908 | GLSL_BOP(UGreaterThan, ">" ); |
4909 | GLSL_BOP(SGreaterThan, ">" ); |
4910 | GLSL_BOP(UGreaterThanEqual, ">=" ); |
4911 | GLSL_BOP(SGreaterThanEqual, ">=" ); |
4912 | |
4913 | case OpSRem: |
4914 | { |
4915 | uint32_t op0 = cop.arguments[0]; |
4916 | uint32_t op1 = cop.arguments[1]; |
4917 | return join(ts: to_enclosed_expression(id: op0), ts: " - " , ts: to_enclosed_expression(id: op1), ts: " * " , ts: "(" , |
4918 | ts: to_enclosed_expression(id: op0), ts: " / " , ts: to_enclosed_expression(id: op1), ts: ")" ); |
4919 | } |
4920 | |
4921 | case OpSelect: |
4922 | { |
4923 | if (cop.arguments.size() < 3) |
4924 | SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp." ); |
4925 | |
4926 | // This one is pretty annoying. It's triggered from |
4927 | // uint(bool), int(bool) from spec constants. |
4928 | // In order to preserve its compile-time constness in Vulkan GLSL, |
4929 | // we need to reduce the OpSelect expression back to this simplified model. |
4930 | // If we cannot, fail. |
4931 | if (to_trivial_mix_op(type, op, left: cop.arguments[2], right: cop.arguments[1], lerp: cop.arguments[0])) |
4932 | { |
4933 | // Implement as a simple cast down below. |
4934 | } |
4935 | else |
4936 | { |
4937 | // Implement a ternary and pray the compiler understands it :) |
4938 | return to_ternary_expression(result_type: type, select: cop.arguments[0], true_value: cop.arguments[1], false_value: cop.arguments[2]); |
4939 | } |
4940 | break; |
4941 | } |
4942 | |
4943 | case OpVectorShuffle: |
4944 | { |
4945 | string expr = type_to_glsl_constructor(type); |
4946 | expr += "(" ; |
4947 | |
4948 | uint32_t left_components = expression_type(id: cop.arguments[0]).vecsize; |
4949 | string left_arg = to_enclosed_expression(id: cop.arguments[0]); |
4950 | string right_arg = to_enclosed_expression(id: cop.arguments[1]); |
4951 | |
4952 | for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++) |
4953 | { |
4954 | uint32_t index = cop.arguments[i]; |
4955 | if (index >= left_components) |
4956 | expr += right_arg + "." + "xyzw" [index - left_components]; |
4957 | else |
4958 | expr += left_arg + "." + "xyzw" [index]; |
4959 | |
4960 | if (i + 1 < uint32_t(cop.arguments.size())) |
4961 | expr += ", " ; |
4962 | } |
4963 | |
4964 | expr += ")" ; |
4965 | return expr; |
4966 | } |
4967 | |
4968 | case OpCompositeExtract: |
4969 | { |
4970 | auto expr = access_chain_internal(base: cop.arguments[0], indices: &cop.arguments[1], count: uint32_t(cop.arguments.size() - 1), |
4971 | flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr); |
4972 | return expr; |
4973 | } |
4974 | |
4975 | case OpCompositeInsert: |
4976 | SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported." ); |
4977 | |
4978 | default: |
4979 | // Some opcodes are unimplemented here; these are currently not possible to test from glslang.
4980 | SPIRV_CROSS_THROW("Unimplemented spec constant op." ); |
4981 | } |
4982 | |
4983 | uint32_t bit_width = 0; |
4984 | if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert) |
4985 | bit_width = expression_type(id: cop.arguments[0]).width; |
4986 | |
4987 | SPIRType::BaseType input_type; |
4988 | bool skip_cast_if_equal_type = opcode_is_sign_invariant(opcode: cop.opcode); |
4989 | |
4990 | switch (cop.opcode) |
4991 | { |
4992 | case OpIEqual: |
4993 | case OpINotEqual: |
4994 | input_type = to_signed_basetype(width: bit_width); |
4995 | break; |
4996 | |
4997 | case OpSLessThan: |
4998 | case OpSLessThanEqual: |
4999 | case OpSGreaterThan: |
5000 | case OpSGreaterThanEqual: |
5001 | case OpSMod: |
5002 | case OpSDiv: |
5003 | case OpShiftRightArithmetic: |
5004 | case OpSConvert: |
5005 | case OpSNegate: |
5006 | input_type = to_signed_basetype(width: bit_width); |
5007 | break; |
5008 | |
5009 | case OpULessThan: |
5010 | case OpULessThanEqual: |
5011 | case OpUGreaterThan: |
5012 | case OpUGreaterThanEqual: |
5013 | case OpUMod: |
5014 | case OpUDiv: |
5015 | case OpShiftRightLogical: |
5016 | case OpUConvert: |
5017 | input_type = to_unsigned_basetype(width: bit_width); |
5018 | break; |
5019 | |
5020 | default: |
5021 | input_type = type.basetype; |
5022 | break; |
5023 | } |
5024 | |
5025 | #undef GLSL_BOP |
5026 | #undef GLSL_UOP |
5027 | if (binary) |
5028 | { |
5029 | if (cop.arguments.size() < 2) |
5030 | SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp." ); |
5031 | |
5032 | string cast_op0; |
5033 | string cast_op1; |
5034 | auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0: cop.arguments[0], |
5035 | op1: cop.arguments[1], skip_cast_if_equal_type); |
5036 | |
5037 | if (type.basetype != input_type && type.basetype != SPIRType::Boolean) |
5038 | { |
5039 | expected_type.basetype = input_type; |
5040 | auto expr = bitcast_glsl_op(result_type: type, argument_type: expected_type); |
5041 | expr += '('; |
5042 | expr += join(ts&: cast_op0, ts: " " , ts&: op, ts: " " , ts&: cast_op1); |
5043 | expr += ')'; |
5044 | return expr; |
5045 | } |
5046 | else |
5047 | return join(ts: "(" , ts&: cast_op0, ts: " " , ts&: op, ts: " " , ts&: cast_op1, ts: ")" ); |
5048 | } |
5049 | else if (unary) |
5050 | { |
5051 | if (cop.arguments.size() < 1) |
5052 | SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp." ); |
5053 | |
5054 | // Auto-bitcast to result type as needed. |
5055 | // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants. |
5056 | return join(ts: "(" , ts&: op, ts: bitcast_glsl(result_type: type, arg: cop.arguments[0]), ts: ")" ); |
5057 | } |
5058 | else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert) |
5059 | { |
5060 | if (cop.arguments.size() < 1) |
5061 | SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp." ); |
5062 | |
5063 | auto &arg_type = expression_type(id: cop.arguments[0]); |
5064 | if (arg_type.width < type.width && input_type != arg_type.basetype) |
5065 | { |
5066 | auto expected = arg_type; |
5067 | expected.basetype = input_type; |
5068 | return join(ts&: op, ts: "(" , ts: bitcast_glsl(result_type: expected, arg: cop.arguments[0]), ts: ")" ); |
5069 | } |
5070 | else |
5071 | return join(ts&: op, ts: "(" , ts: to_expression(id: cop.arguments[0]), ts: ")" ); |
5072 | } |
5073 | else |
5074 | { |
5075 | if (cop.arguments.size() < 1) |
5076 | SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp." ); |
5077 | return join(ts&: op, ts: "(" , ts: to_expression(id: cop.arguments[0]), ts: ")" ); |
5078 | } |
5079 | } |
5080 | |
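     | // Emits a literal or constructor expression for a constant, handling null pointers,
     | // arrays and structs (initializer lists), empty structs, and multi-column matrices.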
5081 | string CompilerGLSL::constant_expression(const SPIRConstant &c, bool inside_block_like_struct_scope) |
5082 | { |
5083 | auto &type = get<SPIRType>(id: c.constant_type); |
5084 | |
5085 | if (type.pointer) |
5086 | { |
5087 | return backend.null_pointer_literal; |
5088 | } |
5089 | else if (!c.subconstants.empty()) |
5090 | { |
5091 | // Handles Arrays and structures. |
5092 | string res; |
5093 | |
5094 | // Only consider the decay if we are inside a struct scope where we are emitting a member with Offset decoration. |
5095 | // Outside a block-like struct declaration, we can always bind to a constant array with templated type. |
5096 | // Should look at ArrayStride here as well, but it's possible to declare a constant struct |
5097 | // with Offset = 0, using no ArrayStride on the enclosed array type. |
5098 | // A particular CTS test hits this scenario. |
5099 | bool array_type_decays = inside_block_like_struct_scope && |
5100 | !type.array.empty() && !backend.array_is_value_type_in_buffer_blocks; |
5101 | |
5102 | // Allow Metal to use the array<T> template to make arrays a value type |
5103 | bool needs_trailing_bracket = false;
5104 | if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct && |
5105 | type.array.empty()) |
5106 | { |
5107 | res = type_to_glsl_constructor(type) + "{ " ; |
5108 | } |
5109 | else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type && |
5110 | !type.array.empty() && !array_type_decays) |
5111 | { |
5112 | res = type_to_glsl_constructor(type) + "({ " ; |
5113 | needs_trailing_bracket = true;
5114 | } |
5115 | else if (backend.use_initializer_list) |
5116 | { |
5117 | res = "{ " ; |
5118 | } |
5119 | else |
5120 | { |
5121 | res = type_to_glsl_constructor(type) + "(" ; |
5122 | } |
5123 | |
5124 | uint32_t subconstant_index = 0; |
5125 | for (auto &elem : c.subconstants) |
5126 | { |
5127 | auto &subc = get<SPIRConstant>(id: elem); |
5128 | if (subc.specialization) |
5129 | res += to_name(id: elem); |
5130 | else |
5131 | { |
5132 | if (type.array.empty() && type.basetype == SPIRType::Struct) |
5133 | { |
5134 | // When we get down to emitting struct members, override the block-like information. |
5135 | // For constants, we can freely mix and match block-like state. |
5136 | inside_block_like_struct_scope = |
5137 | has_member_decoration(id: type.self, index: subconstant_index, decoration: DecorationOffset); |
5138 | } |
5139 | |
5140 | res += constant_expression(c: subc, inside_block_like_struct_scope); |
5141 | } |
5142 | |
5143 | if (&elem != &c.subconstants.back()) |
5144 | res += ", " ; |
5145 | |
5146 | subconstant_index++; |
5147 | } |
5148 | |
5149 | res += backend.use_initializer_list ? " }" : ")" ; |
5150 | if (needs_trailing_bracket)
5151 | res += ")" ; |
5152 | |
5153 | return res; |
5154 | } |
5155 | else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0) |
5156 | { |
5157 | // Metal tessellation likes empty structs which are then constant expressions. |
5158 | if (backend.supports_empty_struct) |
5159 | return "{ }" ; |
5160 | else if (backend.use_typed_initializer_list) |
5161 | return join(ts: type_to_glsl(type: get<SPIRType>(id: c.constant_type)), ts: "{ 0 }" ); |
5162 | else if (backend.use_initializer_list) |
5163 | return "{ 0 }" ; |
5164 | else |
5165 | return join(ts: type_to_glsl(type: get<SPIRType>(id: c.constant_type)), ts: "(0)" ); |
5166 | } |
5167 | else if (c.columns() == 1) |
5168 | { |
5169 | return constant_expression_vector(c, vector: 0); |
5170 | } |
5171 | else |
5172 | { |
5173 | string res = type_to_glsl(type: get<SPIRType>(id: c.constant_type)) + "(" ; |
5174 | for (uint32_t col = 0; col < c.columns(); col++) |
5175 | { |
5176 | if (c.specialization_constant_id(col) != 0) |
5177 | res += to_name(id: c.specialization_constant_id(col)); |
5178 | else |
5179 | res += constant_expression_vector(c, vector: col); |
5180 | |
5181 | if (col + 1 < c.columns()) |
5182 | res += ", " ; |
5183 | } |
5184 | res += ")" ; |
5185 | return res; |
5186 | } |
5187 | } |
5188 | |
5189 | #ifdef _MSC_VER |
5190 | // sprintf warning. |
5191 | // We cannot rely on snprintf existing because, ..., MSVC. |
5192 | #pragma warning(push) |
5193 | #pragma warning(disable : 4996) |
5194 | #endif |
5195 | |
5196 | string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) |
5197 | { |
5198 | string res; |
5199 | float float_value = c.scalar_f16(col, row); |
5200 | |
5201 | // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots |
5202 | // of complicated workarounds, just value-cast to the half type always. |
5203 | if (std::isnan(x: float_value) || std::isinf(x: float_value)) |
5204 | { |
5205 | SPIRType type; |
5206 | type.basetype = SPIRType::Half; |
5207 | type.vecsize = 1; |
5208 | type.columns = 1; |
5209 | |
5210 | if (float_value == numeric_limits<float>::infinity()) |
5211 | res = join(ts: type_to_glsl(type), ts: "(1.0 / 0.0)" ); |
5212 | else if (float_value == -numeric_limits<float>::infinity()) |
5213 | res = join(ts: type_to_glsl(type), ts: "(-1.0 / 0.0)" ); |
5214 | else if (std::isnan(x: float_value)) |
5215 | res = join(ts: type_to_glsl(type), ts: "(0.0 / 0.0)" ); |
5216 | else |
5217 | SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant." ); |
5218 | } |
5219 | else |
5220 | { |
5221 | SPIRType type; |
5222 | type.basetype = SPIRType::Half; |
5223 | type.vecsize = 1; |
5224 | type.columns = 1; |
5225 | res = join(ts: type_to_glsl(type), ts: "(" , ts: convert_to_string(t: float_value, locale_radix_point: current_locale_radix_character), ts: ")" ); |
5226 | } |
5227 | |
5228 | return res; |
5229 | } |
5230 | |
5231 | string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) |
5232 | { |
5233 | string res; |
5234 | float float_value = c.scalar_f32(col, row); |
5235 | |
5236 | if (std::isnan(x: float_value) || std::isinf(x: float_value)) |
5237 | { |
5238 | // Use special representation. |
5239 | if (!is_legacy()) |
5240 | { |
5241 | SPIRType out_type; |
5242 | SPIRType in_type; |
5243 | out_type.basetype = SPIRType::Float; |
5244 | in_type.basetype = SPIRType::UInt; |
5245 | out_type.vecsize = 1; |
5246 | in_type.vecsize = 1; |
5247 | out_type.width = 32; |
5248 | in_type.width = 32; |
5249 | |
5250 | char print_buffer[32]; |
5251 | sprintf(s: print_buffer, format: "0x%xu" , c.scalar(col, row)); |
5252 | |
5253 | const char * = "inf" ; |
5254 | if (float_value == -numeric_limits<float>::infinity()) |
5255 | comment = "-inf" ; |
5256 | else if (std::isnan(x: float_value)) |
5257 | comment = "nan" ; |
5258 | res = join(ts: bitcast_glsl_op(result_type: out_type, argument_type: in_type), ts: "(" , ts&: print_buffer, ts: " /* " , ts&: comment, ts: " */)" ); |
5259 | } |
5260 | else |
5261 | { |
5262 | if (float_value == numeric_limits<float>::infinity()) |
5263 | { |
5264 | if (backend.float_literal_suffix) |
5265 | res = "(1.0f / 0.0f)" ; |
5266 | else |
5267 | res = "(1.0 / 0.0)" ; |
5268 | } |
5269 | else if (float_value == -numeric_limits<float>::infinity()) |
5270 | { |
5271 | if (backend.float_literal_suffix) |
5272 | res = "(-1.0f / 0.0f)" ; |
5273 | else |
5274 | res = "(-1.0 / 0.0)" ; |
5275 | } |
5276 | else if (std::isnan(x: float_value)) |
5277 | { |
5278 | if (backend.float_literal_suffix) |
5279 | res = "(0.0f / 0.0f)" ; |
5280 | else |
5281 | res = "(0.0 / 0.0)" ; |
5282 | } |
5283 | else |
5284 | SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant." ); |
5285 | } |
5286 | } |
5287 | else |
5288 | { |
5289 | res = convert_to_string(t: float_value, locale_radix_point: current_locale_radix_character); |
5290 | if (backend.float_literal_suffix) |
5291 | res += "f" ; |
5292 | } |
5293 | |
5294 | return res; |
5295 | } |
5296 | |
5297 | std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) |
5298 | { |
5299 | string res; |
5300 | double double_value = c.scalar_f64(col, row); |
5301 | |
5302 | if (std::isnan(x: double_value) || std::isinf(x: double_value)) |
5303 | { |
5304 | // Use special representation. |
5305 | if (!is_legacy()) |
5306 | { |
5307 | SPIRType out_type; |
5308 | SPIRType in_type; |
5309 | out_type.basetype = SPIRType::Double; |
5310 | in_type.basetype = SPIRType::UInt64; |
5311 | out_type.vecsize = 1; |
5312 | in_type.vecsize = 1; |
5313 | out_type.width = 64; |
5314 | in_type.width = 64; |
5315 | |
5316 | uint64_t u64_value = c.scalar_u64(col, row); |
5317 | |
5318 | if (options.es) |
5319 | SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile." ); |
5320 | require_extension_internal(ext: "GL_ARB_gpu_shader_int64" ); |
5321 | |
5322 | char print_buffer[64]; |
5323 | sprintf(s: print_buffer, format: "0x%llx%s" , static_cast<unsigned long long>(u64_value), |
5324 | backend.long_long_literal_suffix ? "ull" : "ul" ); |
5325 | |
5326 | const char * = "inf" ; |
5327 | if (double_value == -numeric_limits<double>::infinity()) |
5328 | comment = "-inf" ; |
5329 | else if (std::isnan(x: double_value)) |
5330 | comment = "nan" ; |
5331 | res = join(ts: bitcast_glsl_op(result_type: out_type, argument_type: in_type), ts: "(" , ts&: print_buffer, ts: " /* " , ts&: comment, ts: " */)" ); |
5332 | } |
5333 | else |
5334 | { |
5335 | if (options.es) |
5336 | SPIRV_CROSS_THROW("FP64 not supported in ES profile." ); |
5337 | if (options.version < 400) |
5338 | require_extension_internal(ext: "GL_ARB_gpu_shader_fp64" ); |
5339 | |
5340 | if (double_value == numeric_limits<double>::infinity()) |
5341 | { |
5342 | if (backend.double_literal_suffix) |
5343 | res = "(1.0lf / 0.0lf)" ; |
5344 | else |
5345 | res = "(1.0 / 0.0)" ; |
5346 | } |
5347 | else if (double_value == -numeric_limits<double>::infinity()) |
5348 | { |
5349 | if (backend.double_literal_suffix) |
5350 | res = "(-1.0lf / 0.0lf)" ; |
5351 | else |
5352 | res = "(-1.0 / 0.0)" ; |
5353 | } |
5354 | else if (std::isnan(x: double_value)) |
5355 | { |
5356 | if (backend.double_literal_suffix) |
5357 | res = "(0.0lf / 0.0lf)" ; |
5358 | else |
5359 | res = "(0.0 / 0.0)" ; |
5360 | } |
5361 | else |
5362 | SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant." ); |
5363 | } |
5364 | } |
5365 | else |
5366 | { |
5367 | res = convert_to_string(t: double_value, locale_radix_point: current_locale_radix_character); |
5368 | if (backend.double_literal_suffix) |
5369 | res += "lf" ; |
5370 | } |
5371 | |
5372 | return res; |
5373 | } |
5374 | |
5375 | #ifdef _MSC_VER |
5376 | #pragma warning(pop) |
5377 | #endif |
5378 | |
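     | // Emits one column of a constant (or the whole value for non-matrices), splatting or
     | // swizzle-splatting identical components where the backend allows it.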
5379 | string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector) |
5380 | { |
5381 | auto type = get<SPIRType>(id: c.constant_type); |
5382 | type.columns = 1; |
5383 | |
5384 | auto scalar_type = type; |
5385 | scalar_type.vecsize = 1; |
5386 | |
5387 | string res; |
5388 | bool splat = backend.use_constructor_splatting && c.vector_size() > 1; |
5389 | bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1; |
5390 | |
5391 | if (!type_is_floating_point(type)) |
5392 | { |
5393 | // As a special case, we cannot swizzle literal integers.
5394 | swizzle_splat = false; |
5395 | } |
5396 | |
5397 | if (splat || swizzle_splat) |
5398 | { |
5399 | // Cannot use constant splatting if we have specialization constants somewhere in the vector. |
5400 | for (uint32_t i = 0; i < c.vector_size(); i++) |
5401 | { |
5402 | if (c.specialization_constant_id(col: vector, row: i) != 0) |
5403 | { |
5404 | splat = false; |
5405 | swizzle_splat = false; |
5406 | break; |
5407 | } |
5408 | } |
5409 | } |
5410 | |
5411 | if (splat || swizzle_splat) |
5412 | { |
5413 | if (type.width == 64) |
5414 | { |
5415 | uint64_t ident = c.scalar_u64(vector, 0);
5416 | for (uint32_t i = 1; i < c.vector_size(); i++)
5417 | {
5418 | if (ident != c.scalar_u64(vector, i))
5419 | { |
5420 | splat = false; |
5421 | swizzle_splat = false; |
5422 | break; |
5423 | } |
5424 | } |
5425 | } |
5426 | else |
5427 | { |
5428 | uint32_t ident = c.scalar(vector, 0);
5429 | for (uint32_t i = 1; i < c.vector_size(); i++)
5430 | {
5431 | if (ident != c.scalar(vector, i))
5432 | { |
5433 | splat = false; |
5434 | swizzle_splat = false; |
5435 | } |
5436 | } |
5437 | } |
5438 | } |
5439 | |
5440 | if (c.vector_size() > 1 && !swizzle_splat) |
5441 | res += type_to_glsl(type) + "(" ; |
5442 | |
5443 | switch (type.basetype) |
5444 | { |
5445 | case SPIRType::Half: |
5446 | if (splat || swizzle_splat) |
5447 | { |
5448 | res += convert_half_to_string(c, col: vector, row: 0); |
5449 | if (swizzle_splat) |
5450 | res = remap_swizzle(out_type: get<SPIRType>(id: c.constant_type), input_components: 1, expr: res); |
5451 | } |
5452 | else |
5453 | { |
5454 | for (uint32_t i = 0; i < c.vector_size(); i++) |
5455 | { |
5456 | if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0) |
5457 | res += to_expression(id: c.specialization_constant_id(col: vector, row: i)); |
5458 | else |
5459 | res += convert_half_to_string(c, col: vector, row: i); |
5460 | |
5461 | if (i + 1 < c.vector_size()) |
5462 | res += ", " ; |
5463 | } |
5464 | } |
5465 | break; |
5466 | |
5467 | case SPIRType::Float: |
5468 | if (splat || swizzle_splat) |
5469 | { |
5470 | res += convert_float_to_string(c, col: vector, row: 0); |
5471 | if (swizzle_splat) |
5472 | res = remap_swizzle(out_type: get<SPIRType>(id: c.constant_type), input_components: 1, expr: res); |
5473 | } |
5474 | else |
5475 | { |
5476 | for (uint32_t i = 0; i < c.vector_size(); i++) |
5477 | { |
5478 | if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0) |
5479 | res += to_expression(id: c.specialization_constant_id(col: vector, row: i)); |
5480 | else |
5481 | res += convert_float_to_string(c, col: vector, row: i); |
5482 | |
5483 | if (i + 1 < c.vector_size()) |
5484 | res += ", " ; |
5485 | } |
5486 | } |
5487 | break; |
5488 | |
5489 | case SPIRType::Double: |
5490 | if (splat || swizzle_splat) |
5491 | { |
5492 | res += convert_double_to_string(c, col: vector, row: 0); |
5493 | if (swizzle_splat) |
5494 | res = remap_swizzle(out_type: get<SPIRType>(id: c.constant_type), input_components: 1, expr: res); |
5495 | } |
5496 | else |
5497 | { |
5498 | for (uint32_t i = 0; i < c.vector_size(); i++) |
5499 | { |
5500 | if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0) |
5501 | res += to_expression(id: c.specialization_constant_id(col: vector, row: i)); |
5502 | else |
5503 | res += convert_double_to_string(c, col: vector, row: i); |
5504 | |
5505 | if (i + 1 < c.vector_size()) |
5506 | res += ", " ; |
5507 | } |
5508 | } |
5509 | break; |
5510 | |
5511 | case SPIRType::Int64: |
5512 | { |
5513 | auto tmp = type; |
5514 | tmp.vecsize = 1; |
5515 | tmp.columns = 1; |
5516 | auto int64_type = type_to_glsl(type: tmp); |
5517 | |
5518 | if (splat) |
5519 | { |
5520 | res += convert_to_string(value: c.scalar_i64(col: vector, row: 0), int64_type, long_long_literal_suffix: backend.long_long_literal_suffix); |
5521 | } |
5522 | else |
5523 | { |
5524 | for (uint32_t i = 0; i < c.vector_size(); i++) |
5525 | { |
5526 | if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0) |
5527 | res += to_expression(id: c.specialization_constant_id(col: vector, row: i)); |
5528 | else |
5529 | res += convert_to_string(value: c.scalar_i64(col: vector, row: i), int64_type, long_long_literal_suffix: backend.long_long_literal_suffix); |
5530 | |
5531 | if (i + 1 < c.vector_size()) |
5532 | res += ", " ; |
5533 | } |
5534 | } |
5535 | break; |
5536 | } |
5537 | |
5538 | case SPIRType::UInt64: |
5539 | if (splat) |
5540 | { |
5541 | res += convert_to_string(t: c.scalar_u64(col: vector, row: 0)); |
5542 | if (backend.long_long_literal_suffix) |
5543 | res += "ull" ; |
5544 | else |
5545 | res += "ul" ; |
5546 | } |
5547 | else |
5548 | { |
5549 | for (uint32_t i = 0; i < c.vector_size(); i++) |
5550 | { |
5551 | if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0) |
5552 | res += to_expression(id: c.specialization_constant_id(col: vector, row: i)); |
5553 | else |
5554 | { |
5555 | res += convert_to_string(t: c.scalar_u64(col: vector, row: i)); |
5556 | if (backend.long_long_literal_suffix) |
5557 | res += "ull" ; |
5558 | else |
5559 | res += "ul" ; |
5560 | } |
5561 | |
5562 | if (i + 1 < c.vector_size()) |
5563 | res += ", " ; |
5564 | } |
5565 | } |
5566 | break; |
5567 | |
5568 | case SPIRType::UInt: |
5569 | if (splat) |
5570 | { |
5571 | res += convert_to_string(t: c.scalar(col: vector, row: 0)); |
5572 | if (is_legacy()) |
5573 | { |
5574 | // Fake unsigned constant literals with signed ones if possible. |
5575 | // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. |
5576 | if (c.scalar_i32(col: vector, row: 0) < 0) |
5577 | SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative." ); |
5578 | } |
5579 | else if (backend.uint32_t_literal_suffix) |
5580 | res += "u" ; |
5581 | } |
5582 | else |
5583 | { |
5584 | for (uint32_t i = 0; i < c.vector_size(); i++) |
5585 | { |
5586 | if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0) |
5587 | res += to_expression(id: c.specialization_constant_id(col: vector, row: i)); |
5588 | else |
5589 | { |
5590 | res += convert_to_string(t: c.scalar(col: vector, row: i)); |
5591 | if (is_legacy()) |
5592 | { |
5593 | // Fake unsigned constant literals with signed ones if possible. |
5594 | // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. |
5595 | if (c.scalar_i32(col: vector, row: i) < 0) |
5596 | SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made " |
5597 | "the literal negative." ); |
5598 | } |
5599 | else if (backend.uint32_t_literal_suffix) |
5600 | res += "u" ; |
5601 | } |
5602 | |
5603 | if (i + 1 < c.vector_size()) |
5604 | res += ", " ; |
5605 | } |
5606 | } |
5607 | break; |
5608 | |
5609 | case SPIRType::Int: |
5610 | if (splat) |
5611 | res += convert_to_string(value: c.scalar_i32(col: vector, row: 0)); |
5612 | else |
5613 | { |
5614 | for (uint32_t i = 0; i < c.vector_size(); i++) |
5615 | { |
5616 | if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0) |
5617 | res += to_expression(id: c.specialization_constant_id(col: vector, row: i)); |
5618 | else |
5619 | res += convert_to_string(value: c.scalar_i32(col: vector, row: i)); |
5620 | if (i + 1 < c.vector_size()) |
5621 | res += ", " ; |
5622 | } |
5623 | } |
5624 | break; |
5625 | |
5626 | case SPIRType::UShort: |
5627 | if (splat) |
5628 | { |
5629 | res += convert_to_string(t: c.scalar(col: vector, row: 0)); |
5630 | } |
5631 | else |
5632 | { |
5633 | for (uint32_t i = 0; i < c.vector_size(); i++) |
5634 | { |
5635 | if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0) |
5636 | res += to_expression(id: c.specialization_constant_id(col: vector, row: i)); |
5637 | else |
5638 | { |
5639 | if (*backend.uint16_t_literal_suffix) |
5640 | { |
5641 | res += convert_to_string(t: c.scalar_u16(col: vector, row: i)); |
5642 | res += backend.uint16_t_literal_suffix; |
5643 | } |
5644 | else |
5645 | { |
5646 | // If backend doesn't have a literal suffix, we need to value cast. |
5647 | res += type_to_glsl(type: scalar_type); |
5648 | res += "(" ; |
5649 | res += convert_to_string(t: c.scalar_u16(col: vector, row: i)); |
5650 | res += ")" ; |
5651 | } |
5652 | } |
5653 | |
5654 | if (i + 1 < c.vector_size()) |
5655 | res += ", " ; |
5656 | } |
5657 | } |
5658 | break; |
5659 | |
5660 | case SPIRType::Short: |
5661 | if (splat) |
5662 | { |
5663 | res += convert_to_string(t: c.scalar_i16(col: vector, row: 0)); |
5664 | } |
5665 | else |
5666 | { |
5667 | for (uint32_t i = 0; i < c.vector_size(); i++) |
5668 | { |
5669 | if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0) |
5670 | res += to_expression(id: c.specialization_constant_id(col: vector, row: i)); |
5671 | else |
5672 | { |
5673 | if (*backend.int16_t_literal_suffix) |
5674 | { |
5675 | res += convert_to_string(t: c.scalar_i16(col: vector, row: i)); |
5676 | res += backend.int16_t_literal_suffix; |
5677 | } |
5678 | else |
5679 | { |
5680 | // If backend doesn't have a literal suffix, we need to value cast. |
5681 | res += type_to_glsl(type: scalar_type); |
5682 | res += "(" ; |
5683 | res += convert_to_string(t: c.scalar_i16(col: vector, row: i)); |
5684 | res += ")" ; |
5685 | } |
5686 | } |
5687 | |
5688 | if (i + 1 < c.vector_size()) |
5689 | res += ", " ; |
5690 | } |
5691 | } |
5692 | break; |
5693 | |
5694 | case SPIRType::UByte: |
5695 | if (splat) |
5696 | { |
5697 | res += convert_to_string(t: c.scalar_u8(col: vector, row: 0)); |
5698 | } |
5699 | else |
5700 | { |
5701 | for (uint32_t i = 0; i < c.vector_size(); i++) |
5702 | { |
5703 | if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0) |
5704 | res += to_expression(id: c.specialization_constant_id(col: vector, row: i)); |
5705 | else |
5706 | { |
5707 | res += type_to_glsl(type: scalar_type); |
5708 | res += "(" ; |
5709 | res += convert_to_string(t: c.scalar_u8(col: vector, row: i)); |
5710 | res += ")" ; |
5711 | } |
5712 | |
5713 | if (i + 1 < c.vector_size()) |
5714 | res += ", " ; |
5715 | } |
5716 | } |
5717 | break; |
5718 | |
5719 | case SPIRType::SByte: |
5720 | if (splat) |
5721 | { |
5722 | res += convert_to_string(t: c.scalar_i8(col: vector, row: 0)); |
5723 | } |
5724 | else |
5725 | { |
5726 | for (uint32_t i = 0; i < c.vector_size(); i++) |
5727 | { |
5728 | if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0) |
5729 | res += to_expression(id: c.specialization_constant_id(col: vector, row: i)); |
5730 | else |
5731 | { |
5732 | res += type_to_glsl(type: scalar_type); |
5733 | res += "(" ; |
5734 | res += convert_to_string(t: c.scalar_i8(col: vector, row: i)); |
5735 | res += ")" ; |
5736 | } |
5737 | |
5738 | if (i + 1 < c.vector_size()) |
5739 | res += ", " ; |
5740 | } |
5741 | } |
5742 | break; |
5743 | |
5744 | case SPIRType::Boolean: |
5745 | if (splat) |
5746 | res += c.scalar(col: vector, row: 0) ? "true" : "false" ; |
5747 | else |
5748 | { |
5749 | for (uint32_t i = 0; i < c.vector_size(); i++) |
5750 | { |
5751 | if (c.vector_size() > 1 && c.specialization_constant_id(col: vector, row: i) != 0) |
5752 | res += to_expression(id: c.specialization_constant_id(col: vector, row: i)); |
5753 | else |
5754 | res += c.scalar(col: vector, row: i) ? "true" : "false" ; |
5755 | |
5756 | if (i + 1 < c.vector_size()) |
5757 | res += ", " ; |
5758 | } |
5759 | } |
5760 | break; |
5761 | |
5762 | default: |
5763 | SPIRV_CROSS_THROW("Invalid constant expression basetype." ); |
5764 | } |
5765 | |
5766 | if (c.vector_size() > 1 && !swizzle_splat) |
5767 | res += ")" ; |
5768 | |
5769 | return res; |
5770 | } |
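// Example of what the routine above produces (constant values illustrative, assuming the backend's
// default "u" literal suffix and no specialization constants): a uvec3 constant {1, 2, 3} becomes
// "uvec3(1u, 2u, 3u)", while a float vec4 with all lanes equal can be emitted as the splat
// "vec4(1.0)". Lanes that are specialization constants are substituted with their own expressions
// instead of literals.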
5771 | |
5772 | SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id) |
5773 | { |
5774 | forced_temporaries.insert(id);
5775 | emit_uninitialized_temporary(type, id);
5776 | return set<SPIRExpression>(id, to_name(id), type, true);
5777 | } |
5778 | |
5779 | void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id) |
5780 | { |
5781 | // If we're declaring temporaries inside continue blocks, |
5782 | // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. |
5783 | if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id))
5784 | {
5785 | auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
5786 | if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
5787 | [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5788 | return tmp.first == result_type && tmp.second == result_id;
5789 | }) == end(header.declare_temporary))
5790 | {
5791 | header.declare_temporary.emplace_back(result_type, result_id);
5792 | hoisted_temporaries.insert(result_id);
5793 | force_recompile();
5794 | }
5795 | }
5796 | else if (hoisted_temporaries.count(result_id) == 0)
5797 | {
5798 | auto &type = get<SPIRType>(result_type);
5799 | auto &flags = get_decoration_bitset(result_id);
5800 | 
5801 | // The result_id has not been made into an expression yet, so use flags interface.
5802 | add_local_variable_name(result_id);
5803 | 
5804 | string initializer;
5805 | if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
5806 | initializer = join(" = ", to_zero_initialized_expression(result_type));
5807 | 
5808 | statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";");
5809 | } |
5810 | } |
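// Sketch of the declaration emitted by the non-hoisted case above (identifier name illustrative,
// assuming no extra qualifiers):
//   float _1234;
// or, with options.force_zero_initialized_variables set and a zero-initializable type:
//   float _1234 = 0.0;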
5811 | |
5812 | string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) |
5813 | { |
5814 | auto &type = get<SPIRType>(result_type);
5815 | 
5816 | // If we're declaring temporaries inside continue blocks,
5817 | // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
5818 | if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id))
5819 | {
5820 | auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
5821 | if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
5822 | [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5823 | return tmp.first == result_type && tmp.second == result_id;
5824 | }) == end(header.declare_temporary))
5825 | {
5826 | header.declare_temporary.emplace_back(result_type, result_id);
5827 | hoisted_temporaries.insert(result_id);
5828 | force_recompile_guarantee_forward_progress();
5829 | }
5830 | 
5831 | return join(to_name(result_id), " = ");
5832 | }
5833 | else if (hoisted_temporaries.count(result_id))
5834 | {
5835 | // The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
5836 | return join(to_name(result_id), " = ");
5837 | }
5838 | else
5839 | {
5840 | // The result_id has not been made into an expression yet, so use flags interface.
5841 | add_local_variable_name(result_id);
5842 | auto &flags = get_decoration_bitset(result_id);
5843 | return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
5844 | } |
5845 | } |
5846 | |
5847 | bool CompilerGLSL::expression_is_forwarded(uint32_t id) const |
5848 | { |
5849 | return forwarded_temporaries.count(x: id) != 0; |
5850 | } |
5851 | |
5852 | bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const |
5853 | { |
5854 | return suppressed_usage_tracking.count(x: id) != 0; |
5855 | } |
5856 | |
5857 | bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const |
5858 | { |
5859 | auto *expr = maybe_get<SPIRExpression>(id); |
5860 | if (!expr) |
5861 | return false; |
5862 | |
5863 | // If we're emitting code at a deeper loop level than when we emitted the expression, |
5864 | // we're probably reading the same expression over and over. |
5865 | return current_loop_level > expr->emitted_loop_level; |
5866 | } |
5867 | |
5868 | SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding, |
5869 | bool suppress_usage_tracking) |
5870 | { |
5871 | if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
5872 | {
5873 | // Just forward it without temporary.
5874 | // If the forward is trivial, we do not force flushing to temporary for this expression.
5875 | forwarded_temporaries.insert(result_id);
5876 | if (suppress_usage_tracking)
5877 | suppressed_usage_tracking.insert(result_id);
5878 | 
5879 | return set<SPIRExpression>(result_id, rhs, result_type, true);
5880 | }
5881 | else
5882 | {
5883 | // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
5884 | statement(declare_temporary(result_type, result_id), rhs, ";");
5885 | return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
5886 | } |
5887 | } |
5888 | |
5889 | void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) |
5890 | { |
5891 | bool forward = should_forward(id: op0); |
5892 | emit_op(result_type, result_id, rhs: join(ts&: op, ts: to_enclosed_unpacked_expression(id: op0)), forwarding: forward); |
5893 | inherit_expression_dependencies(dst: result_id, source: op0); |
5894 | } |
5895 | |
5896 | void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) |
5897 | { |
5898 | // Various FP arithmetic opcodes such as add, sub, mul will hit this. |
5899 | bool force_temporary_precise = backend.support_precise_qualifier && |
5900 | has_decoration(id: result_id, decoration: DecorationNoContraction) && |
5901 | type_is_floating_point(type: get<SPIRType>(id: result_type)); |
5902 | bool forward = should_forward(id: op0) && should_forward(id: op1) && !force_temporary_precise; |
5903 | |
5904 | emit_op(result_type, result_id, |
5905 | rhs: join(ts: to_enclosed_unpacked_expression(id: op0), ts: " " , ts&: op, ts: " " , ts: to_enclosed_unpacked_expression(id: op1)), forwarding: forward); |
5906 | |
5907 | inherit_expression_dependencies(dst: result_id, source: op0); |
5908 | inherit_expression_dependencies(dst: result_id, source: op1); |
5909 | } |
5910 | |
5911 | void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op) |
5912 | { |
5913 | auto &type = get<SPIRType>(id: result_type); |
5914 | auto expr = type_to_glsl_constructor(type); |
5915 | expr += '('; |
5916 | for (uint32_t i = 0; i < type.vecsize; i++) |
5917 | { |
5918 | // Make sure to call to_expression multiple times to ensure |
5919 | // that these expressions are properly flushed to temporaries if needed. |
5920 | expr += op; |
5921 | expr += to_extract_component_expression(id: operand, index: i); |
5922 | |
5923 | if (i + 1 < type.vecsize) |
5924 | expr += ", " ; |
5925 | } |
5926 | expr += ')'; |
5927 | emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: operand)); |
5928 | |
5929 | inherit_expression_dependencies(dst: result_id, source: operand); |
5930 | } |
5931 | |
5932 | void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, |
5933 | const char *op, bool negate, SPIRType::BaseType expected_type) |
5934 | { |
5935 | auto &type0 = expression_type(id: op0); |
5936 | auto &type1 = expression_type(id: op1); |
5937 | |
5938 | SPIRType target_type0 = type0; |
5939 | SPIRType target_type1 = type1; |
5940 | target_type0.basetype = expected_type; |
5941 | target_type1.basetype = expected_type; |
5942 | target_type0.vecsize = 1; |
5943 | target_type1.vecsize = 1; |
5944 | |
5945 | auto &type = get<SPIRType>(id: result_type); |
5946 | auto expr = type_to_glsl_constructor(type); |
5947 | expr += '('; |
5948 | for (uint32_t i = 0; i < type.vecsize; i++) |
5949 | { |
5950 | // Make sure to call to_expression multiple times to ensure |
5951 | // that these expressions are properly flushed to temporaries if needed. |
5952 | if (negate) |
5953 | expr += "!(" ; |
5954 | |
5955 | if (expected_type != SPIRType::Unknown && type0.basetype != expected_type) |
5956 | expr += bitcast_expression(target_type: target_type0, expr_type: type0.basetype, expr: to_extract_component_expression(id: op0, index: i)); |
5957 | else |
5958 | expr += to_extract_component_expression(id: op0, index: i); |
5959 | |
5960 | expr += ' '; |
5961 | expr += op; |
5962 | expr += ' '; |
5963 | |
5964 | if (expected_type != SPIRType::Unknown && type1.basetype != expected_type) |
5965 | expr += bitcast_expression(target_type: target_type1, expr_type: type1.basetype, expr: to_extract_component_expression(id: op1, index: i)); |
5966 | else |
5967 | expr += to_extract_component_expression(id: op1, index: i); |
5968 | |
5969 | if (negate) |
5970 | expr += ")" ; |
5971 | |
5972 | if (i + 1 < type.vecsize) |
5973 | expr += ", " ; |
5974 | } |
5975 | expr += ')'; |
5976 | emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1)); |
5977 | |
5978 | inherit_expression_dependencies(dst: result_id, source: op0); |
5979 | inherit_expression_dependencies(dst: result_id, source: op1); |
5980 | } |
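// For illustration (operand names illustrative): unrolling a component-wise comparison of two vec2
// values with op "==" and negate = true yields roughly
//   bvec2(!(a.x == b.x), !(a.y == b.y))
// with per-component bitcasts inserted when an operand's base type differs from expected_type.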
5981 | |
5982 | SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type, |
5983 | uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type) |
5984 | { |
5985 | auto &type0 = expression_type(id: op0); |
5986 | auto &type1 = expression_type(id: op1); |
5987 | |
5988 | // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs. |
5989 | // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected |
5990 | // since equality test is exactly the same. |
5991 | bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type); |
5992 | |
5993 | // Create a fake type so we can bitcast to it. |
5994 | // We only deal with regular arithmetic types here like int, uints and so on. |
5995 | SPIRType expected_type; |
5996 | expected_type.basetype = input_type; |
5997 | expected_type.vecsize = type0.vecsize; |
5998 | expected_type.columns = type0.columns; |
5999 | expected_type.width = type0.width; |
6000 | |
6001 | if (cast) |
6002 | { |
6003 | cast_op0 = bitcast_glsl(result_type: expected_type, arg: op0); |
6004 | cast_op1 = bitcast_glsl(result_type: expected_type, arg: op1); |
6005 | } |
6006 | else |
6007 | { |
6008 | // If we don't cast, our actual input type is that of the first (or second) argument. |
6009 | cast_op0 = to_enclosed_unpacked_expression(id: op0); |
6010 | cast_op1 = to_enclosed_unpacked_expression(id: op1); |
6011 | input_type = type0.basetype; |
6012 | } |
6013 | |
6014 | return expected_type; |
6015 | } |
6016 | |
6017 | bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0) |
6018 | { |
6019 | // Some bitcasts may require complex casting sequences, and are implemented here. |
6020 | // Otherwise a simply unary function will do with bitcast_glsl_op. |
6021 | |
6022 | auto &output_type = get<SPIRType>(id: result_type); |
6023 | auto &input_type = expression_type(id: op0); |
6024 | string expr; |
6025 | |
6026 | if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1) |
6027 | expr = join(ts: "unpackFloat2x16(floatBitsToUint(" , ts: to_unpacked_expression(id: op0), ts: "))" ); |
6028 | else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half && |
6029 | input_type.vecsize == 2) |
6030 | expr = join(ts: "uintBitsToFloat(packFloat2x16(" , ts: to_unpacked_expression(id: op0), ts: "))" ); |
6031 | else |
6032 | return false; |
6033 | |
6034 | emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: op0)); |
6035 | return true; |
6036 | } |
6037 | |
6038 | void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, |
6039 | const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) |
6040 | { |
6041 | string cast_op0, cast_op1; |
6042 | auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); |
6043 | auto &out_type = get<SPIRType>(id: result_type); |
6044 | |
6045 | // We might have casted away from the result type, so bitcast again. |
6046 | // For example, arithmetic right shift with uint inputs. |
6047 | // Special case boolean outputs since relational opcodes output booleans instead of int/uint. |
6048 | string expr; |
6049 | if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) |
6050 | { |
6051 | expected_type.basetype = input_type; |
6052 | expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type); |
6053 | expr += '('; |
6054 | expr += join(ts&: cast_op0, ts: " " , ts&: op, ts: " " , ts&: cast_op1); |
6055 | expr += ')'; |
6056 | } |
6057 | else |
6058 | expr += join(ts&: cast_op0, ts: " " , ts&: op, ts: " " , ts&: cast_op1); |
6059 | |
6060 | emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1)); |
6061 | inherit_expression_dependencies(dst: result_id, source: op0); |
6062 | inherit_expression_dependencies(dst: result_id, source: op1); |
6063 | } |
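// Illustrative case for the re-bitcast above (operand names illustrative): an arithmetic right
// shift where both inputs are uint ends up roughly as
//   uint(int(a) >> int(b))
// i.e. operands are cast to the signed input type and the result is cast back to the result type;
// the exact casts depend on bitcast_glsl/bitcast_glsl_op for the types involved.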
6064 | |
6065 | void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) |
6066 | { |
6067 | bool forward = should_forward(id: op0); |
6068 | emit_op(result_type, result_id, rhs: join(ts&: op, ts: "(" , ts: to_unpacked_expression(id: op0), ts: ")" ), forwarding: forward); |
6069 | inherit_expression_dependencies(dst: result_id, source: op0); |
6070 | } |
6071 | |
6072 | void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, |
6073 | const char *op) |
6074 | { |
6075 | bool forward = should_forward(id: op0) && should_forward(id: op1); |
6076 | emit_op(result_type, result_id, rhs: join(ts&: op, ts: "(" , ts: to_unpacked_expression(id: op0), ts: ", " , ts: to_unpacked_expression(id: op1), ts: ")" ), |
6077 | forwarding: forward); |
6078 | inherit_expression_dependencies(dst: result_id, source: op0); |
6079 | inherit_expression_dependencies(dst: result_id, source: op1); |
6080 | } |
6081 | |
6082 | void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, |
6083 | const char *op) |
6084 | { |
6085 | auto &type = get<SPIRType>(id: result_type); |
6086 | if (type_is_floating_point(type)) |
6087 | { |
6088 | if (!options.vulkan_semantics) |
6089 | SPIRV_CROSS_THROW("Floating point atomics requires Vulkan semantics." ); |
6090 | if (options.es) |
6091 | SPIRV_CROSS_THROW("Floating point atomics requires desktop GLSL." ); |
6092 | require_extension_internal(ext: "GL_EXT_shader_atomic_float" ); |
6093 | } |
6094 | |
6095 | forced_temporaries.insert(x: result_id); |
6096 | emit_op(result_type, result_id, |
6097 | rhs: join(ts&: op, ts: "(" , ts: to_non_uniform_aware_expression(id: op0), ts: ", " , |
6098 | ts: to_unpacked_expression(id: op1), ts: ")" ), forwarding: false); |
6099 | flush_all_atomic_capable_variables(); |
6100 | } |
6101 | |
6102 | void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, |
6103 | uint32_t op0, uint32_t op1, uint32_t op2, |
6104 | const char *op) |
6105 | { |
6106 | forced_temporaries.insert(x: result_id); |
6107 | emit_op(result_type, result_id, |
6108 | rhs: join(ts&: op, ts: "(" , ts: to_non_uniform_aware_expression(id: op0), ts: ", " , |
6109 | ts: to_unpacked_expression(id: op1), ts: ", " , ts: to_unpacked_expression(id: op2), ts: ")" ), forwarding: false); |
6110 | flush_all_atomic_capable_variables(); |
6111 | } |
6112 | |
6113 | void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, |
6114 | SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type) |
6115 | { |
6116 | auto &out_type = get<SPIRType>(id: result_type); |
6117 | auto &expr_type = expression_type(id: op0); |
6118 | auto expected_type = out_type; |
6119 | |
6120 | // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends. |
6121 | expected_type.basetype = input_type; |
6122 | expected_type.width = expr_type.width; |
6123 | |
6124 | string cast_op; |
6125 | if (expr_type.basetype != input_type) |
6126 | { |
6127 | if (expr_type.basetype == SPIRType::Boolean) |
6128 | cast_op = join(ts: type_to_glsl(type: expected_type), ts: "(" , ts: to_unpacked_expression(id: op0), ts: ")" ); |
6129 | else |
6130 | cast_op = bitcast_glsl(result_type: expected_type, arg: op0); |
6131 | } |
6132 | else |
6133 | cast_op = to_unpacked_expression(id: op0); |
6134 | |
6135 | string expr; |
6136 | if (out_type.basetype != expected_result_type) |
6137 | { |
6138 | expected_type.basetype = expected_result_type; |
6139 | expected_type.width = out_type.width; |
6140 | if (out_type.basetype == SPIRType::Boolean) |
6141 | expr = type_to_glsl(type: out_type); |
6142 | else |
6143 | expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type); |
6144 | expr += '('; |
6145 | expr += join(ts&: op, ts: "(" , ts&: cast_op, ts: ")" ); |
6146 | expr += ')'; |
6147 | } |
6148 | else |
6149 | { |
6150 | expr += join(ts&: op, ts: "(" , ts&: cast_op, ts: ")" ); |
6151 | } |
6152 | |
6153 | emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0)); |
6154 | inherit_expression_dependencies(dst: result_id, source: op0); |
6155 | } |
6156 | |
6157 | // Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs |
6158 | // and different vector sizes all at once. Need a special purpose method here. |
6159 | void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6160 | uint32_t op2, const char *op, |
6161 | SPIRType::BaseType expected_result_type, |
6162 | SPIRType::BaseType input_type0, SPIRType::BaseType input_type1, |
6163 | SPIRType::BaseType input_type2) |
6164 | { |
6165 | auto &out_type = get<SPIRType>(id: result_type); |
6166 | auto expected_type = out_type; |
6167 | expected_type.basetype = input_type0; |
6168 | |
6169 | string cast_op0 = |
6170 | expression_type(id: op0).basetype != input_type0 ? bitcast_glsl(result_type: expected_type, arg: op0) : to_unpacked_expression(id: op0); |
6171 | |
6172 | auto op1_expr = to_unpacked_expression(id: op1); |
6173 | auto op2_expr = to_unpacked_expression(id: op2); |
6174 | |
6175 | // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit. |
6176 | expected_type.basetype = input_type1; |
6177 | expected_type.vecsize = 1; |
6178 | string cast_op1 = expression_type(id: op1).basetype != input_type1 ? |
6179 | join(ts: type_to_glsl_constructor(type: expected_type), ts: "(" , ts&: op1_expr, ts: ")" ) : |
6180 | op1_expr; |
6181 | |
6182 | expected_type.basetype = input_type2; |
6183 | expected_type.vecsize = 1; |
6184 | string cast_op2 = expression_type(id: op2).basetype != input_type2 ? |
6185 | join(ts: type_to_glsl_constructor(type: expected_type), ts: "(" , ts&: op2_expr, ts: ")" ) : |
6186 | op2_expr; |
6187 | |
6188 | string expr; |
6189 | if (out_type.basetype != expected_result_type) |
6190 | { |
6191 | expected_type.vecsize = out_type.vecsize; |
6192 | expected_type.basetype = expected_result_type; |
6193 | expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type); |
6194 | expr += '('; |
6195 | expr += join(ts&: op, ts: "(" , ts&: cast_op0, ts: ", " , ts&: cast_op1, ts: ", " , ts&: cast_op2, ts: ")" ); |
6196 | expr += ')'; |
6197 | } |
6198 | else |
6199 | { |
6200 | expr += join(ts&: op, ts: "(" , ts&: cast_op0, ts: ", " , ts&: cast_op1, ts: ", " , ts&: cast_op2, ts: ")" ); |
6201 | } |
6202 | |
6203 | emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2)); |
6204 | inherit_expression_dependencies(dst: result_id, source: op0); |
6205 | inherit_expression_dependencies(dst: result_id, source: op1); |
6206 | inherit_expression_dependencies(dst: result_id, source: op2); |
6207 | } |
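// Example of the special-purpose handling above (operand names illustrative): a signed
// bitfieldExtract on uint data with a uint result becomes roughly
//   uint(bitfieldExtract(int(value), int(offset), int(count)))
// where offset/count receive scalar value-casts and the data/result pair receives bitcasts as needed.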
6208 | |
6209 | void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, |
6210 | uint32_t op2, const char *op, SPIRType::BaseType input_type) |
6211 | { |
6212 | auto &out_type = get<SPIRType>(id: result_type); |
6213 | auto expected_type = out_type; |
6214 | expected_type.basetype = input_type; |
6215 | string cast_op0 = |
6216 | expression_type(id: op0).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op0) : to_unpacked_expression(id: op0); |
6217 | string cast_op1 = |
6218 | expression_type(id: op1).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op1) : to_unpacked_expression(id: op1); |
6219 | string cast_op2 = |
6220 | expression_type(id: op2).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op2) : to_unpacked_expression(id: op2); |
6221 | |
6222 | string expr; |
6223 | if (out_type.basetype != input_type) |
6224 | { |
6225 | expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type); |
6226 | expr += '('; |
6227 | expr += join(ts&: op, ts: "(" , ts&: cast_op0, ts: ", " , ts&: cast_op1, ts: ", " , ts&: cast_op2, ts: ")" ); |
6228 | expr += ')'; |
6229 | } |
6230 | else |
6231 | { |
6232 | expr += join(ts&: op, ts: "(" , ts&: cast_op0, ts: ", " , ts&: cast_op1, ts: ", " , ts&: cast_op2, ts: ")" ); |
6233 | } |
6234 | |
6235 | emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2)); |
6236 | inherit_expression_dependencies(dst: result_id, source: op0); |
6237 | inherit_expression_dependencies(dst: result_id, source: op1); |
6238 | inherit_expression_dependencies(dst: result_id, source: op2); |
6239 | } |
6240 | |
6241 | void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0, |
6242 | uint32_t op1, const char *op, SPIRType::BaseType input_type) |
6243 | { |
6244 | // Special purpose method for implementing clustered subgroup opcodes. |
6245 | // Main difference is that op1 does not participate in any casting, it needs to be a literal. |
6246 | auto &out_type = get<SPIRType>(id: result_type); |
6247 | auto expected_type = out_type; |
6248 | expected_type.basetype = input_type; |
6249 | string cast_op0 = |
6250 | expression_type(id: op0).basetype != input_type ? bitcast_glsl(result_type: expected_type, arg: op0) : to_unpacked_expression(id: op0); |
6251 | |
6252 | string expr; |
6253 | if (out_type.basetype != input_type) |
6254 | { |
6255 | expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type); |
6256 | expr += '('; |
6257 | expr += join(ts&: op, ts: "(" , ts&: cast_op0, ts: ", " , ts: to_expression(id: op1), ts: ")" ); |
6258 | expr += ')'; |
6259 | } |
6260 | else |
6261 | { |
6262 | expr += join(ts&: op, ts: "(" , ts&: cast_op0, ts: ", " , ts: to_expression(id: op1), ts: ")" ); |
6263 | } |
6264 | |
6265 | emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0)); |
6266 | inherit_expression_dependencies(dst: result_id, source: op0); |
6267 | } |
6268 | |
6269 | void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, |
6270 | const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) |
6271 | { |
6272 | string cast_op0, cast_op1; |
6273 | auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); |
6274 | auto &out_type = get<SPIRType>(id: result_type); |
6275 | |
6276 | // Special case boolean outputs since relational opcodes output booleans instead of int/uint. |
6277 | string expr; |
6278 | if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) |
6279 | { |
6280 | expected_type.basetype = input_type; |
6281 | expr = bitcast_glsl_op(result_type: out_type, argument_type: expected_type); |
6282 | expr += '('; |
6283 | expr += join(ts&: op, ts: "(" , ts&: cast_op0, ts: ", " , ts&: cast_op1, ts: ")" ); |
6284 | expr += ')'; |
6285 | } |
6286 | else |
6287 | { |
6288 | expr += join(ts&: op, ts: "(" , ts&: cast_op0, ts: ", " , ts&: cast_op1, ts: ")" ); |
6289 | } |
6290 | |
6291 | emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1)); |
6292 | inherit_expression_dependencies(dst: result_id, source: op0); |
6293 | inherit_expression_dependencies(dst: result_id, source: op1); |
6294 | } |
6295 | |
6296 | void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, |
6297 | uint32_t op2, const char *op) |
6298 | { |
6299 | bool forward = should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2); |
6300 | emit_op(result_type, result_id, |
6301 | rhs: join(ts&: op, ts: "(" , ts: to_unpacked_expression(id: op0), ts: ", " , ts: to_unpacked_expression(id: op1), ts: ", " , |
6302 | ts: to_unpacked_expression(id: op2), ts: ")" ), |
6303 | forwarding: forward); |
6304 | |
6305 | inherit_expression_dependencies(dst: result_id, source: op0); |
6306 | inherit_expression_dependencies(dst: result_id, source: op1); |
6307 | inherit_expression_dependencies(dst: result_id, source: op2); |
6308 | } |
6309 | |
6310 | void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, |
6311 | uint32_t op2, uint32_t op3, const char *op) |
6312 | { |
6313 | bool forward = should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2) && should_forward(id: op3); |
6314 | emit_op(result_type, result_id, |
6315 | rhs: join(ts&: op, ts: "(" , ts: to_unpacked_expression(id: op0), ts: ", " , ts: to_unpacked_expression(id: op1), ts: ", " , |
6316 | ts: to_unpacked_expression(id: op2), ts: ", " , ts: to_unpacked_expression(id: op3), ts: ")" ), |
6317 | forwarding: forward); |
6318 | |
6319 | inherit_expression_dependencies(dst: result_id, source: op0); |
6320 | inherit_expression_dependencies(dst: result_id, source: op1); |
6321 | inherit_expression_dependencies(dst: result_id, source: op2); |
6322 | inherit_expression_dependencies(dst: result_id, source: op3); |
6323 | } |
6324 | |
6325 | void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, |
6326 | uint32_t op2, uint32_t op3, const char *op, |
6327 | SPIRType::BaseType offset_count_type) |
6328 | { |
6329 | // Only need to cast offset/count arguments. Types of base/insert must be same as result type, |
6330 | // and bitfieldInsert is sign invariant. |
6331 | bool forward = should_forward(id: op0) && should_forward(id: op1) && should_forward(id: op2) && should_forward(id: op3); |
6332 | |
6333 | auto op0_expr = to_unpacked_expression(id: op0); |
6334 | auto op1_expr = to_unpacked_expression(id: op1); |
6335 | auto op2_expr = to_unpacked_expression(id: op2); |
6336 | auto op3_expr = to_unpacked_expression(id: op3); |
6337 | |
6338 | SPIRType target_type; |
6339 | target_type.vecsize = 1; |
6340 | target_type.basetype = offset_count_type; |
6341 | |
6342 | if (expression_type(id: op2).basetype != offset_count_type) |
6343 | { |
6344 | // Value-cast here. Input might be 16-bit. GLSL requires int. |
6345 | op2_expr = join(ts: type_to_glsl_constructor(type: target_type), ts: "(" , ts&: op2_expr, ts: ")" ); |
6346 | } |
6347 | |
6348 | if (expression_type(id: op3).basetype != offset_count_type) |
6349 | { |
6350 | // Value-cast here. Input might be 16-bit. GLSL requires int. |
6351 | op3_expr = join(ts: type_to_glsl_constructor(type: target_type), ts: "(" , ts&: op3_expr, ts: ")" ); |
6352 | } |
6353 | |
6354 | emit_op(result_type, result_id, rhs: join(ts&: op, ts: "(" , ts&: op0_expr, ts: ", " , ts&: op1_expr, ts: ", " , ts&: op2_expr, ts: ", " , ts&: op3_expr, ts: ")" ), |
6355 | forwarding: forward); |
6356 | |
6357 | inherit_expression_dependencies(dst: result_id, source: op0); |
6358 | inherit_expression_dependencies(dst: result_id, source: op1); |
6359 | inherit_expression_dependencies(dst: result_id, source: op2); |
6360 | inherit_expression_dependencies(dst: result_id, source: op3); |
6361 | } |
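// Example of the offset/count casting above (operand names illustrative): with 16-bit offset and
// count operands this emits roughly
//   bitfieldInsert(base, insert, int(offset), int(count))
// i.e. only the last two arguments are value-cast to the int type GLSL requires.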
6362 | |
6363 | string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex) |
6364 | { |
6365 | const char *type; |
6366 | switch (imgtype.image.dim) |
6367 | { |
6368 | case spv::Dim1D: |
6369 | // Force 2D path for ES. |
6370 | if (options.es) |
6371 | type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D" ; |
6372 | else |
6373 | type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D" ; |
6374 | break; |
6375 | case spv::Dim2D: |
6376 | type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D" ; |
6377 | break; |
6378 | case spv::Dim3D: |
6379 | type = "3D" ; |
6380 | break; |
6381 | case spv::DimCube: |
6382 | type = "Cube" ; |
6383 | break; |
6384 | case spv::DimRect: |
6385 | type = "2DRect" ; |
6386 | break; |
6387 | case spv::DimBuffer: |
6388 | type = "Buffer" ; |
6389 | break; |
6390 | case spv::DimSubpassData: |
6391 | type = "2D" ; |
6392 | break; |
6393 | default: |
6394 | type = "" ; |
6395 | break; |
6396 | } |
6397 | |
6398 | // In legacy GLSL, an extension is required for textureLod in the fragment |
6399 | // shader or textureGrad anywhere. |
6400 | bool legacy_lod_ext = false; |
6401 | auto &execution = get_entry_point(); |
6402 | if (op == "textureGrad" || op == "textureProjGrad" || |
6403 | ((op == "textureLod" || op == "textureProjLod" ) && execution.model != ExecutionModelVertex)) |
6404 | { |
6405 | if (is_legacy_es()) |
6406 | { |
6407 | legacy_lod_ext = true; |
6408 | require_extension_internal(ext: "GL_EXT_shader_texture_lod" ); |
6409 | } |
6410 | else if (is_legacy_desktop()) |
6411 | require_extension_internal(ext: "GL_ARB_shader_texture_lod" ); |
6412 | } |
6413 | |
6414 | if (op == "textureLodOffset" || op == "textureProjLodOffset" ) |
6415 | { |
6416 | if (is_legacy_es()) |
6417 | SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES" )); |
6418 | |
6419 | require_extension_internal(ext: "GL_EXT_gpu_shader4" ); |
6420 | } |
6421 | |
6422 | // GLES has very limited support for shadow samplers. |
6423 | // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers, |
6424 | // everything else can just throw |
6425 | bool is_comparison = is_depth_image(type: imgtype, id: tex); |
6426 | if (is_comparison && is_legacy_es()) |
6427 | { |
6428 | if (op == "texture" || op == "textureProj" ) |
6429 | require_extension_internal(ext: "GL_EXT_shadow_samplers" ); |
6430 | else |
6431 | SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES" )); |
6432 | } |
6433 | |
6434 | if (op == "textureSize" ) |
6435 | { |
6436 | if (is_legacy_es()) |
6437 | SPIRV_CROSS_THROW("textureSize not supported in legacy ES" ); |
6438 | if (is_comparison) |
6439 | SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL" ); |
6440 | require_extension_internal(ext: "GL_EXT_gpu_shader4" ); |
6441 | } |
6442 | |
6443 | if (op == "texelFetch" && is_legacy_es()) |
6444 | SPIRV_CROSS_THROW("texelFetch not supported in legacy ES" ); |
6445 | |
6446 | bool is_es_and_depth = is_legacy_es() && is_comparison; |
6447 | std::string type_prefix = is_comparison ? "shadow" : "texture" ; |
6448 | |
6449 | if (op == "texture" ) |
6450 | return is_es_and_depth ? join(ts&: type_prefix, ts&: type, ts: "EXT" ) : join(ts&: type_prefix, ts&: type); |
6451 | else if (op == "textureLod" ) |
6452 | return join(ts&: type_prefix, ts&: type, ts: legacy_lod_ext ? "LodEXT" : "Lod" ); |
6453 | else if (op == "textureProj" ) |
6454 | return join(ts&: type_prefix, ts&: type, ts: is_es_and_depth ? "ProjEXT" : "Proj" ); |
6455 | else if (op == "textureGrad" ) |
6456 | return join(ts&: type_prefix, ts&: type, ts: is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad" ); |
6457 | else if (op == "textureProjLod" ) |
6458 | return join(ts&: type_prefix, ts&: type, ts: legacy_lod_ext ? "ProjLodEXT" : "ProjLod" ); |
6459 | else if (op == "textureLodOffset" ) |
6460 | return join(ts&: type_prefix, ts&: type, ts: "LodOffset" ); |
6461 | else if (op == "textureProjGrad" ) |
6462 | return join(ts&: type_prefix, ts&: type, |
6463 | ts: is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad" ); |
6464 | else if (op == "textureProjLodOffset" ) |
6465 | return join(ts&: type_prefix, ts&: type, ts: "ProjLodOffset" ); |
6466 | else if (op == "textureSize" ) |
6467 | return join(ts: "textureSize" , ts&: type); |
6468 | else if (op == "texelFetch" ) |
6469 | return join(ts: "texelFetch" , ts&: type); |
6470 | else |
6471 | { |
6472 | SPIRV_CROSS_THROW(join("Unsupported legacy texture op: " , op)); |
6473 | } |
6474 | } |
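// Illustrative mappings produced by the legacy name builder above:
//   "texture"     on a 2D sampler          -> texture2D        (shadow2DEXT for depth in legacy ES)
//   "textureLod"  in a legacy ES fragment  -> texture2DLodEXT  (via GL_EXT_shader_texture_lod)
//   "textureGrad" on legacy desktop        -> texture2DGradARB (via GL_ARB_shader_texture_lod)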
6475 | |
6476 | bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp) |
6477 | { |
6478 | auto *cleft = maybe_get<SPIRConstant>(id: left); |
6479 | auto *cright = maybe_get<SPIRConstant>(id: right); |
6480 | auto &lerptype = expression_type(id: lerp); |
6481 | |
6482 | // If our targets aren't constants, we cannot use construction. |
6483 | if (!cleft || !cright) |
6484 | return false; |
6485 | |
6486 | // If our targets are spec constants, we cannot use construction. |
6487 | if (cleft->specialization || cright->specialization) |
6488 | return false; |
6489 | |
6490 | auto &value_type = get<SPIRType>(id: cleft->constant_type); |
6491 | |
6492 | if (lerptype.basetype != SPIRType::Boolean) |
6493 | return false; |
6494 | if (value_type.basetype == SPIRType::Struct || is_array(type: value_type)) |
6495 | return false; |
6496 | if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize) |
6497 | return false; |
6498 | |
6499 | // Only valid way in SPIR-V 1.4 to use matrices in select is a scalar select. |
6500 | // matrix(scalar) constructor fills in diagnonals, so gets messy very quickly. |
6501 | // Just avoid this case. |
6502 | if (value_type.columns > 1) |
6503 | return false; |
6504 | |
6505 | // If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor. |
6506 | bool ret = true; |
6507 | for (uint32_t row = 0; ret && row < value_type.vecsize; row++) |
6508 | { |
6509 | switch (type.basetype) |
6510 | { |
6511 | case SPIRType::Short: |
6512 | case SPIRType::UShort: |
6513 | ret = cleft->scalar_u16(col: 0, row) == 0 && cright->scalar_u16(col: 0, row) == 1; |
6514 | break; |
6515 | |
6516 | case SPIRType::Int: |
6517 | case SPIRType::UInt: |
6518 | ret = cleft->scalar(col: 0, row) == 0 && cright->scalar(col: 0, row) == 1; |
6519 | break; |
6520 | |
6521 | case SPIRType::Half: |
6522 | ret = cleft->scalar_f16(col: 0, row) == 0.0f && cright->scalar_f16(col: 0, row) == 1.0f; |
6523 | break; |
6524 | |
6525 | case SPIRType::Float: |
6526 | ret = cleft->scalar_f32(col: 0, row) == 0.0f && cright->scalar_f32(col: 0, row) == 1.0f; |
6527 | break; |
6528 | |
6529 | case SPIRType::Double: |
6530 | ret = cleft->scalar_f64(col: 0, row) == 0.0 && cright->scalar_f64(col: 0, row) == 1.0; |
6531 | break; |
6532 | |
6533 | case SPIRType::Int64: |
6534 | case SPIRType::UInt64: |
6535 | ret = cleft->scalar_u64(col: 0, row) == 0 && cright->scalar_u64(col: 0, row) == 1; |
6536 | break; |
6537 | |
6538 | default: |
6539 | ret = false; |
6540 | break; |
6541 | } |
6542 | } |
6543 | |
6544 | if (ret) |
6545 | op = type_to_glsl_constructor(type); |
6546 | return ret; |
6547 | } |
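// Concrete case the check above targets: an OpSelect whose bool selector chooses between the
// constants 0 and 1 collapses to a plain constructor cast, e.g. "int(cond)" or "vec4(cond)"
// (selector name illustrative), instead of a mix()/ternary expression.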
6548 | |
6549 | string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value, |
6550 | uint32_t false_value) |
6551 | { |
6552 | string expr; |
6553 | auto &lerptype = expression_type(id: select); |
6554 | |
6555 | if (lerptype.vecsize == 1) |
6556 | expr = join(ts: to_enclosed_expression(id: select), ts: " ? " , ts: to_enclosed_pointer_expression(id: true_value), ts: " : " , |
6557 | ts: to_enclosed_pointer_expression(id: false_value)); |
6558 | else |
6559 | { |
6560 | auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(id: expression, index: i); }; |
6561 | |
6562 | expr = type_to_glsl_constructor(type: restype); |
6563 | expr += "(" ; |
6564 | for (uint32_t i = 0; i < restype.vecsize; i++) |
6565 | { |
6566 | expr += swiz(select, i); |
6567 | expr += " ? " ; |
6568 | expr += swiz(true_value, i); |
6569 | expr += " : " ; |
6570 | expr += swiz(false_value, i); |
6571 | if (i + 1 < restype.vecsize) |
6572 | expr += ", " ; |
6573 | } |
6574 | expr += ")" ; |
6575 | } |
6576 | |
6577 | return expr; |
6578 | } |
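// Sketch of the vector path above for a two-component select (names illustrative):
//   vec2(s.x ? a.x : b.x, s.y ? a.y : b.y)
// The scalar path emits a single "s ? a : b" ternary instead.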
6579 | |
6580 | void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp) |
6581 | { |
6582 | auto &lerptype = expression_type(id: lerp); |
6583 | auto &restype = get<SPIRType>(id: result_type); |
6584 | |
6585 | // If this results in a variable pointer, assume it may be written through. |
6586 | if (restype.pointer) |
6587 | { |
6588 | register_write(chain: left); |
6589 | register_write(chain: right); |
6590 | } |
6591 | |
6592 | string mix_op; |
6593 | bool has_boolean_mix = *backend.boolean_mix_function && |
6594 | ((options.es && options.version >= 310) || (!options.es && options.version >= 450)); |
6595 | bool trivial_mix = to_trivial_mix_op(type: restype, op&: mix_op, left, right, lerp); |
6596 | |
6597 | // Cannot use boolean mix when the lerp argument is just one boolean, |
6598 | // fall back to regular trinary statements. |
6599 | if (lerptype.vecsize == 1) |
6600 | has_boolean_mix = false; |
6601 | |
6602 | // If we can reduce the mix to a simple cast, do so. |
6603 | // This helps for cases like int(bool), uint(bool) which is implemented with |
6604 | // OpSelect bool 1 0. |
6605 | if (trivial_mix) |
6606 | { |
6607 | emit_unary_func_op(result_type, result_id: id, op0: lerp, op: mix_op.c_str()); |
6608 | } |
6609 | else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean) |
6610 | { |
6611 | // Boolean mix not supported on desktop without extension. |
6612 | // Was added in OpenGL 4.5 with ES 3.1 compat. |
6613 | // |
6614 | // Could use GL_EXT_shader_integer_mix on desktop at least, |
6615 | // but Apple doesn't support it. :( |
6616 | // Just implement it as ternary expressions. |
6617 | auto expr = to_ternary_expression(restype: get<SPIRType>(id: result_type), select: lerp, true_value: right, false_value: left); |
6618 | emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: left) && should_forward(id: right) && should_forward(id: lerp)); |
6619 | inherit_expression_dependencies(dst: id, source: left); |
6620 | inherit_expression_dependencies(dst: id, source: right); |
6621 | inherit_expression_dependencies(dst: id, source: lerp); |
6622 | } |
6623 | else if (lerptype.basetype == SPIRType::Boolean) |
6624 | emit_trinary_func_op(result_type, result_id: id, op0: left, op1: right, op2: lerp, op: backend.boolean_mix_function); |
6625 | else |
6626 | emit_trinary_func_op(result_type, result_id: id, op0: left, op1: right, op2: lerp, op: "mix" ); |
6627 | } |
6628 | |
6629 | string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id) |
6630 | { |
6631 | // Keep track of the array indices we have used to load the image. |
6632 | // We'll need to use the same array index into the combined image sampler array. |
6633 | auto image_expr = to_non_uniform_aware_expression(id: image_id); |
6634 | string array_expr; |
6635 | auto array_index = image_expr.find_first_of(c: '['); |
6636 | if (array_index != string::npos) |
6637 | array_expr = image_expr.substr(pos: array_index, n: string::npos); |
6638 | |
6639 | auto &args = current_function->arguments; |
6640 | |
6641 | // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect |
6642 | // all possible combinations into new sampler2D uniforms. |
6643 | auto *image = maybe_get_backing_variable(chain: image_id); |
6644 | auto *samp = maybe_get_backing_variable(chain: samp_id); |
6645 | if (image) |
6646 | image_id = image->self; |
6647 | if (samp) |
6648 | samp_id = samp->self; |
6649 | |
6650 | auto image_itr = find_if(begin(args), end(args),
6651 | [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
6652 | 
6653 | auto sampler_itr = find_if(begin(args), end(args),
6654 | [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
6655 | |
6656 | if (image_itr != end(cont&: args) || sampler_itr != end(cont&: args)) |
6657 | { |
6658 | // If any parameter originates from a parameter, we will find it in our argument list. |
6659 | bool global_image = image_itr == end(cont&: args); |
6660 | bool global_sampler = sampler_itr == end(cont&: args); |
6661 | VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(cont&: args))); |
6662 | VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(cont&: args))); |
6663 | |
6664 | auto &combined = current_function->combined_parameters; |
6665 | auto itr = find_if(first: begin(cont&: combined), last: end(cont&: combined), pred: [=](const SPIRFunction::CombinedImageSamplerParameter &p) { |
6666 | return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid && |
6667 | p.sampler_id == sid; |
6668 | }); |
6669 | |
6670 | if (itr != end(cont&: combined)) |
6671 | return to_expression(id: itr->id) + array_expr; |
6672 | else |
6673 | { |
6674 | SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was " |
6675 | "build_combined_image_samplers() used " |
6676 | "before compile() was called?" ); |
6677 | } |
6678 | } |
6679 | else |
6680 | { |
6681 | // For global sampler2D, look directly at the global remapping table. |
6682 | auto &mapping = combined_image_samplers; |
6683 | auto itr = find_if(first: begin(cont&: mapping), last: end(cont&: mapping), pred: [image_id, samp_id](const CombinedImageSampler &combined) { |
6684 | return combined.image_id == image_id && combined.sampler_id == samp_id; |
6685 | }); |
6686 | |
6687 | if (itr != end(cont&: combined_image_samplers)) |
6688 | return to_expression(id: itr->combined_id) + array_expr; |
6689 | else |
6690 | { |
6691 | SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used " |
6692 | "before compile() was called?" ); |
6693 | } |
6694 | } |
6695 | } |
6696 | |
6697 | bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op) |
6698 | { |
6699 | switch (op) |
6700 | { |
6701 | case OpGroupNonUniformElect: |
6702 | case OpGroupNonUniformBallot: |
6703 | case OpGroupNonUniformBallotFindLSB: |
6704 | case OpGroupNonUniformBallotFindMSB: |
6705 | case OpGroupNonUniformBroadcast: |
6706 | case OpGroupNonUniformBroadcastFirst: |
6707 | case OpGroupNonUniformAll: |
6708 | case OpGroupNonUniformAny: |
6709 | case OpGroupNonUniformAllEqual: |
6710 | case OpControlBarrier: |
6711 | case OpMemoryBarrier: |
6712 | case OpGroupNonUniformBallotBitCount: |
6713 | case OpGroupNonUniformBallotBitExtract: |
6714 | case OpGroupNonUniformInverseBallot: |
6715 | return true; |
6716 | default: |
6717 | return false; |
6718 | } |
6719 | } |
6720 | |
6721 | void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) |
6722 | { |
6723 | if (options.vulkan_semantics && combined_image_samplers.empty()) |
6724 | { |
6725 | emit_binary_func_op(result_type, result_id, op0: image_id, op1: samp_id, |
6726 | op: type_to_glsl(type: get<SPIRType>(id: result_type), id: result_id).c_str()); |
6727 | } |
6728 | else |
6729 | { |
6730 | // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. |
6731 | emit_op(result_type, result_id, rhs: to_combined_image_sampler(image_id, samp_id), forwarding: true, suppress_usage_tracking: true); |
6732 | } |
6733 | |
6734 | // Make sure to suppress usage tracking and any expression invalidation. |
6735 | // It is illegal to create temporaries of opaque types. |
6736 | forwarded_temporaries.erase(x: result_id); |
6737 | } |
6738 | |
6739 | static inline bool image_opcode_is_sample_no_dref(Op op) |
6740 | { |
6741 | switch (op) |
6742 | { |
6743 | case OpImageSampleExplicitLod: |
6744 | case OpImageSampleImplicitLod: |
6745 | case OpImageSampleProjExplicitLod: |
6746 | case OpImageSampleProjImplicitLod: |
6747 | case OpImageFetch: |
6748 | case OpImageRead: |
6749 | case OpImageSparseSampleExplicitLod: |
6750 | case OpImageSparseSampleImplicitLod: |
6751 | case OpImageSparseSampleProjExplicitLod: |
6752 | case OpImageSparseSampleProjImplicitLod: |
6753 | case OpImageSparseFetch: |
6754 | case OpImageSparseRead: |
6755 | return true; |
6756 | |
6757 | default: |
6758 | return false; |
6759 | } |
6760 | } |
6761 | |
6762 | void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id, |
6763 | uint32_t &texel_id) |
6764 | { |
6765 | // Need to allocate two temporaries. |
if (options.es)
SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
require_extension_internal("GL_ARB_sparse_texture2");

auto &temps = extra_sub_expressions[id];
if (temps == 0)
temps = ir.increase_bound_by(2);

feedback_id = temps + 0;
texel_id = temps + 1;

auto &return_type = get<SPIRType>(result_type_id);
if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
emit_uninitialized_temporary(return_type.member_types[0], feedback_id);
emit_uninitialized_temporary(return_type.member_types[1], texel_id);
6782 | } |
6783 | |
6784 | uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const |
6785 | { |
auto itr = extra_sub_expressions.find(id);
6787 | if (itr == extra_sub_expressions.end()) |
6788 | return 0; |
6789 | else |
6790 | return itr->second + 1; |
6791 | } |
6792 | |
6793 | void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse) |
6794 | { |
auto *ops = stream(i);
auto op = static_cast<Op>(i.op);

SmallVector<uint32_t> inherited_expressions;

uint32_t result_type_id = ops[0];
uint32_t id = ops[1];
auto &return_type = get<SPIRType>(result_type_id);

uint32_t sparse_code_id = 0;
uint32_t sparse_texel_id = 0;
if (sparse)
emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id);

bool forward = false;
string expr = to_texture_op(i, sparse, &forward, inherited_expressions);
6811 | |
6812 | if (sparse) |
6813 | { |
statement(to_expression(sparse_code_id), " = ", expr, ";");
expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id),
")");
forward = true;
inherited_expressions.clear();
}

emit_op(result_type_id, id, expr, forward);
for (auto &inherit : inherited_expressions)
inherit_expression_dependencies(id, inherit);
6824 | |
6825 | // Do not register sparse ops as control dependent as they are always lowered to a temporary. |
6826 | switch (op) |
6827 | { |
6828 | case OpImageSampleDrefImplicitLod: |
6829 | case OpImageSampleImplicitLod: |
6830 | case OpImageSampleProjImplicitLod: |
6831 | case OpImageSampleProjDrefImplicitLod: |
register_control_dependent_expression(id);
6833 | break; |
6834 | |
6835 | default: |
6836 | break; |
6837 | } |
6838 | } |
6839 | |
6840 | std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward, |
6841 | SmallVector<uint32_t> &inherited_expressions) |
6842 | { |
6843 | auto *ops = stream(instr: i); |
6844 | auto op = static_cast<Op>(i.op); |
6845 | uint32_t length = i.length; |
6846 | |
6847 | uint32_t result_type_id = ops[0]; |
6848 | VariableID img = ops[2]; |
6849 | uint32_t coord = ops[3]; |
6850 | uint32_t dref = 0; |
6851 | uint32_t comp = 0; |
6852 | bool gather = false; |
6853 | bool proj = false; |
6854 | bool fetch = false; |
6855 | bool nonuniform_expression = false; |
6856 | const uint32_t *opt = nullptr; |
6857 | |
6858 | auto &result_type = get<SPIRType>(id: result_type_id); |
6859 | |
6860 | inherited_expressions.push_back(t: coord); |
6861 | if (has_decoration(id: img, decoration: DecorationNonUniform) && !maybe_get_backing_variable(chain: img)) |
6862 | nonuniform_expression = true; |
6863 | |
6864 | switch (op) |
6865 | { |
6866 | case OpImageSampleDrefImplicitLod: |
6867 | case OpImageSampleDrefExplicitLod: |
6868 | case OpImageSparseSampleDrefImplicitLod: |
6869 | case OpImageSparseSampleDrefExplicitLod: |
6870 | dref = ops[4]; |
6871 | opt = &ops[5]; |
6872 | length -= 5; |
6873 | break; |
6874 | |
6875 | case OpImageSampleProjDrefImplicitLod: |
6876 | case OpImageSampleProjDrefExplicitLod: |
6877 | case OpImageSparseSampleProjDrefImplicitLod: |
6878 | case OpImageSparseSampleProjDrefExplicitLod: |
6879 | dref = ops[4]; |
6880 | opt = &ops[5]; |
6881 | length -= 5; |
6882 | proj = true; |
6883 | break; |
6884 | |
6885 | case OpImageDrefGather: |
6886 | case OpImageSparseDrefGather: |
6887 | dref = ops[4]; |
6888 | opt = &ops[5]; |
6889 | length -= 5; |
6890 | gather = true; |
if (options.es && options.version < 310)
SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
else if (!options.es && options.version < 400)
SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
6895 | break; |
6896 | |
6897 | case OpImageGather: |
6898 | case OpImageSparseGather: |
6899 | comp = ops[4]; |
6900 | opt = &ops[5]; |
6901 | length -= 5; |
6902 | gather = true; |
if (options.es && options.version < 310)
SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
else if (!options.es && options.version < 400)
{
if (!expression_is_constant_null(comp))
SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
require_extension_internal("GL_ARB_texture_gather");
}
6911 | break; |
6912 | |
6913 | case OpImageFetch: |
6914 | case OpImageSparseFetch: |
6915 | case OpImageRead: // Reads == fetches in Metal (other langs will not get here) |
6916 | opt = &ops[4]; |
6917 | length -= 4; |
6918 | fetch = true; |
6919 | break; |
6920 | |
6921 | case OpImageSampleProjImplicitLod: |
6922 | case OpImageSampleProjExplicitLod: |
6923 | case OpImageSparseSampleProjImplicitLod: |
6924 | case OpImageSparseSampleProjExplicitLod: |
6925 | opt = &ops[4]; |
6926 | length -= 4; |
6927 | proj = true; |
6928 | break; |
6929 | |
6930 | default: |
6931 | opt = &ops[4]; |
6932 | length -= 4; |
6933 | break; |
6934 | } |
6935 | |
6936 | // Bypass pointers because we need the real image struct |
6937 | auto &type = expression_type(id: img); |
6938 | auto &imgtype = get<SPIRType>(id: type.self); |
6939 | |
6940 | uint32_t coord_components = 0; |
6941 | switch (imgtype.image.dim) |
6942 | { |
6943 | case spv::Dim1D: |
6944 | coord_components = 1; |
6945 | break; |
6946 | case spv::Dim2D: |
6947 | coord_components = 2; |
6948 | break; |
6949 | case spv::Dim3D: |
6950 | coord_components = 3; |
6951 | break; |
6952 | case spv::DimCube: |
6953 | coord_components = 3; |
6954 | break; |
6955 | case spv::DimBuffer: |
6956 | coord_components = 1; |
6957 | break; |
6958 | default: |
6959 | coord_components = 2; |
6960 | break; |
6961 | } |
6962 | |
6963 | if (dref) |
6964 | inherited_expressions.push_back(t: dref); |
6965 | |
6966 | if (proj) |
6967 | coord_components++; |
6968 | if (imgtype.image.arrayed) |
6969 | coord_components++; |
6970 | |
6971 | uint32_t bias = 0; |
6972 | uint32_t lod = 0; |
6973 | uint32_t grad_x = 0; |
6974 | uint32_t grad_y = 0; |
6975 | uint32_t coffset = 0; |
6976 | uint32_t offset = 0; |
6977 | uint32_t coffsets = 0; |
6978 | uint32_t sample = 0; |
6979 | uint32_t minlod = 0; |
6980 | uint32_t flags = 0; |
6981 | |
6982 | if (length) |
6983 | { |
6984 | flags = *opt++; |
6985 | length--; |
6986 | } |
6987 | |
6988 | auto test = [&](uint32_t &v, uint32_t flag) { |
6989 | if (length && (flags & flag)) |
6990 | { |
6991 | v = *opt++; |
6992 | inherited_expressions.push_back(t: v); |
6993 | length--; |
6994 | } |
6995 | }; |
6996 | |
6997 | test(bias, ImageOperandsBiasMask); |
6998 | test(lod, ImageOperandsLodMask); |
6999 | test(grad_x, ImageOperandsGradMask); |
7000 | test(grad_y, ImageOperandsGradMask); |
7001 | test(coffset, ImageOperandsConstOffsetMask); |
7002 | test(offset, ImageOperandsOffsetMask); |
7003 | test(coffsets, ImageOperandsConstOffsetsMask); |
7004 | test(sample, ImageOperandsSampleMask); |
7005 | test(minlod, ImageOperandsMinLodMask); |
7006 | |
7007 | TextureFunctionBaseArguments base_args = {}; |
7008 | base_args.img = img; |
7009 | base_args.imgtype = &imgtype; |
7010 | base_args.is_fetch = fetch != 0; |
7011 | base_args.is_gather = gather != 0; |
7012 | base_args.is_proj = proj != 0; |
7013 | |
7014 | string expr; |
7015 | TextureFunctionNameArguments name_args = {}; |
7016 | |
7017 | name_args.base = base_args; |
7018 | name_args.has_array_offsets = coffsets != 0; |
7019 | name_args.has_offset = coffset != 0 || offset != 0; |
7020 | name_args.has_grad = grad_x != 0 || grad_y != 0; |
7021 | name_args.has_dref = dref != 0; |
7022 | name_args.is_sparse_feedback = sparse; |
7023 | name_args.has_min_lod = minlod != 0; |
7024 | name_args.lod = lod; |
7025 | expr += to_function_name(args: name_args); |
7026 | expr += "(" ; |
7027 | |
7028 | uint32_t sparse_texel_id = 0; |
7029 | if (sparse) |
7030 | sparse_texel_id = get_sparse_feedback_texel_id(id: ops[1]); |
7031 | |
7032 | TextureFunctionArguments args = {}; |
7033 | args.base = base_args; |
7034 | args.coord = coord; |
7035 | args.coord_components = coord_components; |
7036 | args.dref = dref; |
7037 | args.grad_x = grad_x; |
7038 | args.grad_y = grad_y; |
7039 | args.lod = lod; |
7040 | args.coffset = coffset; |
7041 | args.offset = offset; |
7042 | args.bias = bias; |
7043 | args.component = comp; |
7044 | args.sample = sample; |
7045 | args.sparse_texel = sparse_texel_id; |
7046 | args.min_lod = minlod; |
7047 | args.nonuniform_expression = nonuniform_expression; |
7048 | expr += to_function_args(args, p_forward: forward); |
7049 | expr += ")" ; |
7050 | |
7051 | // texture(samplerXShadow) returns float. shadowX() returns vec4, but only in desktop GLSL. Swizzle here. |
if (is_legacy() && !options.es && is_depth_image(imgtype, img))
expr += ".r";
7054 | |
7055 | // Sampling from a texture which was deduced to be a depth image, might actually return 1 component here. |
7056 | // Remap back to 4 components as sampling opcodes expect. |
7057 | if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op)) |
7058 | { |
7059 | bool image_is_depth = false; |
7060 | const auto *combined = maybe_get<SPIRCombinedImageSampler>(id: img); |
7061 | VariableID image_id = combined ? combined->image : img; |
7062 | |
7063 | if (combined && is_depth_image(type: imgtype, id: combined->image)) |
7064 | image_is_depth = true; |
7065 | else if (is_depth_image(type: imgtype, id: img)) |
7066 | image_is_depth = true; |
7067 | |
7068 | // We must also check the backing variable for the image. |
7069 | // We might have loaded an OpImage, and used that handle for two different purposes. |
7070 | // Once with comparison, once without. |
7071 | auto *image_variable = maybe_get_backing_variable(chain: image_id); |
7072 | if (image_variable && is_depth_image(type: get<SPIRType>(id: image_variable->basetype), id: image_variable->self)) |
7073 | image_is_depth = true; |
7074 | |
7075 | if (image_is_depth) |
7076 | expr = remap_swizzle(out_type: result_type, input_components: 1, expr); |
7077 | } |
7078 | |
7079 | if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32) |
7080 | { |
7081 | // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically. |
7082 | // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision. |
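// For example, a 16-bit float result would typically be wrapped as f16vec4(texture(...))
// (illustrative; the constructor comes from type_to_glsl_constructor()).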
7083 | expr = join(ts: type_to_glsl_constructor(type: result_type), ts: "(" , ts&: expr, ts: ")" ); |
7084 | } |
7085 | |
7086 | // Deals with reads from MSL. We might need to downconvert to fewer components. |
7087 | if (op == OpImageRead) |
7088 | expr = remap_swizzle(out_type: result_type, input_components: 4, expr); |
7089 | |
7090 | return expr; |
7091 | } |
7092 | |
7093 | bool CompilerGLSL::expression_is_constant_null(uint32_t id) const |
7094 | { |
7095 | auto *c = maybe_get<SPIRConstant>(id); |
7096 | if (!c) |
7097 | return false; |
7098 | return c->constant_is_null(); |
7099 | } |
7100 | |
7101 | bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr) |
7102 | { |
auto &type = expression_type(ptr);
if (type.array.empty())
return false;

if (!backend.array_is_value_type)
return true;

auto *var = maybe_get_backing_variable(ptr);
if (!var)
return false;

auto &backed_type = get<SPIRType>(var->basetype);
return !backend.array_is_value_type_in_buffer_blocks && backed_type.basetype == SPIRType::Struct &&
has_member_decoration(backed_type.self, 0, DecorationOffset);
7117 | } |
7118 | |
7119 | // Returns the function name for a texture sampling function for the specified image and sampling characteristics. |
7120 | // For some subclasses, the function is a method on the specified image. |
7121 | string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args) |
7122 | { |
7123 | if (args.has_min_lod) |
7124 | { |
if (options.es)
SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
require_extension_internal("GL_ARB_sparse_texture_clamp");
7128 | } |
7129 | |
7130 | string fname; |
7131 | auto &imgtype = *args.base.imgtype; |
7132 | VariableID tex = args.base.img; |
7133 | |
7134 | // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. |
7135 | // To emulate this, we will have to use textureGrad with a constant gradient of 0. |
7136 | // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. |
7137 | // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. |
7138 | bool workaround_lod_array_shadow_as_grad = false; |
if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
is_depth_image(imgtype, tex) && args.lod && !args.base.is_fetch)
{
if (!expression_is_constant_null(args.lod))
{
SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
"expressed in GLSL.");
}
workaround_lod_array_shadow_as_grad = true;
}
7149 | |
if (args.is_sparse_feedback)
fname += "sparse";

if (args.base.is_fetch)
fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
else
{
fname += args.is_sparse_feedback ? "Texture" : "texture";

if (args.base.is_gather)
fname += "Gather";
if (args.has_array_offsets)
fname += "Offsets";
if (args.base.is_proj)
fname += "Proj";
if (args.has_grad || workaround_lod_array_shadow_as_grad)
fname += "Grad";
if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
fname += "Lod";
}

if (args.has_offset)
fname += "Offset";

if (args.has_min_lod)
fname += "Clamp";

if (args.is_sparse_feedback || args.has_min_lod)
fname += "ARB";
7179 | |
return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname;
7181 | } |
7182 | |
7183 | std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) |
7184 | { |
7185 | auto *var = maybe_get_backing_variable(chain: id); |
7186 | |
7187 | // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL. |
7188 | // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions. |
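// E.g. texelFetch(sampler2D(uTex, uDummySampler), coord, 0) with a dummy sampler, versus
// texelFetch(uTex, coord, 0) under GL_EXT_samplerless_texture_functions (illustrative).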
7189 | if (var) |
7190 | { |
auto &type = get<SPIRType>(var->basetype);
if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
{
if (options.vulkan_semantics)
{
if (dummy_sampler_id)
{
// Don't need to consider Shadow state since the dummy sampler is always non-shadow.
auto sampled_type = type;
sampled_type.basetype = SPIRType::SampledImage;
return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ",
to_expression(dummy_sampler_id), ")");
}
else
{
// Newer glslang supports this extension to deal with texture2D as argument to texture functions.
require_extension_internal("GL_EXT_samplerless_texture_functions");
}
}
else
{
if (!dummy_sampler_id)
SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
"build_dummy_sampler_for_combined_images() called?");

return to_combined_image_sampler(id, dummy_sampler_id);
}
7217 | } |
7218 | } |
7219 | } |
7220 | |
7221 | return to_non_uniform_aware_expression(id); |
7222 | } |
7223 | |
7224 | // Returns the function args for a texture sampling function for the specified image and sampling characteristics. |
7225 | string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward) |
7226 | { |
7227 | VariableID img = args.base.img; |
7228 | auto &imgtype = *args.base.imgtype; |
7229 | |
7230 | string farg_str; |
7231 | if (args.base.is_fetch) |
7232 | farg_str = convert_separate_image_to_expression(id: img); |
7233 | else |
7234 | farg_str = to_non_uniform_aware_expression(id: img); |
7235 | |
if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos)
7237 | { |
7238 | // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way. |
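// This yields e.g. nonuniformEXT(uTextures[index]) as the image/sampler argument (illustrative).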
farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")");
7240 | } |
7241 | |
7242 | bool swizz_func = backend.swizzle_is_function; |
7243 | auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * { |
7244 | if (comps == in_comps) |
7245 | return "" ; |
7246 | |
7247 | switch (comps) |
7248 | { |
7249 | case 1: |
7250 | return ".x" ; |
7251 | case 2: |
7252 | return swizz_func ? ".xy()" : ".xy" ; |
7253 | case 3: |
7254 | return swizz_func ? ".xyz()" : ".xyz" ; |
7255 | default: |
7256 | return "" ; |
7257 | } |
7258 | }; |
7259 | |
7260 | bool forward = should_forward(id: args.coord); |
7261 | |
7262 | // The IR can give us more components than we need, so chop them off as needed. |
7263 | auto swizzle_expr = swizzle(args.coord_components, expression_type(id: args.coord).vecsize); |
7264 | // Only enclose the UV expression if needed. |
7265 | auto coord_expr = |
7266 | (*swizzle_expr == '\0') ? to_expression(id: args.coord) : (to_enclosed_expression(id: args.coord) + swizzle_expr); |
7267 | |
7268 | // texelFetch only takes int, not uint. |
7269 | auto &coord_type = expression_type(id: args.coord); |
7270 | if (coord_type.basetype == SPIRType::UInt) |
7271 | { |
7272 | auto expected_type = coord_type; |
7273 | expected_type.vecsize = args.coord_components; |
7274 | expected_type.basetype = SPIRType::Int; |
7275 | coord_expr = bitcast_expression(target_type: expected_type, expr_type: coord_type.basetype, expr: coord_expr); |
7276 | } |
7277 | |
7278 | // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. |
7279 | // To emulate this, we will have to use textureGrad with a constant gradient of 0. |
7280 | // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. |
7281 | // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. |
7282 | bool workaround_lod_array_shadow_as_grad = |
7283 | ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && |
7284 | is_depth_image(type: imgtype, id: img) && args.lod != 0 && !args.base.is_fetch; |
7285 | |
7286 | if (args.dref) |
7287 | { |
7288 | forward = forward && should_forward(id: args.dref); |
7289 | |
7290 | // SPIR-V splits dref and coordinate. |
7291 | if (args.base.is_gather || |
7292 | args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather. |
7293 | { |
7294 | farg_str += ", " ; |
7295 | farg_str += to_expression(id: args.coord); |
7296 | farg_str += ", " ; |
7297 | farg_str += to_expression(id: args.dref); |
7298 | } |
7299 | else if (args.base.is_proj) |
7300 | { |
7301 | // Have to reshuffle so we get vec4(coord, dref, proj), special case. |
// Other shading languages split the coordinate and compare value into separate arguments, like SPIR-V does.
7303 | // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow. |
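// E.g. for sampler1DShadow this becomes textureProj(uShadow1D, vec4(coord.x, 0.0, dref, coord.y)) (illustrative).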
7304 | farg_str += ", vec4(" ; |
7305 | |
7306 | if (imgtype.image.dim == Dim1D) |
7307 | { |
7308 | // Could reuse coord_expr, but we will mess up the temporary usage checking. |
7309 | farg_str += to_enclosed_expression(id: args.coord) + ".x" ; |
7310 | farg_str += ", " ; |
7311 | farg_str += "0.0, " ; |
7312 | farg_str += to_expression(id: args.dref); |
7313 | farg_str += ", " ; |
7314 | farg_str += to_enclosed_expression(id: args.coord) + ".y)" ; |
7315 | } |
7316 | else if (imgtype.image.dim == Dim2D) |
7317 | { |
7318 | // Could reuse coord_expr, but we will mess up the temporary usage checking. |
7319 | farg_str += to_enclosed_expression(id: args.coord) + (swizz_func ? ".xy()" : ".xy" ); |
7320 | farg_str += ", " ; |
7321 | farg_str += to_expression(id: args.dref); |
7322 | farg_str += ", " ; |
7323 | farg_str += to_enclosed_expression(id: args.coord) + ".z)" ; |
7324 | } |
7325 | else |
7326 | SPIRV_CROSS_THROW("Invalid type for textureProj with shadow." ); |
7327 | } |
7328 | else |
7329 | { |
7330 | // Create a composite which merges coord/dref into a single vector. |
7331 | auto type = expression_type(id: args.coord); |
7332 | type.vecsize = args.coord_components + 1; |
7333 | if (imgtype.image.dim == Dim1D && options.es) |
7334 | type.vecsize++; |
7335 | farg_str += ", " ; |
7336 | farg_str += type_to_glsl_constructor(type); |
7337 | farg_str += "(" ; |
7338 | |
7339 | if (imgtype.image.dim == Dim1D && options.es) |
7340 | { |
7341 | if (imgtype.image.arrayed) |
7342 | { |
7343 | farg_str += enclose_expression(expr: coord_expr) + ".x" ; |
7344 | farg_str += ", 0.0, " ; |
7345 | farg_str += enclose_expression(expr: coord_expr) + ".y" ; |
7346 | } |
7347 | else |
7348 | { |
7349 | farg_str += coord_expr; |
7350 | farg_str += ", 0.0" ; |
7351 | } |
7352 | } |
7353 | else |
7354 | farg_str += coord_expr; |
7355 | |
7356 | farg_str += ", " ; |
7357 | farg_str += to_expression(id: args.dref); |
7358 | farg_str += ")" ; |
7359 | } |
7360 | } |
7361 | else |
7362 | { |
7363 | if (imgtype.image.dim == Dim1D && options.es) |
7364 | { |
7365 | // Have to fake a second coordinate. |
7366 | if (type_is_floating_point(type: coord_type)) |
7367 | { |
7368 | // Cannot mix proj and array. |
7369 | if (imgtype.image.arrayed || args.base.is_proj) |
7370 | { |
7371 | coord_expr = join(ts: "vec3(" , ts: enclose_expression(expr: coord_expr), ts: ".x, 0.0, " , |
7372 | ts: enclose_expression(expr: coord_expr), ts: ".y)" ); |
7373 | } |
7374 | else |
7375 | coord_expr = join(ts: "vec2(" , ts&: coord_expr, ts: ", 0.0)" ); |
7376 | } |
7377 | else |
7378 | { |
7379 | if (imgtype.image.arrayed) |
7380 | { |
7381 | coord_expr = join(ts: "ivec3(" , ts: enclose_expression(expr: coord_expr), |
7382 | ts: ".x, 0, " , |
7383 | ts: enclose_expression(expr: coord_expr), ts: ".y)" ); |
7384 | } |
7385 | else |
7386 | coord_expr = join(ts: "ivec2(" , ts&: coord_expr, ts: ", 0)" ); |
7387 | } |
7388 | } |
7389 | |
7390 | farg_str += ", " ; |
7391 | farg_str += coord_expr; |
7392 | } |
7393 | |
7394 | if (args.grad_x || args.grad_y) |
7395 | { |
7396 | forward = forward && should_forward(id: args.grad_x); |
7397 | forward = forward && should_forward(id: args.grad_y); |
7398 | farg_str += ", " ; |
7399 | farg_str += to_expression(id: args.grad_x); |
7400 | farg_str += ", " ; |
7401 | farg_str += to_expression(id: args.grad_y); |
7402 | } |
7403 | |
7404 | if (args.lod) |
7405 | { |
7406 | if (workaround_lod_array_shadow_as_grad) |
7407 | { |
7408 | // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0. |
7409 | // Implementing this as plain texture() is not safe on some implementations. |
7410 | if (imgtype.image.dim == Dim2D) |
7411 | farg_str += ", vec2(0.0), vec2(0.0)" ; |
7412 | else if (imgtype.image.dim == DimCube) |
7413 | farg_str += ", vec3(0.0), vec3(0.0)" ; |
7414 | } |
7415 | else |
7416 | { |
7417 | forward = forward && should_forward(id: args.lod); |
7418 | farg_str += ", " ; |
7419 | |
7420 | // Lod expression for TexelFetch in GLSL must be int, and only int. |
7421 | if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) |
7422 | farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.lod); |
7423 | else |
7424 | farg_str += to_expression(id: args.lod); |
7425 | } |
7426 | } |
7427 | else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) |
7428 | { |
// The Lod argument is optional in OpImageFetch, but texelFetch() requires one, so pick 0 as the default.
farg_str += ", 0";
7431 | } |
7432 | |
7433 | if (args.coffset) |
7434 | { |
7435 | forward = forward && should_forward(id: args.coffset); |
7436 | farg_str += ", " ; |
7437 | farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.coffset); |
7438 | } |
7439 | else if (args.offset) |
7440 | { |
7441 | forward = forward && should_forward(id: args.offset); |
7442 | farg_str += ", " ; |
7443 | farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.offset); |
7444 | } |
7445 | |
7446 | if (args.sample) |
7447 | { |
7448 | farg_str += ", " ; |
7449 | farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.sample); |
7450 | } |
7451 | |
7452 | if (args.min_lod) |
7453 | { |
7454 | farg_str += ", " ; |
7455 | farg_str += to_expression(id: args.min_lod); |
7456 | } |
7457 | |
7458 | if (args.sparse_texel) |
7459 | { |
// The sparse texel output parameter comes after everything else, except that it precedes the optional component/bias arguments.
7461 | farg_str += ", " ; |
7462 | farg_str += to_expression(id: args.sparse_texel); |
7463 | } |
7464 | |
7465 | if (args.bias) |
7466 | { |
7467 | forward = forward && should_forward(id: args.bias); |
7468 | farg_str += ", " ; |
7469 | farg_str += to_expression(id: args.bias); |
7470 | } |
7471 | |
7472 | if (args.component && !expression_is_constant_null(id: args.component)) |
7473 | { |
7474 | forward = forward && should_forward(id: args.component); |
7475 | farg_str += ", " ; |
7476 | farg_str += bitcast_expression(target_type: SPIRType::Int, arg: args.component); |
7477 | } |
7478 | |
7479 | *p_forward = forward; |
7480 | |
7481 | return farg_str; |
7482 | } |
7483 | |
7484 | Op CompilerGLSL::get_remapped_spirv_op(Op op) const |
7485 | { |
7486 | if (options.relax_nan_checks) |
7487 | { |
7488 | switch (op) |
7489 | { |
7490 | case OpFUnordLessThan: |
7491 | op = OpFOrdLessThan; |
7492 | break; |
7493 | case OpFUnordLessThanEqual: |
7494 | op = OpFOrdLessThanEqual; |
7495 | break; |
7496 | case OpFUnordGreaterThan: |
7497 | op = OpFOrdGreaterThan; |
7498 | break; |
7499 | case OpFUnordGreaterThanEqual: |
7500 | op = OpFOrdGreaterThanEqual; |
7501 | break; |
7502 | case OpFUnordEqual: |
7503 | op = OpFOrdEqual; |
7504 | break; |
7505 | case OpFOrdNotEqual: |
7506 | op = OpFUnordNotEqual; |
7507 | break; |
7508 | |
7509 | default: |
7510 | break; |
7511 | } |
7512 | } |
7513 | |
7514 | return op; |
7515 | } |
7516 | |
7517 | GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const |
7518 | { |
7519 | // Relax to non-NaN aware opcodes. |
7520 | if (options.relax_nan_checks) |
7521 | { |
7522 | switch (std450_op) |
7523 | { |
7524 | case GLSLstd450NClamp: |
7525 | std450_op = GLSLstd450FClamp; |
7526 | break; |
7527 | case GLSLstd450NMin: |
7528 | std450_op = GLSLstd450FMin; |
7529 | break; |
7530 | case GLSLstd450NMax: |
7531 | std450_op = GLSLstd450FMax; |
7532 | break; |
7533 | default: |
7534 | break; |
7535 | } |
7536 | } |
7537 | |
7538 | return std450_op; |
7539 | } |
7540 | |
7541 | void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length) |
7542 | { |
7543 | auto op = static_cast<GLSLstd450>(eop); |
7544 | |
7545 | if (is_legacy() && is_unsigned_glsl_opcode(op)) |
7546 | SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets." ); |
7547 | |
7548 | // If we need to do implicit bitcasts, make sure we do it with the correct type. |
7549 | uint32_t integer_width = get_integer_width_for_glsl_instruction(op, arguments: args, length); |
7550 | auto int_type = to_signed_basetype(width: integer_width); |
7551 | auto uint_type = to_unsigned_basetype(width: integer_width); |
7552 | |
7553 | op = get_remapped_glsl_op(std450_op: op); |
7554 | |
7555 | switch (op) |
7556 | { |
7557 | // FP fiddling |
7558 | case GLSLstd450Round: |
7559 | if (!is_legacy()) |
7560 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "round" ); |
7561 | else |
7562 | { |
auto op0 = to_enclosed_expression(args[0]);
auto &op0_type = expression_type(args[0]);
auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))");
bool forward = should_forward(args[0]);
emit_op(result_type, id, expr, forward);
inherit_expression_dependencies(id, args[0]);
7569 | } |
7570 | break; |
7571 | |
7572 | case GLSLstd450RoundEven: |
7573 | if (!is_legacy()) |
7574 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "roundEven" ); |
7575 | else if (!options.es) |
7576 | { |
7577 | // This extension provides round() with round-to-even semantics. |
7578 | require_extension_internal(ext: "GL_EXT_gpu_shader4" ); |
7579 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "round" ); |
7580 | } |
7581 | else |
7582 | SPIRV_CROSS_THROW("roundEven supported only in ESSL 300." ); |
7583 | break; |
7584 | |
7585 | case GLSLstd450Trunc: |
7586 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "trunc" ); |
7587 | break; |
7588 | case GLSLstd450SAbs: |
7589 | emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "abs" , input_type: int_type, expected_result_type: int_type); |
7590 | break; |
7591 | case GLSLstd450FAbs: |
7592 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "abs" ); |
7593 | break; |
7594 | case GLSLstd450SSign: |
7595 | emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "sign" , input_type: int_type, expected_result_type: int_type); |
7596 | break; |
7597 | case GLSLstd450FSign: |
7598 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "sign" ); |
7599 | break; |
7600 | case GLSLstd450Floor: |
7601 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "floor" ); |
7602 | break; |
7603 | case GLSLstd450Ceil: |
7604 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "ceil" ); |
7605 | break; |
7606 | case GLSLstd450Fract: |
7607 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "fract" ); |
7608 | break; |
7609 | case GLSLstd450Radians: |
7610 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "radians" ); |
7611 | break; |
7612 | case GLSLstd450Degrees: |
7613 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "degrees" ); |
7614 | break; |
7615 | case GLSLstd450Fma: |
7616 | if ((!options.es && options.version < 400) || (options.es && options.version < 320)) |
7617 | { |
auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
to_enclosed_expression(args[2]));

emit_op(result_type, id, expr,
should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
for (uint32_t i = 0; i < 3; i++)
inherit_expression_dependencies(id, args[i]);
7625 | } |
7626 | else |
7627 | emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "fma" ); |
7628 | break; |
7629 | case GLSLstd450Modf: |
7630 | register_call_out_argument(id: args[1]); |
7631 | forced_temporaries.insert(x: id); |
7632 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "modf" ); |
7633 | break; |
7634 | |
7635 | case GLSLstd450ModfStruct: |
7636 | { |
7637 | auto &type = get<SPIRType>(id: result_type); |
7638 | emit_uninitialized_temporary_expression(type: result_type, id); |
7639 | statement(ts: to_expression(id), ts: "." , ts: to_member_name(type, index: 0), ts: " = " , ts: "modf(" , ts: to_expression(id: args[0]), ts: ", " , |
7640 | ts: to_expression(id), ts: "." , ts: to_member_name(type, index: 1), ts: ");" ); |
7641 | break; |
7642 | } |
7643 | |
7644 | // Minmax |
7645 | case GLSLstd450UMin: |
7646 | emit_binary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op: "min" , input_type: uint_type, skip_cast_if_equal_type: false); |
7647 | break; |
7648 | |
7649 | case GLSLstd450SMin: |
7650 | emit_binary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op: "min" , input_type: int_type, skip_cast_if_equal_type: false); |
7651 | break; |
7652 | |
7653 | case GLSLstd450FMin: |
7654 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "min" ); |
7655 | break; |
7656 | |
7657 | case GLSLstd450FMax: |
7658 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "max" ); |
7659 | break; |
7660 | |
7661 | case GLSLstd450UMax: |
7662 | emit_binary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op: "max" , input_type: uint_type, skip_cast_if_equal_type: false); |
7663 | break; |
7664 | |
7665 | case GLSLstd450SMax: |
7666 | emit_binary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op: "max" , input_type: int_type, skip_cast_if_equal_type: false); |
7667 | break; |
7668 | |
7669 | case GLSLstd450FClamp: |
7670 | emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "clamp" ); |
7671 | break; |
7672 | |
7673 | case GLSLstd450UClamp: |
7674 | emit_trinary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "clamp" , input_type: uint_type); |
7675 | break; |
7676 | |
7677 | case GLSLstd450SClamp: |
7678 | emit_trinary_func_op_cast(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "clamp" , input_type: int_type); |
7679 | break; |
7680 | |
7681 | // Trig |
7682 | case GLSLstd450Sin: |
7683 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "sin" ); |
7684 | break; |
7685 | case GLSLstd450Cos: |
7686 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "cos" ); |
7687 | break; |
7688 | case GLSLstd450Tan: |
7689 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "tan" ); |
7690 | break; |
7691 | case GLSLstd450Asin: |
7692 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "asin" ); |
7693 | break; |
7694 | case GLSLstd450Acos: |
7695 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "acos" ); |
7696 | break; |
7697 | case GLSLstd450Atan: |
7698 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "atan" ); |
7699 | break; |
7700 | case GLSLstd450Sinh: |
7701 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "sinh" ); |
7702 | break; |
7703 | case GLSLstd450Cosh: |
7704 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "cosh" ); |
7705 | break; |
7706 | case GLSLstd450Tanh: |
7707 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "tanh" ); |
7708 | break; |
7709 | case GLSLstd450Asinh: |
7710 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "asinh" ); |
7711 | break; |
7712 | case GLSLstd450Acosh: |
7713 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "acosh" ); |
7714 | break; |
7715 | case GLSLstd450Atanh: |
7716 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "atanh" ); |
7717 | break; |
7718 | case GLSLstd450Atan2: |
7719 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "atan" ); |
7720 | break; |
7721 | |
7722 | // Exponentials |
7723 | case GLSLstd450Pow: |
7724 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "pow" ); |
7725 | break; |
7726 | case GLSLstd450Exp: |
7727 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "exp" ); |
7728 | break; |
7729 | case GLSLstd450Log: |
7730 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "log" ); |
7731 | break; |
7732 | case GLSLstd450Exp2: |
7733 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "exp2" ); |
7734 | break; |
7735 | case GLSLstd450Log2: |
7736 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "log2" ); |
7737 | break; |
7738 | case GLSLstd450Sqrt: |
7739 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "sqrt" ); |
7740 | break; |
7741 | case GLSLstd450InverseSqrt: |
7742 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "inversesqrt" ); |
7743 | break; |
7744 | |
7745 | // Matrix math |
7746 | case GLSLstd450Determinant: |
7747 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "determinant" ); |
7748 | break; |
7749 | case GLSLstd450MatrixInverse: |
7750 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "inverse" ); |
7751 | break; |
7752 | |
7753 | // Lerping |
7754 | case GLSLstd450FMix: |
7755 | case GLSLstd450IMix: |
7756 | { |
7757 | emit_mix_op(result_type, id, left: args[0], right: args[1], lerp: args[2]); |
7758 | break; |
7759 | } |
7760 | case GLSLstd450Step: |
7761 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "step" ); |
7762 | break; |
7763 | case GLSLstd450SmoothStep: |
7764 | emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "smoothstep" ); |
7765 | break; |
7766 | |
7767 | // Packing |
7768 | case GLSLstd450Frexp: |
7769 | register_call_out_argument(id: args[1]); |
7770 | forced_temporaries.insert(x: id); |
7771 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "frexp" ); |
7772 | break; |
7773 | |
7774 | case GLSLstd450FrexpStruct: |
7775 | { |
7776 | auto &type = get<SPIRType>(id: result_type); |
7777 | emit_uninitialized_temporary_expression(type: result_type, id); |
7778 | statement(ts: to_expression(id), ts: "." , ts: to_member_name(type, index: 0), ts: " = " , ts: "frexp(" , ts: to_expression(id: args[0]), ts: ", " , |
7779 | ts: to_expression(id), ts: "." , ts: to_member_name(type, index: 1), ts: ");" ); |
7780 | break; |
7781 | } |
7782 | |
7783 | case GLSLstd450Ldexp: |
7784 | { |
7785 | bool forward = should_forward(id: args[0]) && should_forward(id: args[1]); |
7786 | |
7787 | auto op0 = to_unpacked_expression(id: args[0]); |
7788 | auto op1 = to_unpacked_expression(id: args[1]); |
7789 | auto &op1_type = expression_type(id: args[1]); |
7790 | if (op1_type.basetype != SPIRType::Int) |
7791 | { |
7792 | // Need a value cast here. |
7793 | auto target_type = op1_type; |
7794 | target_type.basetype = SPIRType::Int; |
7795 | op1 = join(ts: type_to_glsl_constructor(type: target_type), ts: "(" , ts&: op1, ts: ")" ); |
7796 | } |
7797 | |
7798 | auto expr = join(ts: "ldexp(" , ts&: op0, ts: ", " , ts&: op1, ts: ")" ); |
7799 | |
7800 | emit_op(result_type, result_id: id, rhs: expr, forwarding: forward); |
7801 | inherit_expression_dependencies(dst: id, source: args[0]); |
7802 | inherit_expression_dependencies(dst: id, source: args[1]); |
7803 | break; |
7804 | } |
7805 | |
7806 | case GLSLstd450PackSnorm4x8: |
7807 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packSnorm4x8" ); |
7808 | break; |
7809 | case GLSLstd450PackUnorm4x8: |
7810 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packUnorm4x8" ); |
7811 | break; |
7812 | case GLSLstd450PackSnorm2x16: |
7813 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packSnorm2x16" ); |
7814 | break; |
7815 | case GLSLstd450PackUnorm2x16: |
7816 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packUnorm2x16" ); |
7817 | break; |
7818 | case GLSLstd450PackHalf2x16: |
7819 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packHalf2x16" ); |
7820 | break; |
7821 | case GLSLstd450UnpackSnorm4x8: |
7822 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackSnorm4x8" ); |
7823 | break; |
7824 | case GLSLstd450UnpackUnorm4x8: |
7825 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackUnorm4x8" ); |
7826 | break; |
7827 | case GLSLstd450UnpackSnorm2x16: |
7828 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackSnorm2x16" ); |
7829 | break; |
7830 | case GLSLstd450UnpackUnorm2x16: |
7831 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackUnorm2x16" ); |
7832 | break; |
7833 | case GLSLstd450UnpackHalf2x16: |
7834 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackHalf2x16" ); |
7835 | break; |
7836 | |
7837 | case GLSLstd450PackDouble2x32: |
7838 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "packDouble2x32" ); |
7839 | break; |
7840 | case GLSLstd450UnpackDouble2x32: |
7841 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "unpackDouble2x32" ); |
7842 | break; |
7843 | |
7844 | // Vector math |
7845 | case GLSLstd450Length: |
7846 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "length" ); |
7847 | break; |
7848 | case GLSLstd450Distance: |
7849 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "distance" ); |
7850 | break; |
7851 | case GLSLstd450Cross: |
7852 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "cross" ); |
7853 | break; |
7854 | case GLSLstd450Normalize: |
7855 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "normalize" ); |
7856 | break; |
7857 | case GLSLstd450FaceForward: |
7858 | emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "faceforward" ); |
7859 | break; |
7860 | case GLSLstd450Reflect: |
7861 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "reflect" ); |
7862 | break; |
7863 | case GLSLstd450Refract: |
7864 | emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "refract" ); |
7865 | break; |
7866 | |
7867 | // Bit-fiddling |
7868 | case GLSLstd450FindILsb: |
7869 | // findLSB always returns int. |
7870 | emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "findLSB" , input_type: expression_type(id: args[0]).basetype, expected_result_type: int_type); |
7871 | break; |
7872 | |
7873 | case GLSLstd450FindSMsb: |
7874 | emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "findMSB" , input_type: int_type, expected_result_type: int_type); |
7875 | break; |
7876 | |
7877 | case GLSLstd450FindUMsb: |
7878 | emit_unary_func_op_cast(result_type, result_id: id, op0: args[0], op: "findMSB" , input_type: uint_type, |
7879 | expected_result_type: int_type); // findMSB always returns int. |
7880 | break; |
7881 | |
7882 | // Multisampled varying |
7883 | case GLSLstd450InterpolateAtCentroid: |
7884 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "interpolateAtCentroid" ); |
7885 | break; |
7886 | case GLSLstd450InterpolateAtSample: |
7887 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "interpolateAtSample" ); |
7888 | break; |
7889 | case GLSLstd450InterpolateAtOffset: |
7890 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "interpolateAtOffset" ); |
7891 | break; |
7892 | |
7893 | case GLSLstd450NMin: |
7894 | case GLSLstd450NMax: |
7895 | { |
7896 | emit_nminmax_op(result_type, id, op0: args[0], op1: args[1], op); |
7897 | break; |
7898 | } |
7899 | |
7900 | case GLSLstd450NClamp: |
7901 | { |
7902 | // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op. |
7903 | // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags. |
7904 | uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX]; |
7905 | if (!max_id) |
7906 | max_id = ir.increase_bound_by(count: 1); |
7907 | |
7908 | // Inherit precision qualifiers. |
7909 | ir.meta[max_id] = ir.meta[id]; |
7910 | |
7911 | emit_nminmax_op(result_type, id: max_id, op0: args[0], op1: args[1], op: GLSLstd450NMax); |
7912 | emit_nminmax_op(result_type, id, op0: max_id, op1: args[2], op: GLSLstd450NMin); |
7913 | break; |
7914 | } |
7915 | |
7916 | default: |
7917 | statement(ts: "// unimplemented GLSL op " , ts&: eop); |
7918 | break; |
7919 | } |
7920 | } |
7921 | |
7922 | void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op) |
7923 | { |
7924 | // Need to emulate this call. |
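// The emulation expands to roughly the following (shown for NMin):
//   lnan = isnan(a); rnan = isnan(b);
//   tmp = min(a, b);
//   mixed = mix(tmp, b, lnan);    // a was NaN -> take b
//   result = mix(mixed, a, rnan); // b was NaN -> take a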
7925 | uint32_t &ids = extra_sub_expressions[id]; |
7926 | if (!ids) |
7927 | { |
ids = ir.increase_bound_by(5);
auto btype = get<SPIRType>(result_type);
btype.basetype = SPIRType::Boolean;
set<SPIRType>(ids, btype);
7932 | } |
7933 | |
7934 | uint32_t btype_id = ids + 0; |
7935 | uint32_t left_nan_id = ids + 1; |
7936 | uint32_t right_nan_id = ids + 2; |
7937 | uint32_t tmp_id = ids + 3; |
7938 | uint32_t mixed_first_id = ids + 4; |
7939 | |
7940 | // Inherit precision qualifiers. |
7941 | ir.meta[tmp_id] = ir.meta[id]; |
7942 | ir.meta[mixed_first_id] = ir.meta[id]; |
7943 | |
emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
7949 | } |
7950 | |
7951 | void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, |
7952 | uint32_t) |
7953 | { |
7954 | require_extension_internal(ext: "GL_AMD_shader_ballot" ); |
7955 | |
7956 | enum AMDShaderBallot |
7957 | { |
7958 | SwizzleInvocationsAMD = 1, |
7959 | SwizzleInvocationsMaskedAMD = 2, |
7960 | WriteInvocationAMD = 3, |
7961 | MbcntAMD = 4 |
7962 | }; |
7963 | |
7964 | auto op = static_cast<AMDShaderBallot>(eop); |
7965 | |
7966 | switch (op) |
7967 | { |
7968 | case SwizzleInvocationsAMD: |
7969 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "swizzleInvocationsAMD" ); |
7970 | register_control_dependent_expression(expr: id); |
7971 | break; |
7972 | |
7973 | case SwizzleInvocationsMaskedAMD: |
7974 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "swizzleInvocationsMaskedAMD" ); |
7975 | register_control_dependent_expression(expr: id); |
7976 | break; |
7977 | |
7978 | case WriteInvocationAMD: |
7979 | emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "writeInvocationAMD" ); |
7980 | register_control_dependent_expression(expr: id); |
7981 | break; |
7982 | |
7983 | case MbcntAMD: |
7984 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "mbcntAMD" ); |
7985 | register_control_dependent_expression(expr: id); |
7986 | break; |
7987 | |
7988 | default: |
7989 | statement(ts: "// unimplemented SPV AMD shader ballot op " , ts&: eop); |
7990 | break; |
7991 | } |
7992 | } |
7993 | |
7994 | void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop, |
7995 | const uint32_t *args, uint32_t) |
7996 | { |
7997 | require_extension_internal(ext: "GL_AMD_shader_explicit_vertex_parameter" ); |
7998 | |
7999 | enum AMDShaderExplicitVertexParameter |
8000 | { |
8001 | InterpolateAtVertexAMD = 1 |
8002 | }; |
8003 | |
8004 | auto op = static_cast<AMDShaderExplicitVertexParameter>(eop); |
8005 | |
8006 | switch (op) |
8007 | { |
8008 | case InterpolateAtVertexAMD: |
8009 | emit_binary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op: "interpolateAtVertexAMD" ); |
8010 | break; |
8011 | |
8012 | default: |
8013 | statement(ts: "// unimplemented SPV AMD shader explicit vertex parameter op " , ts&: eop); |
8014 | break; |
8015 | } |
8016 | } |
8017 | |
8018 | void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, |
8019 | const uint32_t *args, uint32_t) |
8020 | { |
8021 | require_extension_internal(ext: "GL_AMD_shader_trinary_minmax" ); |
8022 | |
8023 | enum AMDShaderTrinaryMinMax |
8024 | { |
8025 | FMin3AMD = 1, |
8026 | UMin3AMD = 2, |
8027 | SMin3AMD = 3, |
8028 | FMax3AMD = 4, |
8029 | UMax3AMD = 5, |
8030 | SMax3AMD = 6, |
8031 | FMid3AMD = 7, |
8032 | UMid3AMD = 8, |
8033 | SMid3AMD = 9 |
8034 | }; |
8035 | |
8036 | auto op = static_cast<AMDShaderTrinaryMinMax>(eop); |
8037 | |
8038 | switch (op) |
8039 | { |
8040 | case FMin3AMD: |
8041 | case UMin3AMD: |
8042 | case SMin3AMD: |
8043 | emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "min3" ); |
8044 | break; |
8045 | |
8046 | case FMax3AMD: |
8047 | case UMax3AMD: |
8048 | case SMax3AMD: |
8049 | emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "max3" ); |
8050 | break; |
8051 | |
8052 | case FMid3AMD: |
8053 | case UMid3AMD: |
8054 | case SMid3AMD: |
8055 | emit_trinary_func_op(result_type, result_id: id, op0: args[0], op1: args[1], op2: args[2], op: "mid3" ); |
8056 | break; |
8057 | |
8058 | default: |
8059 | statement(ts: "// unimplemented SPV AMD shader trinary minmax op " , ts&: eop); |
8060 | break; |
8061 | } |
8062 | } |
8063 | |
8064 | void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, |
8065 | uint32_t) |
8066 | { |
8067 | require_extension_internal(ext: "GL_AMD_gcn_shader" ); |
8068 | |
8069 | enum AMDGCNShader |
8070 | { |
8071 | CubeFaceIndexAMD = 1, |
8072 | CubeFaceCoordAMD = 2, |
8073 | TimeAMD = 3 |
8074 | }; |
8075 | |
8076 | auto op = static_cast<AMDGCNShader>(eop); |
8077 | |
8078 | switch (op) |
8079 | { |
8080 | case CubeFaceIndexAMD: |
8081 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "cubeFaceIndexAMD" ); |
8082 | break; |
8083 | case CubeFaceCoordAMD: |
8084 | emit_unary_func_op(result_type, result_id: id, op0: args[0], op: "cubeFaceCoordAMD" ); |
8085 | break; |
8086 | case TimeAMD: |
8087 | { |
8088 | string expr = "timeAMD()" ; |
8089 | emit_op(result_type, result_id: id, rhs: expr, forwarding: true); |
8090 | register_control_dependent_expression(expr: id); |
8091 | break; |
8092 | } |
8093 | |
8094 | default: |
8095 | statement(ts: "// unimplemented SPV AMD gcn shader op " , ts&: eop); |
8096 | break; |
8097 | } |
8098 | } |
8099 | |
8100 | void CompilerGLSL::emit_subgroup_op(const Instruction &i) |
8101 | { |
8102 | const uint32_t *ops = stream(instr: i); |
8103 | auto op = static_cast<Op>(i.op); |
8104 | |
8105 | if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op)) |
8106 | SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics." ); |
8107 | |
8108 | // If we need to do implicit bitcasts, make sure we do it with the correct type. |
8109 | uint32_t integer_width = get_integer_width_for_instruction(instr: i); |
8110 | auto int_type = to_signed_basetype(width: integer_width); |
8111 | auto uint_type = to_unsigned_basetype(width: integer_width); |
8112 | |
8113 | switch (op) |
8114 | { |
8115 | case OpGroupNonUniformElect: |
8116 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupElect); |
8117 | break; |
8118 | |
8119 | case OpGroupNonUniformBallotBitCount: |
8120 | { |
8121 | const GroupOperation operation = static_cast<GroupOperation>(ops[3]); |
8122 | if (operation == GroupOperationReduce) |
8123 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBallotBitCount); |
8124 | else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan) |
8125 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout); |
8126 | } |
8127 | break; |
8128 | |
8129 | case OpGroupNonUniformBallotBitExtract: |
8130 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBallotBitExtract); |
8131 | break; |
8132 | |
8133 | case OpGroupNonUniformInverseBallot: |
8134 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout); |
8135 | break; |
8136 | |
8137 | case OpGroupNonUniformBallot: |
8138 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBallot); |
8139 | break; |
8140 | |
8141 | case OpGroupNonUniformBallotFindLSB: |
8142 | case OpGroupNonUniformBallotFindMSB: |
8143 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB); |
8144 | break; |
8145 | |
8146 | case OpGroupNonUniformBroadcast: |
8147 | case OpGroupNonUniformBroadcastFirst: |
8148 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBroadcast_First); |
8149 | break; |
8150 | |
8151 | case OpGroupNonUniformShuffle: |
8152 | case OpGroupNonUniformShuffleXor: |
8153 | require_extension_internal(ext: "GL_KHR_shader_subgroup_shuffle" ); |
8154 | break; |
8155 | |
8156 | case OpGroupNonUniformShuffleUp: |
8157 | case OpGroupNonUniformShuffleDown: |
8158 | require_extension_internal(ext: "GL_KHR_shader_subgroup_shuffle_relative" ); |
8159 | break; |
8160 | |
8161 | case OpGroupNonUniformAll: |
8162 | case OpGroupNonUniformAny: |
8163 | case OpGroupNonUniformAllEqual: |
8164 | { |
8165 | const SPIRType &type = expression_type(id: ops[3]); |
8166 | if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u) |
8167 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool); |
8168 | else |
8169 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupAllEqualT); |
8170 | } |
8171 | break; |
8172 | |
8173 | case OpGroupNonUniformFAdd: |
8174 | case OpGroupNonUniformFMul: |
8175 | case OpGroupNonUniformFMin: |
8176 | case OpGroupNonUniformFMax: |
8177 | case OpGroupNonUniformIAdd: |
8178 | case OpGroupNonUniformIMul: |
8179 | case OpGroupNonUniformSMin: |
8180 | case OpGroupNonUniformSMax: |
8181 | case OpGroupNonUniformUMin: |
8182 | case OpGroupNonUniformUMax: |
8183 | case OpGroupNonUniformBitwiseAnd: |
8184 | case OpGroupNonUniformBitwiseOr: |
8185 | case OpGroupNonUniformBitwiseXor: |
8186 | case OpGroupNonUniformLogicalAnd: |
8187 | case OpGroupNonUniformLogicalOr: |
8188 | case OpGroupNonUniformLogicalXor: |
8189 | { |
8190 | auto operation = static_cast<GroupOperation>(ops[3]); |
8191 | if (operation == GroupOperationClusteredReduce) |
8192 | { |
8193 | require_extension_internal(ext: "GL_KHR_shader_subgroup_clustered" ); |
8194 | } |
8195 | else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan || |
8196 | operation == GroupOperationReduce) |
8197 | { |
8198 | require_extension_internal(ext: "GL_KHR_shader_subgroup_arithmetic" ); |
8199 | } |
8200 | else |
8201 | SPIRV_CROSS_THROW("Invalid group operation." ); |
8202 | break; |
8203 | } |
8204 | |
8205 | case OpGroupNonUniformQuadSwap: |
8206 | case OpGroupNonUniformQuadBroadcast: |
8207 | require_extension_internal(ext: "GL_KHR_shader_subgroup_quad" ); |
8208 | break; |
8209 | |
8210 | default: |
8211 | SPIRV_CROSS_THROW("Invalid opcode for subgroup." ); |
8212 | } |
8213 | |
8214 | uint32_t result_type = ops[0]; |
8215 | uint32_t id = ops[1]; |
8216 | |
8217 | auto scope = static_cast<Scope>(evaluate_constant_u32(id: ops[2])); |
8218 | if (scope != ScopeSubgroup) |
8219 | SPIRV_CROSS_THROW("Only subgroup scope is supported." ); |
8220 | |
8221 | switch (op) |
8222 | { |
8223 | case OpGroupNonUniformElect: |
8224 | emit_op(result_type, result_id: id, rhs: "subgroupElect()" , forwarding: true); |
8225 | break; |
8226 | |
8227 | case OpGroupNonUniformBroadcast: |
8228 | emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupBroadcast" ); |
8229 | break; |
8230 | |
8231 | case OpGroupNonUniformBroadcastFirst: |
8232 | emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupBroadcastFirst" ); |
8233 | break; |
8234 | |
8235 | case OpGroupNonUniformBallot: |
8236 | emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupBallot" ); |
8237 | break; |
8238 | |
8239 | case OpGroupNonUniformInverseBallot: |
8240 | emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupInverseBallot" ); |
8241 | break; |
8242 | |
8243 | case OpGroupNonUniformBallotBitExtract: |
8244 | emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupBallotBitExtract" ); |
8245 | break; |
8246 | |
8247 | case OpGroupNonUniformBallotFindLSB: |
8248 | emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupBallotFindLSB" ); |
8249 | break; |
8250 | |
8251 | case OpGroupNonUniformBallotFindMSB: |
8252 | emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupBallotFindMSB" ); |
8253 | break; |
8254 | |
8255 | case OpGroupNonUniformBallotBitCount: |
8256 | { |
8257 | auto operation = static_cast<GroupOperation>(ops[3]); |
8258 | if (operation == GroupOperationReduce) |
8259 | emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "subgroupBallotBitCount" ); |
8260 | else if (operation == GroupOperationInclusiveScan) |
8261 | emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "subgroupBallotInclusiveBitCount" ); |
8262 | else if (operation == GroupOperationExclusiveScan) |
8263 | emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "subgroupBallotExclusiveBitCount" ); |
8264 | else |
8265 | SPIRV_CROSS_THROW("Invalid BitCount operation." ); |
8266 | break; |
8267 | } |
8268 | |
8269 | case OpGroupNonUniformShuffle: |
8270 | emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupShuffle" ); |
8271 | break; |
8272 | |
8273 | case OpGroupNonUniformShuffleXor: |
8274 | emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupShuffleXor" ); |
8275 | break; |
8276 | |
8277 | case OpGroupNonUniformShuffleUp: |
8278 | emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupShuffleUp" ); |
8279 | break; |
8280 | |
8281 | case OpGroupNonUniformShuffleDown: |
8282 | emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupShuffleDown" ); |
8283 | break; |
8284 | |
8285 | case OpGroupNonUniformAll: |
8286 | emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupAll" ); |
8287 | break; |
8288 | |
8289 | case OpGroupNonUniformAny: |
8290 | emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupAny" ); |
8291 | break; |
8292 | |
8293 | case OpGroupNonUniformAllEqual: |
8294 | emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupAllEqual" ); |
8295 | break; |
8296 | |
8297 | // clang-format off |
8298 | #define GLSL_GROUP_OP(op, glsl_op) \ |
8299 | case OpGroupNonUniform##op: \ |
8300 | { \ |
8301 | auto operation = static_cast<GroupOperation>(ops[3]); \ |
8302 | if (operation == GroupOperationReduce) \ |
8303 | emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \ |
8304 | else if (operation == GroupOperationInclusiveScan) \ |
8305 | emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \ |
8306 | else if (operation == GroupOperationExclusiveScan) \ |
8307 | emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \ |
8308 | else if (operation == GroupOperationClusteredReduce) \ |
8309 | emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \ |
8310 | else \ |
8311 | SPIRV_CROSS_THROW("Invalid group operation."); \ |
8312 | break; \ |
8313 | } |
8314 | |
8315 | #define GLSL_GROUP_OP_CAST(op, glsl_op, type) \ |
8316 | case OpGroupNonUniform##op: \ |
8317 | { \ |
8318 | auto operation = static_cast<GroupOperation>(ops[3]); \ |
8319 | if (operation == GroupOperationReduce) \ |
8320 | emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \ |
8321 | else if (operation == GroupOperationInclusiveScan) \ |
8322 | emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \ |
8323 | else if (operation == GroupOperationExclusiveScan) \ |
8324 | emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \ |
8325 | else if (operation == GroupOperationClusteredReduce) \ |
8326 | emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \ |
8327 | else \ |
8328 | SPIRV_CROSS_THROW("Invalid group operation."); \ |
8329 | break; \ |
8330 | } |
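	// Taken together, these macros map each OpGroupNonUniform* arithmetic/logical op to the matching GLSL call:
	// e.g. OpGroupNonUniformFAdd with GroupOperationReduce becomes subgroupAdd(x), InclusiveScan becomes
	// subgroupInclusiveAdd(x), ExclusiveScan becomes subgroupExclusiveAdd(x), and ClusteredReduce becomes
	// subgroupClusteredAdd(x, cluster_size). The _CAST variant additionally casts operands and result to the
	// given signed/unsigned type (the int_type/uint_type computed above).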
8331 | |
8332 | GLSL_GROUP_OP(FAdd, Add) |
8333 | GLSL_GROUP_OP(FMul, Mul) |
8334 | GLSL_GROUP_OP(FMin, Min) |
8335 | GLSL_GROUP_OP(FMax, Max) |
8336 | GLSL_GROUP_OP(IAdd, Add) |
8337 | GLSL_GROUP_OP(IMul, Mul) |
8338 | GLSL_GROUP_OP_CAST(SMin, Min, int_type) |
8339 | GLSL_GROUP_OP_CAST(SMax, Max, int_type) |
8340 | GLSL_GROUP_OP_CAST(UMin, Min, uint_type) |
8341 | GLSL_GROUP_OP_CAST(UMax, Max, uint_type) |
8342 | GLSL_GROUP_OP(BitwiseAnd, And) |
8343 | GLSL_GROUP_OP(BitwiseOr, Or) |
8344 | GLSL_GROUP_OP(BitwiseXor, Xor) |
8345 | GLSL_GROUP_OP(LogicalAnd, And) |
8346 | GLSL_GROUP_OP(LogicalOr, Or) |
8347 | GLSL_GROUP_OP(LogicalXor, Xor) |
8348 | #undef GLSL_GROUP_OP |
8349 | #undef GLSL_GROUP_OP_CAST |
8350 | // clang-format on |
8351 | |
8352 | case OpGroupNonUniformQuadSwap: |
8353 | { |
8354 | uint32_t direction = evaluate_constant_u32(id: ops[4]); |
8355 | if (direction == 0) |
8356 | emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupQuadSwapHorizontal" ); |
8357 | else if (direction == 1) |
8358 | emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupQuadSwapVertical" ); |
8359 | else if (direction == 2) |
8360 | emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupQuadSwapDiagonal" ); |
8361 | else |
8362 | SPIRV_CROSS_THROW("Invalid quad swap direction." ); |
8363 | break; |
8364 | } |
8365 | |
8366 | case OpGroupNonUniformQuadBroadcast: |
8367 | { |
8368 | emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupQuadBroadcast" ); |
8369 | break; |
8370 | } |
8371 | |
8372 | default: |
8373 | SPIRV_CROSS_THROW("Invalid opcode for subgroup." ); |
8374 | } |
8375 | |
8376 | register_control_dependent_expression(expr: id); |
8377 | } |
8378 | |
8379 | string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) |
8380 | { |
8381 | // OpBitcast can deal with pointers. |
8382 | if (out_type.pointer || in_type.pointer) |
8383 | { |
8384 | if (out_type.vecsize == 2 || in_type.vecsize == 2) |
8385 | require_extension_internal(ext: "GL_EXT_buffer_reference_uvec2" ); |
8386 | return type_to_glsl(type: out_type); |
8387 | } |
8388 | |
8389 | if (out_type.basetype == in_type.basetype) |
8390 | return "" ; |
8391 | |
8392 | assert(out_type.basetype != SPIRType::Boolean); |
8393 | assert(in_type.basetype != SPIRType::Boolean); |
8394 | |
8395 | bool integral_cast = type_is_integral(type: out_type) && type_is_integral(type: in_type); |
8396 | bool same_size_cast = out_type.width == in_type.width; |
8397 | |
8398 | // Trivial bitcast case, casts between integers. |
8399 | if (integral_cast && same_size_cast) |
8400 | return type_to_glsl(type: out_type); |
8401 | |
8402 | // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types). |
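	// (unpack8 splits a wider integer scalar into a vector of 8-bit components, while pack16/pack32 reassemble
	// 8-bit components into a 16-/32-bit scalar; naming as per GL_EXT_shader_explicit_arithmetic_types.)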
8403 | if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1) |
8404 | return "unpack8" ; |
8405 | else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1) |
8406 | return "pack16" ; |
8407 | else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1) |
8408 | return "pack32" ; |
8409 | |
8410 | // Floating <-> Integer special casts. Just have to enumerate all cases. :( |
8411 | // 16-bit, 32-bit and 64-bit floats. |
8412 | if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) |
8413 | { |
8414 | if (is_legacy_es()) |
8415 | SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL." ); |
8416 | else if (!options.es && options.version < 330) |
8417 | require_extension_internal(ext: "GL_ARB_shader_bit_encoding" ); |
8418 | return "floatBitsToUint" ; |
8419 | } |
8420 | else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) |
8421 | { |
8422 | if (is_legacy_es()) |
8423 | SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL." ); |
8424 | else if (!options.es && options.version < 330) |
8425 | require_extension_internal(ext: "GL_ARB_shader_bit_encoding" ); |
8426 | return "floatBitsToInt" ; |
8427 | } |
8428 | else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) |
8429 | { |
8430 | if (is_legacy_es()) |
8431 | SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL." ); |
8432 | else if (!options.es && options.version < 330) |
8433 | require_extension_internal(ext: "GL_ARB_shader_bit_encoding" ); |
8434 | return "uintBitsToFloat" ; |
8435 | } |
8436 | else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) |
8437 | { |
8438 | if (is_legacy_es()) |
8439 | SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL." ); |
8440 | else if (!options.es && options.version < 330) |
8441 | require_extension_internal(ext: "GL_ARB_shader_bit_encoding" ); |
8442 | return "intBitsToFloat" ; |
8443 | } |
8444 | |
8445 | else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) |
8446 | return "doubleBitsToInt64" ; |
8447 | else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) |
8448 | return "doubleBitsToUint64" ; |
8449 | else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) |
8450 | return "int64BitsToDouble" ; |
8451 | else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) |
8452 | return "uint64BitsToDouble" ; |
8453 | else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half) |
8454 | return "float16BitsToInt16" ; |
8455 | else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) |
8456 | return "float16BitsToUint16" ; |
8457 | else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short) |
8458 | return "int16BitsToFloat16" ; |
8459 | else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) |
8460 | return "uint16BitsToFloat16" ; |
8461 | |
8462 | // And finally, some even more special purpose casts. |
8463 | if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2) |
8464 | return "packUint2x32" ; |
8465 | else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2) |
8466 | return "unpackUint2x32" ; |
8467 | else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) |
8468 | return "unpackFloat2x16" ; |
8469 | else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) |
8470 | return "packFloat2x16" ; |
8471 | else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2) |
8472 | return "packInt2x16" ; |
8473 | else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1) |
8474 | return "unpackInt2x16" ; |
8475 | else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2) |
8476 | return "packUint2x16" ; |
8477 | else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) |
8478 | return "unpackUint2x16" ; |
8479 | else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4) |
8480 | return "packInt4x16" ; |
8481 | else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1) |
8482 | return "unpackInt4x16" ; |
8483 | else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4) |
8484 | return "packUint4x16" ; |
8485 | else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1) |
8486 | return "unpackUint4x16" ; |
8487 | |
8488 | return "" ; |
8489 | } |
8490 | |
string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
{
	auto op = bitcast_glsl_op(result_type, expression_type(argument));
	if (op.empty())
		return to_enclosed_unpacked_expression(argument);
	else
		return join(op, "(", to_unpacked_expression(argument), ")");
}

std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
{
	auto expr = to_expression(arg);
	auto &src_type = expression_type(arg);
	if (src_type.basetype != target_type)
	{
		auto target = src_type;
		target.basetype = target_type;
		expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
	}

	return expr;
}

std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
                                             const std::string &expr)
{
	if (target_type.basetype == expr_type)
		return expr;

	auto src_type = target_type;
	src_type.basetype = expr_type;
	return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
}
8524 | |
8525 | string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) |
8526 | { |
8527 | switch (builtin) |
8528 | { |
8529 | case BuiltInPosition: |
8530 | return "gl_Position" ; |
8531 | case BuiltInPointSize: |
8532 | return "gl_PointSize" ; |
8533 | case BuiltInClipDistance: |
8534 | return "gl_ClipDistance" ; |
8535 | case BuiltInCullDistance: |
8536 | return "gl_CullDistance" ; |
8537 | case BuiltInVertexId: |
8538 | if (options.vulkan_semantics) |
8539 | SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created " |
8540 | "with GL semantics." ); |
8541 | return "gl_VertexID" ; |
8542 | case BuiltInInstanceId: |
8543 | if (options.vulkan_semantics) |
8544 | { |
8545 | auto model = get_entry_point().model; |
8546 | switch (model) |
8547 | { |
8548 | case spv::ExecutionModelIntersectionKHR: |
8549 | case spv::ExecutionModelAnyHitKHR: |
8550 | case spv::ExecutionModelClosestHitKHR: |
8551 | // gl_InstanceID is allowed in these shaders. |
8552 | break; |
8553 | |
8554 | default: |
8555 | SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was " |
8556 | "created with GL semantics." ); |
8557 | } |
8558 | } |
8559 | if (!options.es && options.version < 140) |
8560 | { |
8561 | require_extension_internal(ext: "GL_ARB_draw_instanced" ); |
8562 | } |
8563 | return "gl_InstanceID" ; |
8564 | case BuiltInVertexIndex: |
8565 | if (options.vulkan_semantics) |
8566 | return "gl_VertexIndex" ; |
8567 | else |
8568 | return "gl_VertexID" ; // gl_VertexID already has the base offset applied. |
8569 | case BuiltInInstanceIndex: |
8570 | if (options.vulkan_semantics) |
8571 | return "gl_InstanceIndex" ; |
8572 | |
8573 | if (!options.es && options.version < 140) |
8574 | { |
8575 | require_extension_internal(ext: "GL_ARB_draw_instanced" ); |
8576 | } |
8577 | |
8578 | if (options.vertex.support_nonzero_base_instance) |
8579 | { |
8580 | if (!options.vulkan_semantics) |
8581 | { |
8582 | // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported. |
8583 | require_extension_internal(ext: "GL_ARB_shader_draw_parameters" ); |
8584 | } |
			return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // gl_InstanceID does not include the base instance, unlike gl_VertexID above.
8586 | } |
8587 | else |
8588 | return "gl_InstanceID" ; |
8589 | case BuiltInPrimitiveId: |
8590 | if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry) |
8591 | return "gl_PrimitiveIDIn" ; |
8592 | else |
8593 | return "gl_PrimitiveID" ; |
8594 | case BuiltInInvocationId: |
8595 | return "gl_InvocationID" ; |
8596 | case BuiltInLayer: |
8597 | return "gl_Layer" ; |
8598 | case BuiltInViewportIndex: |
8599 | return "gl_ViewportIndex" ; |
8600 | case BuiltInTessLevelOuter: |
8601 | return "gl_TessLevelOuter" ; |
8602 | case BuiltInTessLevelInner: |
8603 | return "gl_TessLevelInner" ; |
8604 | case BuiltInTessCoord: |
8605 | return "gl_TessCoord" ; |
8606 | case BuiltInFragCoord: |
8607 | return "gl_FragCoord" ; |
8608 | case BuiltInPointCoord: |
8609 | return "gl_PointCoord" ; |
8610 | case BuiltInFrontFacing: |
8611 | return "gl_FrontFacing" ; |
8612 | case BuiltInFragDepth: |
8613 | return "gl_FragDepth" ; |
8614 | case BuiltInNumWorkgroups: |
8615 | return "gl_NumWorkGroups" ; |
8616 | case BuiltInWorkgroupSize: |
8617 | return "gl_WorkGroupSize" ; |
8618 | case BuiltInWorkgroupId: |
8619 | return "gl_WorkGroupID" ; |
8620 | case BuiltInLocalInvocationId: |
8621 | return "gl_LocalInvocationID" ; |
8622 | case BuiltInGlobalInvocationId: |
8623 | return "gl_GlobalInvocationID" ; |
8624 | case BuiltInLocalInvocationIndex: |
8625 | return "gl_LocalInvocationIndex" ; |
8626 | case BuiltInHelperInvocation: |
8627 | return "gl_HelperInvocation" ; |
8628 | |
8629 | case BuiltInBaseVertex: |
8630 | if (options.es) |
8631 | SPIRV_CROSS_THROW("BaseVertex not supported in ES profile." ); |
8632 | |
8633 | if (options.vulkan_semantics) |
8634 | { |
8635 | if (options.version < 460) |
8636 | { |
8637 | require_extension_internal(ext: "GL_ARB_shader_draw_parameters" ); |
8638 | return "gl_BaseVertexARB" ; |
8639 | } |
8640 | return "gl_BaseVertex" ; |
8641 | } |
8642 | // On regular GL, this is soft-enabled and we emit ifdefs in code. |
8643 | require_extension_internal(ext: "GL_ARB_shader_draw_parameters" ); |
8644 | return "SPIRV_Cross_BaseVertex" ; |
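		// Only the name is chosen here; the #ifdef fallback hinted at above (presumably defining
		// SPIRV_Cross_BaseVertex from gl_BaseVertexARB when GL_ARB_shader_draw_parameters is present,
		// with a plain uniform as the fallback) is emitted elsewhere in the compiler.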
8645 | |
8646 | case BuiltInBaseInstance: |
8647 | if (options.es) |
8648 | SPIRV_CROSS_THROW("BaseInstance not supported in ES profile." ); |
8649 | |
8650 | if (options.vulkan_semantics) |
8651 | { |
8652 | if (options.version < 460) |
8653 | { |
8654 | require_extension_internal(ext: "GL_ARB_shader_draw_parameters" ); |
8655 | return "gl_BaseInstanceARB" ; |
8656 | } |
8657 | return "gl_BaseInstance" ; |
8658 | } |
8659 | // On regular GL, this is soft-enabled and we emit ifdefs in code. |
8660 | require_extension_internal(ext: "GL_ARB_shader_draw_parameters" ); |
8661 | return "SPIRV_Cross_BaseInstance" ; |
8662 | |
8663 | case BuiltInDrawIndex: |
8664 | if (options.es) |
8665 | SPIRV_CROSS_THROW("DrawIndex not supported in ES profile." ); |
8666 | |
8667 | if (options.vulkan_semantics) |
8668 | { |
8669 | if (options.version < 460) |
8670 | { |
8671 | require_extension_internal(ext: "GL_ARB_shader_draw_parameters" ); |
8672 | return "gl_DrawIDARB" ; |
8673 | } |
8674 | return "gl_DrawID" ; |
8675 | } |
8676 | // On regular GL, this is soft-enabled and we emit ifdefs in code. |
8677 | require_extension_internal(ext: "GL_ARB_shader_draw_parameters" ); |
8678 | return "gl_DrawIDARB" ; |
8679 | |
8680 | case BuiltInSampleId: |
8681 | if (options.es && options.version < 320) |
8682 | require_extension_internal(ext: "GL_OES_sample_variables" ); |
8683 | if (!options.es && options.version < 400) |
8684 | SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400." ); |
8685 | return "gl_SampleID" ; |
8686 | |
8687 | case BuiltInSampleMask: |
8688 | if (options.es && options.version < 320) |
8689 | require_extension_internal(ext: "GL_OES_sample_variables" ); |
8690 | if (!options.es && options.version < 400) |
8691 | SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400." ); |
8692 | |
8693 | if (storage == StorageClassInput) |
8694 | return "gl_SampleMaskIn" ; |
8695 | else |
8696 | return "gl_SampleMask" ; |
8697 | |
8698 | case BuiltInSamplePosition: |
8699 | if (options.es && options.version < 320) |
8700 | require_extension_internal(ext: "GL_OES_sample_variables" ); |
8701 | if (!options.es && options.version < 400) |
8702 | SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400." ); |
8703 | return "gl_SamplePosition" ; |
8704 | |
8705 | case BuiltInViewIndex: |
8706 | if (options.vulkan_semantics) |
8707 | return "gl_ViewIndex" ; |
8708 | else |
8709 | return "gl_ViewID_OVR" ; |
8710 | |
8711 | case BuiltInNumSubgroups: |
8712 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::NumSubgroups); |
8713 | return "gl_NumSubgroups" ; |
8714 | |
8715 | case BuiltInSubgroupId: |
8716 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupID); |
8717 | return "gl_SubgroupID" ; |
8718 | |
8719 | case BuiltInSubgroupSize: |
8720 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupSize); |
8721 | return "gl_SubgroupSize" ; |
8722 | |
8723 | case BuiltInSubgroupLocalInvocationId: |
8724 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupInvocationID); |
8725 | return "gl_SubgroupInvocationID" ; |
8726 | |
8727 | case BuiltInSubgroupEqMask: |
8728 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask); |
8729 | return "gl_SubgroupEqMask" ; |
8730 | |
8731 | case BuiltInSubgroupGeMask: |
8732 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask); |
8733 | return "gl_SubgroupGeMask" ; |
8734 | |
8735 | case BuiltInSubgroupGtMask: |
8736 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask); |
8737 | return "gl_SubgroupGtMask" ; |
8738 | |
8739 | case BuiltInSubgroupLeMask: |
8740 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask); |
8741 | return "gl_SubgroupLeMask" ; |
8742 | |
8743 | case BuiltInSubgroupLtMask: |
8744 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMask); |
8745 | return "gl_SubgroupLtMask" ; |
8746 | |
8747 | case BuiltInLaunchIdKHR: |
8748 | return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV" ; |
8749 | case BuiltInLaunchSizeKHR: |
8750 | return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV" ; |
8751 | case BuiltInWorldRayOriginKHR: |
8752 | return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV" ; |
8753 | case BuiltInWorldRayDirectionKHR: |
8754 | return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV" ; |
8755 | case BuiltInObjectRayOriginKHR: |
8756 | return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV" ; |
8757 | case BuiltInObjectRayDirectionKHR: |
8758 | return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV" ; |
8759 | case BuiltInRayTminKHR: |
8760 | return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV" ; |
8761 | case BuiltInRayTmaxKHR: |
8762 | return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV" ; |
8763 | case BuiltInInstanceCustomIndexKHR: |
8764 | return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV" ; |
8765 | case BuiltInObjectToWorldKHR: |
8766 | return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV" ; |
8767 | case BuiltInWorldToObjectKHR: |
8768 | return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV" ; |
8769 | case BuiltInHitTNV: |
		// In KHR ray tracing, gl_HitTEXT is just an alias of gl_RayTmaxEXT, so only the NV name is returned here.
		return "gl_HitTNV";
8772 | case BuiltInHitKindKHR: |
8773 | return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV" ; |
8774 | case BuiltInIncomingRayFlagsKHR: |
8775 | return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV" ; |
8776 | |
8777 | case BuiltInBaryCoordKHR: |
8778 | { |
8779 | if (options.es && options.version < 320) |
8780 | SPIRV_CROSS_THROW("gl_BaryCoordEXT requires ESSL 320." ); |
8781 | else if (!options.es && options.version < 450) |
8782 | SPIRV_CROSS_THROW("gl_BaryCoordEXT requires GLSL 450." ); |
8783 | |
8784 | if (barycentric_is_nv) |
8785 | { |
8786 | require_extension_internal(ext: "GL_NV_fragment_shader_barycentric" ); |
8787 | return "gl_BaryCoordNV" ; |
8788 | } |
8789 | else |
8790 | { |
8791 | require_extension_internal(ext: "GL_EXT_fragment_shader_barycentric" ); |
8792 | return "gl_BaryCoordEXT" ; |
8793 | } |
8794 | } |
8795 | |
8796 | case BuiltInBaryCoordNoPerspNV: |
8797 | { |
8798 | if (options.es && options.version < 320) |
8799 | SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires ESSL 320." ); |
8800 | else if (!options.es && options.version < 450) |
8801 | SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires GLSL 450." ); |
8802 | |
8803 | if (barycentric_is_nv) |
8804 | { |
8805 | require_extension_internal(ext: "GL_NV_fragment_shader_barycentric" ); |
8806 | return "gl_BaryCoordNoPerspNV" ; |
8807 | } |
8808 | else |
8809 | { |
8810 | require_extension_internal(ext: "GL_EXT_fragment_shader_barycentric" ); |
8811 | return "gl_BaryCoordNoPerspEXT" ; |
8812 | } |
8813 | } |
8814 | |
8815 | case BuiltInFragStencilRefEXT: |
8816 | { |
8817 | if (!options.es) |
8818 | { |
8819 | require_extension_internal(ext: "GL_ARB_shader_stencil_export" ); |
8820 | return "gl_FragStencilRefARB" ; |
8821 | } |
8822 | else |
8823 | SPIRV_CROSS_THROW("Stencil export not supported in GLES." ); |
8824 | } |
8825 | |
8826 | case BuiltInPrimitiveShadingRateKHR: |
8827 | { |
8828 | if (!options.vulkan_semantics) |
8829 | SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL." ); |
8830 | require_extension_internal(ext: "GL_EXT_fragment_shading_rate" ); |
8831 | return "gl_PrimitiveShadingRateEXT" ; |
8832 | } |
8833 | |
8834 | case BuiltInShadingRateKHR: |
8835 | { |
8836 | if (!options.vulkan_semantics) |
8837 | SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL." ); |
8838 | require_extension_internal(ext: "GL_EXT_fragment_shading_rate" ); |
8839 | return "gl_ShadingRateEXT" ; |
8840 | } |
8841 | |
8842 | case BuiltInDeviceIndex: |
8843 | if (!options.vulkan_semantics) |
8844 | SPIRV_CROSS_THROW("Need Vulkan semantics for device group support." ); |
8845 | require_extension_internal(ext: "GL_EXT_device_group" ); |
8846 | return "gl_DeviceIndex" ; |
8847 | |
8848 | case BuiltInFullyCoveredEXT: |
8849 | if (!options.es) |
8850 | require_extension_internal(ext: "GL_NV_conservative_raster_underestimation" ); |
8851 | else |
8852 | SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation." ); |
8853 | return "gl_FragFullyCoveredNV" ; |
8854 | |
8855 | default: |
8856 | return join(ts: "gl_BuiltIn_" , ts: convert_to_string(t: builtin)); |
8857 | } |
8858 | } |
8859 | |
const char *CompilerGLSL::index_to_swizzle(uint32_t index)
{
	switch (index)
	{
	case 0:
		return "x";
	case 1:
		return "y";
	case 2:
		return "z";
	case 3:
		return "w";
	default:
		return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in spec.
	}
}

void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
                                                      AccessChainFlags flags, bool & /*access_chain_is_arrayed*/,
                                                      uint32_t index)
{
	bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
	bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;

	expr += "[";

	if (index_is_literal)
		expr += convert_to_string(index);
	else
		expr += to_unpacked_expression(index, register_expression_read);

	expr += "]";
}

bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
{
	return true;
}
8898 | |
8899 | string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, |
8900 | AccessChainFlags flags, AccessChainMeta *meta) |
8901 | { |
8902 | string expr; |
8903 | |
8904 | bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; |
8905 | bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0; |
8906 | bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0; |
8907 | bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0; |
8908 | bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; |
8909 | bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0; |
8910 | |
8911 | if (!chain_only) |
8912 | { |
8913 | // We handle transpose explicitly, so don't resolve that here. |
8914 | auto *e = maybe_get<SPIRExpression>(id: base); |
8915 | bool old_transpose = e && e->need_transpose; |
8916 | if (e) |
8917 | e->need_transpose = false; |
8918 | expr = to_enclosed_expression(id: base, register_expression_read); |
8919 | if (e) |
8920 | e->need_transpose = old_transpose; |
8921 | } |
8922 | |
8923 | // Start traversing type hierarchy at the proper non-pointer types, |
8924 | // but keep type_id referencing the original pointer for use below. |
8925 | uint32_t type_id = expression_type_id(id: base); |
8926 | |
8927 | if (!backend.native_pointers) |
8928 | { |
8929 | if (ptr_chain) |
8930 | SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain." ); |
8931 | |
8932 | // Wrapped buffer reference pointer types will need to poke into the internal "value" member before |
8933 | // continuing the access chain. |
8934 | if (should_dereference(id: base)) |
8935 | { |
8936 | auto &type = get<SPIRType>(id: type_id); |
8937 | expr = dereference_expression(expr_type: type, expr); |
8938 | } |
8939 | } |
8940 | |
8941 | const auto *type = &get_pointee_type(type_id); |
8942 | |
8943 | bool access_chain_is_arrayed = expr.find_first_of(c: '[') != string::npos; |
8944 | bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(id: base); |
8945 | bool is_packed = has_extended_decoration(id: base, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
8946 | uint32_t physical_type = get_extended_decoration(id: base, decoration: SPIRVCrossDecorationPhysicalTypeID); |
8947 | bool is_invariant = has_decoration(id: base, decoration: DecorationInvariant); |
8948 | bool relaxed_precision = has_decoration(id: base, decoration: DecorationRelaxedPrecision); |
8949 | bool pending_array_enclose = false; |
8950 | bool dimension_flatten = false; |
8951 | |
8952 | const auto append_index = [&](uint32_t index, bool is_literal) { |
8953 | AccessChainFlags mod_flags = flags; |
8954 | if (!is_literal) |
8955 | mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT; |
8956 | access_chain_internal_append_index(expr, base, type, flags: mod_flags, access_chain_is_arrayed, index); |
8957 | }; |
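	// The literal bit only applies to indices taken directly from the instruction stream; once an index is
	// known to be an ID, the bit is cleared before forwarding so the ID is emitted as an expression instead.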
8958 | |
8959 | for (uint32_t i = 0; i < count; i++) |
8960 | { |
8961 | uint32_t index = indices[i]; |
8962 | |
8963 | bool is_literal = index_is_literal; |
8964 | if (is_literal && msb_is_id && (index >> 31u) != 0u) |
8965 | { |
8966 | is_literal = false; |
8967 | index &= 0x7fffffffu; |
8968 | } |
8969 | |
8970 | // Pointer chains |
8971 | if (ptr_chain && i == 0) |
8972 | { |
8973 | // If we are flattening multidimensional arrays, only create opening bracket on first |
8974 | // array index. |
8975 | if (options.flatten_multidimensional_arrays) |
8976 | { |
8977 | dimension_flatten = type->array.size() >= 1; |
8978 | pending_array_enclose = dimension_flatten; |
8979 | if (pending_array_enclose) |
8980 | expr += "[" ; |
8981 | } |
8982 | |
8983 | if (options.flatten_multidimensional_arrays && dimension_flatten) |
8984 | { |
8985 | // If we are flattening multidimensional arrays, do manual stride computation. |
8986 | if (is_literal) |
8987 | expr += convert_to_string(t: index); |
8988 | else |
8989 | expr += to_enclosed_expression(id: index, register_expression_read); |
8990 | |
8991 | for (auto j = uint32_t(type->array.size()); j; j--) |
8992 | { |
8993 | expr += " * " ; |
8994 | expr += enclose_expression(expr: to_array_size(type: *type, index: j - 1)); |
8995 | } |
8996 | |
8997 | if (type->array.empty()) |
8998 | pending_array_enclose = false; |
8999 | else |
9000 | expr += " + " ; |
9001 | |
9002 | if (!pending_array_enclose) |
9003 | expr += "]" ; |
9004 | } |
9005 | else |
9006 | { |
9007 | append_index(index, is_literal); |
9008 | } |
9009 | |
9010 | if (type->basetype == SPIRType::ControlPointArray) |
9011 | { |
9012 | type_id = type->parent_type; |
9013 | type = &get<SPIRType>(id: type_id); |
9014 | } |
9015 | |
9016 | access_chain_is_arrayed = true; |
9017 | } |
9018 | // Arrays |
9019 | else if (!type->array.empty()) |
9020 | { |
9021 | // If we are flattening multidimensional arrays, only create opening bracket on first |
9022 | // array index. |
9023 | if (options.flatten_multidimensional_arrays && !pending_array_enclose) |
9024 | { |
9025 | dimension_flatten = type->array.size() > 1; |
9026 | pending_array_enclose = dimension_flatten; |
9027 | if (pending_array_enclose) |
9028 | expr += "[" ; |
9029 | } |
9030 | |
9031 | assert(type->parent_type); |
9032 | |
9033 | auto *var = maybe_get<SPIRVariable>(id: base); |
9034 | if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(var: *var) && |
9035 | !has_decoration(id: type->self, decoration: DecorationBlock)) |
9036 | { |
9037 | // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared. |
9038 | // Normally, these variables live in blocks when compiled from GLSL, |
9039 | // but HLSL seems to just emit straight arrays here. |
9040 | // We must pretend this access goes through gl_in/gl_out arrays |
9041 | // to be able to access certain builtins as arrays. |
9042 | auto builtin = ir.meta[base].decoration.builtin_type; |
9043 | switch (builtin) |
9044 | { |
9045 | // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom. |
9046 | // case BuiltInClipDistance: |
9047 | case BuiltInPosition: |
9048 | case BuiltInPointSize: |
9049 | if (var->storage == StorageClassInput) |
9050 | expr = join(ts: "gl_in[" , ts: to_expression(id: index, register_expression_read), ts: "]." , ts&: expr); |
9051 | else if (var->storage == StorageClassOutput) |
9052 | expr = join(ts: "gl_out[" , ts: to_expression(id: index, register_expression_read), ts: "]." , ts&: expr); |
9053 | else |
9054 | append_index(index, is_literal); |
9055 | break; |
9056 | |
9057 | default: |
9058 | append_index(index, is_literal); |
9059 | break; |
9060 | } |
9061 | } |
9062 | else if (options.flatten_multidimensional_arrays && dimension_flatten) |
9063 | { |
9064 | // If we are flattening multidimensional arrays, do manual stride computation. |
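			// E.g. with `float a[4][3]` declared as one flattened array, the access chain a[i][j] is
			// emitted as a[i * 3 + j].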
9065 | auto &parent_type = get<SPIRType>(id: type->parent_type); |
9066 | |
9067 | if (is_literal) |
9068 | expr += convert_to_string(t: index); |
9069 | else |
9070 | expr += to_enclosed_expression(id: index, register_expression_read); |
9071 | |
9072 | for (auto j = uint32_t(parent_type.array.size()); j; j--) |
9073 | { |
9074 | expr += " * " ; |
9075 | expr += enclose_expression(expr: to_array_size(type: parent_type, index: j - 1)); |
9076 | } |
9077 | |
9078 | if (parent_type.array.empty()) |
9079 | pending_array_enclose = false; |
9080 | else |
9081 | expr += " + " ; |
9082 | |
9083 | if (!pending_array_enclose) |
9084 | expr += "]" ; |
9085 | } |
9086 | // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal. |
9087 | // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask. |
9088 | else if (!builtin_translates_to_nonarray(builtin: BuiltIn(get_decoration(id: base, decoration: DecorationBuiltIn)))) |
9089 | { |
9090 | append_index(index, is_literal); |
9091 | } |
9092 | |
9093 | type_id = type->parent_type; |
9094 | type = &get<SPIRType>(id: type_id); |
9095 | |
9096 | access_chain_is_arrayed = true; |
9097 | } |
9098 | // For structs, the index refers to a constant, which indexes into the members, possibly through a redirection mapping. |
9099 | // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. |
9100 | else if (type->basetype == SPIRType::Struct) |
9101 | { |
9102 | if (!is_literal) |
9103 | index = evaluate_constant_u32(id: index); |
9104 | |
9105 | if (index < uint32_t(type->member_type_index_redirection.size())) |
9106 | index = type->member_type_index_redirection[index]; |
9107 | |
9108 | if (index >= type->member_types.size()) |
9109 | SPIRV_CROSS_THROW("Member index is out of bounds!" ); |
9110 | |
9111 | BuiltIn builtin; |
9112 | if (is_member_builtin(type: *type, index, builtin: &builtin) && access_chain_needs_stage_io_builtin_translation(base)) |
9113 | { |
9114 | if (access_chain_is_arrayed) |
9115 | { |
9116 | expr += "." ; |
9117 | expr += builtin_to_glsl(builtin, storage: type->storage); |
9118 | } |
9119 | else |
9120 | expr = builtin_to_glsl(builtin, storage: type->storage); |
9121 | } |
9122 | else |
9123 | { |
9124 | // If the member has a qualified name, use it as the entire chain |
9125 | string qual_mbr_name = get_member_qualified_name(type_id, index); |
9126 | if (!qual_mbr_name.empty()) |
9127 | expr = qual_mbr_name; |
9128 | else if (flatten_member_reference) |
9129 | expr += join(ts: "_" , ts: to_member_name(type: *type, index)); |
9130 | else |
9131 | expr += to_member_reference(base, type: *type, index, ptr_chain); |
9132 | } |
9133 | |
9134 | if (has_member_decoration(id: type->self, index, decoration: DecorationInvariant)) |
9135 | is_invariant = true; |
9136 | if (has_member_decoration(id: type->self, index, decoration: DecorationRelaxedPrecision)) |
9137 | relaxed_precision = true; |
9138 | |
9139 | is_packed = member_is_packed_physical_type(type: *type, index); |
9140 | if (member_is_remapped_physical_type(type: *type, index)) |
9141 | physical_type = get_extended_member_decoration(type: type->self, index, decoration: SPIRVCrossDecorationPhysicalTypeID); |
9142 | else |
9143 | physical_type = 0; |
9144 | |
9145 | row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(type: *type, index); |
9146 | type = &get<SPIRType>(id: type->member_types[index]); |
9147 | } |
9148 | // Matrix -> Vector |
9149 | else if (type->columns > 1) |
9150 | { |
9151 | // If we have a row-major matrix here, we need to defer any transpose in case this access chain |
9152 | // is used to store a column. We can resolve it right here and now if we access a scalar directly, |
9153 | // by flipping indexing order of the matrix. |
9154 | |
9155 | expr += "[" ; |
9156 | if (is_literal) |
9157 | expr += convert_to_string(t: index); |
9158 | else |
9159 | expr += to_unpacked_expression(id: index, register_expression_read); |
9160 | expr += "]" ; |
9161 | |
9162 | type_id = type->parent_type; |
9163 | type = &get<SPIRType>(id: type_id); |
9164 | } |
9165 | // Vector -> Scalar |
9166 | else if (type->vecsize > 1) |
9167 | { |
9168 | string deferred_index; |
9169 | if (row_major_matrix_needs_conversion) |
9170 | { |
9171 | // Flip indexing order. |
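				// E.g. a logical chain M[col][row] on a matrix that is physically stored transposed:
				// the trailing [col] just emitted is cut off here and re-appended after the row index
				// below, so the final expression becomes M[row][col].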
9172 | auto column_index = expr.find_last_of(c: '['); |
9173 | if (column_index != string::npos) |
9174 | { |
9175 | deferred_index = expr.substr(pos: column_index); |
9176 | expr.resize(n: column_index); |
9177 | } |
9178 | } |
9179 | |
9180 | // Internally, access chain implementation can also be used on composites, |
9181 | // ignore scalar access workarounds in this case. |
9182 | StorageClass effective_storage = StorageClassGeneric; |
9183 | bool ignore_potential_sliced_writes = false; |
9184 | if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0) |
9185 | { |
9186 | if (expression_type(id: base).pointer) |
9187 | effective_storage = get_expression_effective_storage_class(ptr: base); |
9188 | |
9189 | // Special consideration for control points. |
9190 | // Control points can only be written by InvocationID, so there is no need |
9191 | // to consider scalar access chains here. |
9192 | // Cleans up some cases where it's very painful to determine the accurate storage class |
9193 | // since blocks can be partially masked ... |
9194 | auto *var = maybe_get_backing_variable(chain: base); |
9195 | if (var && var->storage == StorageClassOutput && |
9196 | get_execution_model() == ExecutionModelTessellationControl && |
9197 | !has_decoration(id: var->self, decoration: DecorationPatch)) |
9198 | { |
9199 | ignore_potential_sliced_writes = true; |
9200 | } |
9201 | } |
9202 | else |
9203 | ignore_potential_sliced_writes = true; |
9204 | |
9205 | if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes) |
9206 | { |
9207 | // On some backends, we might not be able to safely access individual scalars in a vector. |
9208 | // To work around this, we might have to cast the access chain reference to something which can, |
9209 | // like a pointer to scalar, which we can then index into. |
9210 | prepare_access_chain_for_scalar_access(expr, type: get<SPIRType>(id: type->parent_type), storage: effective_storage, |
9211 | is_packed); |
9212 | } |
9213 | |
9214 | if (is_literal) |
9215 | { |
9216 | bool out_of_bounds = (index >= type->vecsize); |
9217 | |
9218 | if (!is_packed && !row_major_matrix_needs_conversion) |
9219 | { |
9220 | expr += "." ; |
9221 | expr += index_to_swizzle(index: out_of_bounds ? 0 : index); |
9222 | } |
9223 | else |
9224 | { |
9225 | // For packed vectors, we can only access them as an array, not by swizzle. |
9226 | expr += join(ts: "[" , ts: out_of_bounds ? 0 : index, ts: "]" ); |
9227 | } |
9228 | } |
9229 | else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion) |
9230 | { |
9231 | auto &c = get<SPIRConstant>(id: index); |
9232 | bool out_of_bounds = (c.scalar() >= type->vecsize); |
9233 | |
9234 | if (c.specialization) |
9235 | { |
9236 | // If the index is a spec constant, we cannot turn extract into a swizzle. |
9237 | expr += join(ts: "[" , ts: out_of_bounds ? "0" : to_expression(id: index), ts: "]" ); |
9238 | } |
9239 | else |
9240 | { |
9241 | expr += "." ; |
9242 | expr += index_to_swizzle(index: out_of_bounds ? 0 : c.scalar()); |
9243 | } |
9244 | } |
9245 | else |
9246 | { |
9247 | expr += "[" ; |
9248 | expr += to_unpacked_expression(id: index, register_expression_read); |
9249 | expr += "]" ; |
9250 | } |
9251 | |
9252 | if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes) |
9253 | { |
9254 | prepare_access_chain_for_scalar_access(expr, type: get<SPIRType>(id: type->parent_type), storage: effective_storage, |
9255 | is_packed); |
9256 | } |
9257 | |
9258 | expr += deferred_index; |
9259 | row_major_matrix_needs_conversion = false; |
9260 | |
9261 | is_packed = false; |
9262 | physical_type = 0; |
9263 | type_id = type->parent_type; |
9264 | type = &get<SPIRType>(id: type_id); |
9265 | } |
9266 | else if (!backend.allow_truncated_access_chain) |
9267 | SPIRV_CROSS_THROW("Cannot subdivide a scalar value!" ); |
9268 | } |
9269 | |
9270 | if (pending_array_enclose) |
9271 | { |
		SPIRV_CROSS_THROW("Flattening of multidimensional arrays was enabled, "
		                  "but the access chain was terminated in the middle of a multidimensional array. "
		                  "This is not supported.");
9275 | } |
9276 | |
9277 | if (meta) |
9278 | { |
9279 | meta->need_transpose = row_major_matrix_needs_conversion; |
9280 | meta->storage_is_packed = is_packed; |
9281 | meta->storage_is_invariant = is_invariant; |
9282 | meta->storage_physical_type = physical_type; |
9283 | meta->relaxed_precision = relaxed_precision; |
9284 | } |
9285 | |
9286 | return expr; |
9287 | } |
9288 | |
void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
{
}

string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
{
	auto ret = join(basename, "_", to_member_name(type, index));
	ParsedIR::sanitize_underscores(ret);
	return ret;
}
9299 | |
9300 | string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, |
9301 | AccessChainMeta *meta, bool ptr_chain) |
9302 | { |
9303 | if (flattened_buffer_blocks.count(x: base)) |
9304 | { |
9305 | uint32_t matrix_stride = 0; |
9306 | uint32_t array_stride = 0; |
9307 | bool need_transpose = false; |
9308 | flattened_access_chain_offset(basetype: expression_type(id: base), indices, count, offset: 0, word_stride: 16, need_transpose: &need_transpose, matrix_stride: &matrix_stride, |
9309 | array_stride: &array_stride, ptr_chain); |
9310 | |
9311 | if (meta) |
9312 | { |
9313 | meta->need_transpose = target_type.columns > 1 && need_transpose; |
9314 | meta->storage_is_packed = false; |
9315 | } |
9316 | |
9317 | return flattened_access_chain(base, indices, count, target_type, offset: 0, matrix_stride, array_stride, |
9318 | need_transpose); |
9319 | } |
9320 | else if (flattened_structs.count(x: base) && count > 0) |
9321 | { |
9322 | AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; |
9323 | if (ptr_chain) |
9324 | flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; |
9325 | |
9326 | if (flattened_structs[base]) |
9327 | { |
9328 | flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT; |
9329 | if (meta) |
9330 | meta->flattened_struct = target_type.basetype == SPIRType::Struct; |
9331 | } |
9332 | |
9333 | auto chain = access_chain_internal(base, indices, count, flags, meta: nullptr).substr(pos: 1); |
9334 | if (meta) |
9335 | { |
9336 | meta->need_transpose = false; |
9337 | meta->storage_is_packed = false; |
9338 | } |
9339 | |
9340 | auto basename = to_flattened_access_chain_expression(id: base); |
9341 | auto ret = join(ts&: basename, ts: "_" , ts&: chain); |
9342 | ParsedIR::sanitize_underscores(str&: ret); |
9343 | return ret; |
9344 | } |
9345 | else |
9346 | { |
9347 | AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; |
9348 | if (ptr_chain) |
9349 | flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; |
9350 | return access_chain_internal(base, indices, count, flags, meta); |
9351 | } |
9352 | } |
9353 | |
string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
{
	auto expr = type_to_glsl_constructor(type);
	expr += '(';

	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
	{
		if (i)
			expr += ", ";

		auto &member_type = get<SPIRType>(type.member_types[i]);
		if (member_type.basetype == SPIRType::Struct)
			expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type);
		else
			expr += to_flattened_struct_member(basename, type, i);
	}
	expr += ')';
	return expr;
}

std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
{
	// Do not use to_expression as that will unflatten access chains.
	string basename;
	if (const auto *var = maybe_get<SPIRVariable>(id))
		basename = to_name(var->self);
	else if (const auto *expr = maybe_get<SPIRExpression>(id))
		basename = expr->expression;
	else
		basename = to_expression(id);

	return basename;
}

void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
                                          const SmallVector<uint32_t> &indices)
{
	SmallVector<uint32_t> sub_indices = indices;
	sub_indices.push_back(0);

	auto *member_type = &type;
	for (auto &index : indices)
		member_type = &get<SPIRType>(member_type->member_types[index]);

	for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
	{
		sub_indices.back() = i;
		auto lhs = join(basename, "_", to_member_name(*member_type, i));
		ParsedIR::sanitize_underscores(lhs);

		if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
		{
			store_flattened_struct(lhs, rhs_id, type, sub_indices);
		}
		else
		{
			auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices);
			statement(lhs, " = ", rhs, ";");
		}
	}
}

void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
{
	auto &type = expression_type(lhs_id);
	auto basename = to_flattened_access_chain_expression(lhs_id);
	store_flattened_struct(basename, value, type, {});
}

std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
                                                 const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
                                                 uint32_t /* array_stride */, bool need_transpose)
{
	if (!target_type.array.empty())
		SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
	else if (target_type.basetype == SPIRType::Struct)
		return flattened_access_chain_struct(base, indices, count, target_type, offset);
	else if (target_type.columns > 1)
		return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
	else
		return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
}
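
// The helpers below rebuild a flattened access recursively: structs construct each member in turn, matrices
// construct one column (or row, when a transpose is pending) at a time, and vectors finally read vec4 words
// out of the flattened buffer and swizzle out the requested components.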
std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
                                                        const SPIRType &target_type, uint32_t offset)
{
	std::string expr;

	if (backend.can_declare_struct_inline)
	{
		expr += type_to_glsl_constructor(target_type);
		expr += "(";
	}
	else
		expr += "{";

	for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
	{
		if (i != 0)
			expr += ", ";

		const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
		uint32_t member_offset = type_struct_member_offset(target_type, i);

		// The access chain terminates at the struct, so we need to find matrix strides and row-major information
		// ahead of time.
		bool need_transpose = false;
		uint32_t matrix_stride = 0;
		if (member_type.columns > 1)
		{
			need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor);
			matrix_stride = type_struct_member_matrix_stride(target_type, i);
		}

		auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
		                                  0 /* array_stride */, need_transpose);

		// Cannot forward transpositions, so resolve them here.
		if (need_transpose)
			expr += convert_row_major_matrix(tmp, member_type, 0, false);
		else
			expr += tmp;
	}

	expr += backend.can_declare_struct_inline ? ")" : "}";

	return expr;
}

std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
                                                        const SPIRType &target_type, uint32_t offset,
                                                        uint32_t matrix_stride, bool need_transpose)
{
	assert(matrix_stride);
	SPIRType tmp_type = target_type;
	if (need_transpose)
		swap(tmp_type.vecsize, tmp_type.columns);

	std::string expr;

	expr += type_to_glsl_constructor(tmp_type);
	expr += "(";

	for (uint32_t i = 0; i < tmp_type.columns; i++)
	{
		if (i != 0)
			expr += ", ";

		expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
		                                      /* need_transpose= */ false);
	}

	expr += ")";

	return expr;
}

std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
                                                        const SPIRType &target_type, uint32_t offset,
                                                        uint32_t matrix_stride, bool need_transpose)
{
	auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);

	auto buffer_name = to_name(expression_type(base).self);
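	// result.first holds the dynamic part of the offset, already scaled to vec4 words, and result.second the
	// remaining constant byte offset. The byte offset is converted to a component index below, then split into
	// a vec4 array element (index / 4) and a component swizzle (index % 4).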

	if (need_transpose)
	{
		std::string expr;

		if (target_type.vecsize > 1)
		{
			expr += type_to_glsl_constructor(target_type);
			expr += "(";
		}

		for (uint32_t i = 0; i < target_type.vecsize; ++i)
		{
			if (i != 0)
				expr += ", ";

			uint32_t component_offset = result.second + i * matrix_stride;

			assert(component_offset % (target_type.width / 8) == 0);
			uint32_t index = component_offset / (target_type.width / 8);

			expr += buffer_name;
			expr += "[";
			expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
			expr += convert_to_string(index / 4);
			expr += "]";

			expr += vector_swizzle(1, index % 4);
		}

		if (target_type.vecsize > 1)
		{
			expr += ")";
		}

		return expr;
	}
	else
	{
		assert(result.second % (target_type.width / 8) == 0);
		uint32_t index = result.second / (target_type.width / 8);

		std::string expr;

		expr += buffer_name;
		expr += "[";
		expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
		expr += convert_to_string(index / 4);
		expr += "]";

		expr += vector_swizzle(target_type.vecsize, index % 4);

		return expr;
	}
}
9573 | |
9574 | std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset( |
9575 | const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride, |
9576 | bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain) |
9577 | { |
9578 | // Start traversing type hierarchy at the proper non-pointer types. |
9579 | const auto *type = &get_pointee_type(type: basetype); |
9580 | |
9581 | std::string expr; |
9582 | |
9583 | // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout. |
9584 | bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false; |
9585 | uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0; |
9586 | uint32_t array_stride = out_array_stride ? *out_array_stride : 0; |
9587 | |
9588 | for (uint32_t i = 0; i < count; i++) |
9589 | { |
9590 | uint32_t index = indices[i]; |
9591 | |
9592 | // Pointers |
9593 | if (ptr_chain && i == 0) |
9594 | { |
9595 | // Here, the pointer type will be decorated with an array stride. |
9596 | array_stride = get_decoration(id: basetype.self, decoration: DecorationArrayStride); |
9597 | if (!array_stride) |
9598 | SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block." ); |
9599 | |
9600 | auto *constant = maybe_get<SPIRConstant>(id: index); |
9601 | if (constant) |
9602 | { |
9603 | // Constant array access. |
9604 | offset += constant->scalar() * array_stride; |
9605 | } |
9606 | else |
9607 | { |
9608 | // Dynamic array access. |
9609 | if (array_stride % word_stride) |
9610 | { |
9611 | SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size " |
9612 | "of a 4-component vector. " |
9613 | "Likely culprit here is a float or vec2 array inside a push " |
9614 | "constant block which is std430. " |
9615 | "This cannot be flattened. Try using std140 layout instead." ); |
9616 | } |
9617 | |
9618 | expr += to_enclosed_expression(id: index); |
9619 | expr += " * " ; |
9620 | expr += convert_to_string(t: array_stride / word_stride); |
9621 | expr += " + " ; |
9622 | } |
9623 | } |
9624 | // Arrays |
9625 | else if (!type->array.empty()) |
9626 | { |
9627 | auto *constant = maybe_get<SPIRConstant>(id: index); |
9628 | if (constant) |
9629 | { |
9630 | // Constant array access. |
9631 | offset += constant->scalar() * array_stride; |
9632 | } |
9633 | else |
9634 | { |
9635 | // Dynamic array access. |
9636 | if (array_stride % word_stride) |
9637 | { |
9638 | SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size " |
9639 | "of a 4-component vector. " |
9640 | "Likely culprit here is a float or vec2 array inside a push " |
9641 | "constant block which is std430. " |
9642 | "This cannot be flattened. Try using std140 layout instead." ); |
9643 | } |
9644 | |
9645 | expr += to_enclosed_expression(id: index, register_expression_read: false); |
9646 | expr += " * " ; |
9647 | expr += convert_to_string(t: array_stride / word_stride); |
9648 | expr += " + " ; |
9649 | } |
9650 | |
9651 | uint32_t parent_type = type->parent_type; |
9652 | type = &get<SPIRType>(id: parent_type); |
9653 | |
9654 | if (!type->array.empty()) |
9655 | array_stride = get_decoration(id: parent_type, decoration: DecorationArrayStride); |
9656 | } |
9657 | // For structs, the index refers to a constant, which indexes into the members. |
9658 | // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. |
9659 | else if (type->basetype == SPIRType::Struct) |
9660 | { |
9661 | index = evaluate_constant_u32(id: index); |
9662 | |
9663 | if (index >= type->member_types.size()) |
9664 | SPIRV_CROSS_THROW("Member index is out of bounds!" ); |
9665 | |
9666 | offset += type_struct_member_offset(type: *type, index); |
9667 | |
9668 | auto &struct_type = *type; |
9669 | type = &get<SPIRType>(id: type->member_types[index]); |
9670 | |
9671 | if (type->columns > 1) |
9672 | { |
9673 | matrix_stride = type_struct_member_matrix_stride(type: struct_type, index); |
9674 | row_major_matrix_needs_conversion = |
9675 | combined_decoration_for_member(type: struct_type, index).get(bit: DecorationRowMajor); |
9676 | } |
9677 | else |
9678 | row_major_matrix_needs_conversion = false; |
9679 | |
9680 | if (!type->array.empty()) |
9681 | array_stride = type_struct_member_array_stride(type: struct_type, index); |
9682 | } |
9683 | // Matrix -> Vector |
9684 | else if (type->columns > 1) |
9685 | { |
9686 | auto *constant = maybe_get<SPIRConstant>(id: index); |
9687 | if (constant) |
9688 | { |
9689 | index = evaluate_constant_u32(id: index); |
9690 | offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride); |
9691 | } |
9692 | else |
9693 | { |
9694 | uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride; |
9695 | // Dynamic array access. |
9696 | if (indexing_stride % word_stride) |
9697 | { |
9698 | SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a " |
9699 | "4-component vector. " |
9700 | "Likely culprit here is a row-major matrix being accessed dynamically. " |
9701 | "This cannot be flattened. Try using std140 layout instead." ); |
9702 | } |
9703 | |
9704 | expr += to_enclosed_expression(id: index, register_expression_read: false); |
9705 | expr += " * " ; |
9706 | expr += convert_to_string(t: indexing_stride / word_stride); |
9707 | expr += " + " ; |
9708 | } |
9709 | |
9710 | type = &get<SPIRType>(id: type->parent_type); |
9711 | } |
9712 | // Vector -> Scalar |
9713 | else if (type->vecsize > 1) |
9714 | { |
9715 | auto *constant = maybe_get<SPIRConstant>(id: index); |
9716 | if (constant) |
9717 | { |
9718 | index = evaluate_constant_u32(id: index); |
9719 | offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8)); |
9720 | } |
9721 | else |
9722 | { |
9723 | uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8); |
9724 | |
9725 | // Dynamic array access. |
9726 | if (indexing_stride % word_stride) |
9727 | { |
9728 | SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the " |
9729 | "size of a 4-component vector. " |
9730 | "This cannot be flattened in legacy targets." ); |
9731 | } |
9732 | |
9733 | expr += to_enclosed_expression(id: index, register_expression_read: false); |
9734 | expr += " * " ; |
9735 | expr += convert_to_string(t: indexing_stride / word_stride); |
9736 | expr += " + " ; |
9737 | } |
9738 | |
9739 | type = &get<SPIRType>(id: type->parent_type); |
9740 | } |
9741 | else |
9742 | SPIRV_CROSS_THROW("Cannot subdivide a scalar value!" ); |
9743 | } |
9744 | |
9745 | if (need_transpose) |
9746 | *need_transpose = row_major_matrix_needs_conversion; |
9747 | if (out_matrix_stride) |
9748 | *out_matrix_stride = matrix_stride; |
9749 | if (out_array_stride) |
9750 | *out_array_stride = array_stride; |
9751 | |
9752 | return std::make_pair(x&: expr, y&: offset); |
9753 | } |
9754 | |
9755 | bool CompilerGLSL::should_dereference(uint32_t id) |
9756 | { |
9757 | const auto &type = expression_type(id); |
9758 | // Non-pointer expressions don't need to be dereferenced. |
9759 | if (!type.pointer) |
9760 | return false; |
9761 | |
9762 | // Handles shouldn't be dereferenced either. |
9763 | if (!expression_is_lvalue(id)) |
9764 | return false; |
9765 | |
9766 | // If id is a variable but not a phi variable, we should not dereference it. |
9767 | if (auto *var = maybe_get<SPIRVariable>(id)) |
9768 | return var->phi_variable; |
9769 | |
9770 | // If id is an access chain, we should not dereference it. |
9771 | if (auto *expr = maybe_get<SPIRExpression>(id)) |
9772 | return !expr->access_chain; |
9773 | |
9774 | // Otherwise, we should dereference this pointer expression. |
9775 | return true; |
9776 | } |
9777 | |
9778 | bool CompilerGLSL::should_forward(uint32_t id) const |
9779 | { |
	// If id is a variable, we will try to forward it regardless of the force_temporary check below.
	// This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL.
9782 | |
9783 | auto *var = maybe_get<SPIRVariable>(id); |
9784 | if (var) |
9785 | { |
9786 | // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation. |
9787 | return !(has_decoration(id, decoration: DecorationBuiltIn) && has_decoration(id, decoration: DecorationVolatile)); |
9788 | } |
9789 | |
9790 | // For debugging emit temporary variables for all expressions |
9791 | if (options.force_temporary) |
9792 | return false; |
9793 | |
9794 | // If an expression carries enough dependencies we need to stop forwarding at some point, |
9795 | // or we explode compilers. There are usually limits to how much we can nest expressions. |
9796 | auto *expr = maybe_get<SPIRExpression>(id); |
9797 | const uint32_t max_expression_dependencies = 64; |
9798 | if (expr && expr->expression_dependencies.size() >= max_expression_dependencies) |
9799 | return false; |
9800 | |
9801 | if (expr && expr->loaded_from |
9802 | && has_decoration(id: expr->loaded_from, decoration: DecorationBuiltIn) |
9803 | && has_decoration(id: expr->loaded_from, decoration: DecorationVolatile)) |
9804 | { |
9805 | // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation. |
9806 | return false; |
9807 | } |
9808 | |
9809 | // Immutable expression can always be forwarded. |
9810 | if (is_immutable(id)) |
9811 | return true; |
9812 | |
9813 | return false; |
9814 | } |
9815 | |
9816 | bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const |
9817 | { |
	// Used only by opcodes which don't do any real "work"; they just swizzle data in some fashion.
9819 | return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id); |
9820 | } |
9821 | |
9822 | void CompilerGLSL::track_expression_read(uint32_t id) |
9823 | { |
9824 | switch (ir.ids[id].get_type()) |
9825 | { |
9826 | case TypeExpression: |
9827 | { |
9828 | auto &e = get<SPIRExpression>(id); |
9829 | for (auto implied_read : e.implied_read_expressions) |
9830 | track_expression_read(id: implied_read); |
9831 | break; |
9832 | } |
9833 | |
9834 | case TypeAccessChain: |
9835 | { |
9836 | auto &e = get<SPIRAccessChain>(id); |
9837 | for (auto implied_read : e.implied_read_expressions) |
9838 | track_expression_read(id: implied_read); |
9839 | break; |
9840 | } |
9841 | |
9842 | default: |
9843 | break; |
9844 | } |
9845 | |
9846 | // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice. |
9847 | // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice. |
9848 | if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id)) |
9849 | { |
9850 | auto &v = expression_usage_counts[id]; |
9851 | v++; |
9852 | |
9853 | // If we create an expression outside a loop, |
9854 | // but access it inside a loop, we're implicitly reading it multiple times. |
9855 | // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion |
9856 | // working inside the backend compiler. |
9857 | if (expression_read_implies_multiple_reads(id)) |
9858 | v++; |
9859 | |
9860 | if (v >= 2) |
9861 | { |
9862 | //if (v == 2) |
9863 | // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id); |
9864 | |
9865 | // Force a recompile after this pass to avoid forwarding this variable. |
9866 | force_temporary_and_recompile(id); |
9867 | } |
9868 | } |
9869 | } |
9870 | |
9871 | bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure) |
9872 | { |
9873 | if (forced_temporaries.find(x: id) != end(cont&: forced_temporaries)) |
9874 | return false; |
9875 | |
9876 | for (uint32_t i = 0; i < num_args; i++) |
9877 | if (!should_forward(id: args[i])) |
9878 | return false; |
9879 | |
9880 | // We need to forward globals as well. |
9881 | if (!pure) |
9882 | { |
9883 | for (auto global : global_variables) |
9884 | if (!should_forward(id: global)) |
9885 | return false; |
9886 | for (auto aliased : aliased_variables) |
9887 | if (!should_forward(id: aliased)) |
9888 | return false; |
9889 | } |
9890 | |
9891 | return true; |
9892 | } |
9893 | |
9894 | void CompilerGLSL::register_impure_function_call() |
9895 | { |
9896 | // Impure functions can modify globals and aliased variables, so invalidate them as well. |
9897 | for (auto global : global_variables) |
9898 | flush_dependees(var&: get<SPIRVariable>(id: global)); |
9899 | for (auto aliased : aliased_variables) |
9900 | flush_dependees(var&: get<SPIRVariable>(id: aliased)); |
9901 | } |
9902 | |
9903 | void CompilerGLSL::register_call_out_argument(uint32_t id) |
9904 | { |
9905 | register_write(chain: id); |
9906 | |
9907 | auto *var = maybe_get<SPIRVariable>(id); |
9908 | if (var) |
9909 | flush_variable_declaration(id: var->self); |
9910 | } |
9911 | |
9912 | string CompilerGLSL::variable_decl_function_local(SPIRVariable &var) |
9913 | { |
9914 | // These variables are always function local, |
9915 | // so make sure we emit the variable without storage qualifiers. |
9916 | // Some backends will inject custom variables locally in a function |
9917 | // with a storage qualifier which is not function-local. |
9918 | auto old_storage = var.storage; |
9919 | var.storage = StorageClassFunction; |
9920 | auto expr = variable_decl(variable: var); |
9921 | var.storage = old_storage; |
9922 | return expr; |
9923 | } |
9924 | |
9925 | void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var) |
9926 | { |
9927 | // Ensure that we declare phi-variable copies even if the original declaration isn't deferred |
9928 | if (var.allocate_temporary_copy && !flushed_phi_variables.count(x: var.self)) |
9929 | { |
9930 | auto &type = get<SPIRType>(id: var.basetype); |
9931 | auto &flags = get_decoration_bitset(id: var.self); |
9932 | statement(ts: flags_to_qualifiers_glsl(type, flags), ts: variable_decl(type, name: join(ts: "_" , ts: var.self, ts: "_copy" )), ts: ";" ); |
9933 | flushed_phi_variables.insert(x: var.self); |
9934 | } |
9935 | } |
9936 | |
9937 | void CompilerGLSL::flush_variable_declaration(uint32_t id) |
9938 | { |
9939 | // Ensure that we declare phi-variable copies even if the original declaration isn't deferred |
9940 | auto *var = maybe_get<SPIRVariable>(id); |
9941 | if (var && var->deferred_declaration) |
9942 | { |
9943 | string initializer; |
9944 | if (options.force_zero_initialized_variables && |
9945 | (var->storage == StorageClassFunction || var->storage == StorageClassGeneric || |
9946 | var->storage == StorageClassPrivate) && |
9947 | !var->initializer && type_can_zero_initialize(type: get_variable_data_type(var: *var))) |
9948 | { |
9949 | initializer = join(ts: " = " , ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var: *var))); |
9950 | } |
9951 | |
9952 | statement(ts: variable_decl_function_local(var&: *var), ts&: initializer, ts: ";" ); |
9953 | var->deferred_declaration = false; |
9954 | } |
9955 | if (var) |
9956 | { |
9957 | emit_variable_temporary_copies(var: *var); |
9958 | } |
9959 | } |
9960 | |
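// Collapses duplicated swizzles of the form foo.wyx.xy into foo.wy.
// The trailing swizzle must be an identity-style prefix (.x, .xy, .xyz, .xyzw) so it simply
// selects the leading components of the previous swizzle. Returns true once the string has been
// recognized as a swizzle chain, even if nothing could actually be stripped.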
9961 | bool CompilerGLSL::remove_duplicate_swizzle(string &op) |
9962 | { |
9963 | auto pos = op.find_last_of(c: '.'); |
9964 | if (pos == string::npos || pos == 0) |
9965 | return false; |
9966 | |
9967 | string final_swiz = op.substr(pos: pos + 1, n: string::npos); |
9968 | |
9969 | if (backend.swizzle_is_function) |
9970 | { |
9971 | if (final_swiz.size() < 2) |
9972 | return false; |
9973 | |
9974 | if (final_swiz.substr(pos: final_swiz.size() - 2, n: string::npos) == "()" ) |
9975 | final_swiz.erase(pos: final_swiz.size() - 2, n: string::npos); |
9976 | else |
9977 | return false; |
9978 | } |
9979 | |
9980 | // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. |
9981 | // If so, and previous swizzle is of same length, |
9982 | // we can drop the final swizzle altogether. |
9983 | for (uint32_t i = 0; i < final_swiz.size(); i++) |
9984 | { |
9985 | static const char expected[] = { 'x', 'y', 'z', 'w' }; |
9986 | if (i >= 4 || final_swiz[i] != expected[i]) |
9987 | return false; |
9988 | } |
9989 | |
9990 | auto prevpos = op.find_last_of(c: '.', pos: pos - 1); |
9991 | if (prevpos == string::npos) |
9992 | return false; |
9993 | |
9994 | prevpos++; |
9995 | |
9996 | // Make sure there are only swizzles here ... |
9997 | for (auto i = prevpos; i < pos; i++) |
9998 | { |
9999 | if (op[i] < 'w' || op[i] > 'z') |
10000 | { |
10001 | // If swizzles are foo.xyz() like in C++ backend for example, check for that. |
10002 | if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')') |
10003 | break; |
10004 | return false; |
10005 | } |
10006 | } |
10007 | |
10008 | // If original swizzle is large enough, just carve out the components we need. |
10009 | // E.g. foobar.wyx.xy will turn into foobar.wy. |
10010 | if (pos - prevpos >= final_swiz.size()) |
10011 | { |
10012 | op.erase(pos: prevpos + final_swiz.size(), n: string::npos); |
10013 | |
10014 | // Add back the function call ... |
10015 | if (backend.swizzle_is_function) |
10016 | op += "()" ; |
10017 | } |
10018 | return true; |
10019 | } |
10020 | |
10021 | // Optimizes away vector swizzles where we have something like |
10022 | // vec3 foo; |
10023 | // foo.xyz <-- swizzle expression does nothing. |
// This is a very common pattern after OpCompositeConstruct.
10025 | bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op) |
10026 | { |
10027 | auto pos = op.find_last_of(c: '.'); |
10028 | if (pos == string::npos || pos == 0) |
10029 | return false; |
10030 | |
10031 | string final_swiz = op.substr(pos: pos + 1, n: string::npos); |
10032 | |
10033 | if (backend.swizzle_is_function) |
10034 | { |
10035 | if (final_swiz.size() < 2) |
10036 | return false; |
10037 | |
10038 | if (final_swiz.substr(pos: final_swiz.size() - 2, n: string::npos) == "()" ) |
10039 | final_swiz.erase(pos: final_swiz.size() - 2, n: string::npos); |
10040 | else |
10041 | return false; |
10042 | } |
10043 | |
10044 | // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. |
10045 | // If so, and previous swizzle is of same length, |
10046 | // we can drop the final swizzle altogether. |
10047 | for (uint32_t i = 0; i < final_swiz.size(); i++) |
10048 | { |
10049 | static const char expected[] = { 'x', 'y', 'z', 'w' }; |
10050 | if (i >= 4 || final_swiz[i] != expected[i]) |
10051 | return false; |
10052 | } |
10053 | |
10054 | auto &type = expression_type(id: base); |
10055 | |
10056 | // Sanity checking ... |
10057 | assert(type.columns == 1 && type.array.empty()); |
10058 | |
10059 | if (type.vecsize == final_swiz.size()) |
10060 | op.erase(pos: pos, n: string::npos); |
10061 | return true; |
10062 | } |
10063 | |
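// Builds the argument list used when emitting OpCompositeConstruct.
// Scalars that were extracted from the same base vector are merged back into a single swizzle,
// so something like (illustrative only) vec3(v.x, v.y, v.z) can collapse into just v, and
// vec3(v.y, v.x, v.z) into v.yxz, via remove_duplicate_swizzle/remove_unity_swizzle.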
10064 | string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length) |
10065 | { |
10066 | ID base = 0; |
10067 | string op; |
10068 | string subop; |
10069 | |
10070 | // Can only merge swizzles for vectors. |
10071 | auto &type = get<SPIRType>(id: return_type); |
10072 | bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1; |
10073 | bool swizzle_optimization = false; |
10074 | |
10075 | for (uint32_t i = 0; i < length; i++) |
10076 | { |
10077 | auto *e = maybe_get<SPIRExpression>(id: elems[i]); |
10078 | |
		// If we're merging another scalar which belongs to the same base
		// object, just merge the swizzles so we avoid triggering more than one expression read where possible.
10081 | if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base) |
10082 | { |
10083 | // Only supposed to be used for vector swizzle -> scalar. |
10084 | assert(!e->expression.empty() && e->expression.front() == '.'); |
10085 | subop += e->expression.substr(pos: 1, n: string::npos); |
10086 | swizzle_optimization = true; |
10087 | } |
10088 | else |
10089 | { |
10090 | // We'll likely end up with duplicated swizzles, e.g. |
10091 | // foobar.xyz.xyz from patterns like |
10092 | // OpVectorShuffle |
10093 | // OpCompositeExtract x 3 |
10094 | // OpCompositeConstruct 3x + other scalar. |
10095 | // Just modify op in-place. |
10096 | if (swizzle_optimization) |
10097 | { |
10098 | if (backend.swizzle_is_function) |
10099 | subop += "()" ; |
10100 | |
10101 | // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles. |
10102 | // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on. |
10103 | // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize. |
10104 | // Essentially, we can only remove one set of swizzles, since that's what we have control over ... |
10105 | // Case 1: |
10106 | // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done. |
10107 | // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo. |
10108 | // Case 2: |
10109 | // foo.xyz: Duplicate swizzle won't kick in. |
10110 | // If foo is vec3, we can remove xyz, giving just foo. |
10111 | if (!remove_duplicate_swizzle(op&: subop)) |
10112 | remove_unity_swizzle(base, op&: subop); |
10113 | |
10114 | // Strips away redundant parens if we created them during component extraction. |
10115 | strip_enclosed_expression(expr&: subop); |
10116 | swizzle_optimization = false; |
10117 | op += subop; |
10118 | } |
10119 | else |
10120 | op += subop; |
10121 | |
10122 | if (i) |
10123 | op += ", " ; |
10124 | |
10125 | bool uses_buffer_offset = |
10126 | type.basetype == SPIRType::Struct && has_member_decoration(id: type.self, index: i, decoration: DecorationOffset); |
10127 | subop = to_composite_constructor_expression(id: elems[i], block_like_type: uses_buffer_offset); |
10128 | } |
10129 | |
10130 | base = e ? e->base_expression : ID(0); |
10131 | } |
10132 | |
10133 | if (swizzle_optimization) |
10134 | { |
10135 | if (backend.swizzle_is_function) |
10136 | subop += "()" ; |
10137 | |
10138 | if (!remove_duplicate_swizzle(op&: subop)) |
10139 | remove_unity_swizzle(base, op&: subop); |
10140 | // Strips away redundant parens if we created them during component extraction. |
10141 | strip_enclosed_expression(expr&: subop); |
10142 | } |
10143 | |
10144 | op += subop; |
10145 | return op; |
10146 | } |
10147 | |
10148 | bool CompilerGLSL::skip_argument(uint32_t id) const |
10149 | { |
10150 | if (!combined_image_samplers.empty() || !options.vulkan_semantics) |
10151 | { |
10152 | auto &type = expression_type(id); |
10153 | if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1)) |
10154 | return true; |
10155 | } |
10156 | return false; |
10157 | } |
10158 | |
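// Rewrites stores of the form "lhs = lhs op expr;" into compound assignments,
// e.g. "a = a + b;" -> "a += b;" and "a = a + 1;" -> "a++;".
// Returns true if it emitted the statement itself; the caller then skips the plain assignment.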
10159 | bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs) |
10160 | { |
10161 | // Do this with strings because we have a very clear pattern we can check for and it avoids |
10162 | // adding lots of special cases to the code emission. |
10163 | if (rhs.size() < lhs.size() + 3) |
10164 | return false; |
10165 | |
	// Do not optimize matrices. They are a bit awkward to reason about in general
	// (in which order does the operation happen?), and it does not work on MSL anyway.
10168 | if (type.vecsize > 1 && type.columns > 1) |
10169 | return false; |
10170 | |
10171 | auto index = rhs.find(str: lhs); |
10172 | if (index != 0) |
10173 | return false; |
10174 | |
10175 | // TODO: Shift operators, but it's not important for now. |
10176 | auto op = rhs.find_first_of(s: "+-/*%|&^" , pos: lhs.size() + 1); |
10177 | if (op != lhs.size() + 1) |
10178 | return false; |
10179 | |
10180 | // Check that the op is followed by space. This excludes && and ||. |
10181 | if (rhs[op + 1] != ' ') |
10182 | return false; |
10183 | |
10184 | char bop = rhs[op]; |
10185 | auto expr = rhs.substr(pos: lhs.size() + 3); |
10186 | // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code. |
10187 | // Find some common patterns which are equivalent. |
10188 | if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)" )) |
10189 | statement(ts: lhs, ts&: bop, ts&: bop, ts: ";" ); |
10190 | else |
10191 | statement(ts: lhs, ts: " " , ts&: bop, ts: "= " , ts&: expr, ts: ";" ); |
10192 | return true; |
10193 | } |
10194 | |
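// Forwarded temporaries whose value is control dependent on the current block (e.g. derivatives)
// are recorded in the block's invalidate_expressions list so they can be invalidated later
// instead of being forwarded across control flow.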
10195 | void CompilerGLSL::register_control_dependent_expression(uint32_t expr) |
10196 | { |
10197 | if (forwarded_temporaries.find(x: expr) == end(cont&: forwarded_temporaries)) |
10198 | return; |
10199 | |
10200 | assert(current_emitting_block); |
10201 | current_emitting_block->invalidate_expressions.push_back(t: expr); |
10202 | } |
10203 | |
10204 | void CompilerGLSL::emit_block_instructions(SPIRBlock &block) |
10205 | { |
10206 | current_emitting_block = █ |
10207 | |
10208 | if (backend.requires_relaxed_precision_analysis) |
10209 | { |
10210 | // If PHI variables are consumed in unexpected precision contexts, copy them here. |
10211 | for (auto &phi : block.phi_variables) |
10212 | { |
10213 | auto itr = temporary_to_mirror_precision_alias.find(x: phi.function_variable); |
10214 | if (itr != temporary_to_mirror_precision_alias.end()) |
10215 | { |
10216 | // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject, |
10217 | // so it helps to have handle_instruction_precision() on the outside of emit_instruction(). |
10218 | EmbeddedInstruction inst; |
10219 | inst.op = OpCopyObject; |
10220 | inst.length = 3; |
10221 | inst.ops.push_back(t: expression_type_id(id: itr->first)); |
10222 | inst.ops.push_back(t: itr->second); |
10223 | inst.ops.push_back(t: itr->first); |
10224 | emit_instruction(instr: inst); |
10225 | } |
10226 | } |
10227 | } |
10228 | |
10229 | for (auto &op : block.ops) |
10230 | { |
10231 | auto temporary_copy = handle_instruction_precision(instr: op); |
10232 | emit_instruction(instr: op); |
10233 | if (temporary_copy.dst_id) |
10234 | { |
10235 | // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject, |
10236 | // so it helps to have handle_instruction_precision() on the outside of emit_instruction(). |
10237 | EmbeddedInstruction inst; |
10238 | inst.op = OpCopyObject; |
10239 | inst.length = 3; |
10240 | inst.ops.push_back(t: expression_type_id(id: temporary_copy.src_id)); |
10241 | inst.ops.push_back(t: temporary_copy.dst_id); |
10242 | inst.ops.push_back(t: temporary_copy.src_id); |
10243 | |
10244 | // Never attempt to hoist mirrored temporaries. |
10245 | // They are hoisted in lock-step with their parents. |
10246 | block_temporary_hoisting = true; |
10247 | emit_instruction(instr: inst); |
10248 | block_temporary_hoisting = false; |
10249 | } |
10250 | } |
10251 | |
10252 | current_emitting_block = nullptr; |
10253 | } |
10254 | |
10255 | void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr) |
10256 | { |
10257 | // Allow trivially forwarded expressions like OpLoad or trivial shuffles, |
10258 | // these will be marked as having suppressed usage tracking. |
10259 | // Our only concern is to make sure arithmetic operations are done in similar ways. |
10260 | if (expression_is_forwarded(id: expr.self) && !expression_suppresses_usage_tracking(id: expr.self) && |
10261 | forced_invariant_temporaries.count(x: expr.self) == 0) |
10262 | { |
10263 | force_temporary_and_recompile(id: expr.self); |
10264 | forced_invariant_temporaries.insert(x: expr.self); |
10265 | |
10266 | for (auto &dependent : expr.expression_dependencies) |
10267 | disallow_forwarding_in_expression_chain(expr: get<SPIRExpression>(id: dependent)); |
10268 | } |
10269 | } |
10270 | |
10271 | void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id) |
10272 | { |
10273 | // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to |
10274 | // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary |
10275 | // in one translation unit, but not another, e.g. due to multiple use of an expression. |
10276 | // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent |
10277 | // expressions to be temporaries. |
10278 | // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough |
10279 | // for all reasonable uses of invariant. |
10280 | if (!has_decoration(id: store_id, decoration: DecorationInvariant)) |
10281 | return; |
10282 | |
10283 | auto *expr = maybe_get<SPIRExpression>(id: value_id); |
10284 | if (!expr) |
10285 | return; |
10286 | |
10287 | disallow_forwarding_in_expression_chain(expr: *expr); |
10288 | } |
10289 | |
10290 | void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) |
10291 | { |
10292 | auto rhs = to_pointer_expression(id: rhs_expression); |
10293 | |
10294 | // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null. |
10295 | if (!rhs.empty()) |
10296 | { |
10297 | handle_store_to_invariant_variable(store_id: lhs_expression, value_id: rhs_expression); |
10298 | |
10299 | if (!unroll_array_to_complex_store(target_id: lhs_expression, source_id: rhs_expression)) |
10300 | { |
10301 | auto lhs = to_dereferenced_expression(id: lhs_expression); |
10302 | if (has_decoration(id: lhs_expression, decoration: DecorationNonUniform)) |
10303 | convert_non_uniform_expression(expr&: lhs, ptr_id: lhs_expression); |
10304 | |
10305 | // We might need to cast in order to store to a builtin. |
10306 | cast_to_variable_store(target_id: lhs_expression, expr&: rhs, expr_type: expression_type(id: rhs_expression)); |
10307 | |
10308 | // Tries to optimize assignments like "<lhs> = <lhs> op expr". |
10309 | // While this is purely cosmetic, this is important for legacy ESSL where loop |
10310 | // variable increments must be in either i++ or i += const-expr. |
10311 | // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0. |
10312 | if (!optimize_read_modify_write(type: expression_type(id: rhs_expression), lhs, rhs)) |
10313 | statement(ts&: lhs, ts: " = " , ts&: rhs, ts: ";" ); |
10314 | } |
10315 | register_write(chain: lhs_expression); |
10316 | } |
10317 | } |
10318 | |
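// Determines the integer bit width an instruction operates on, used to pick the correct types
// for implicit bitcasts. Comparisons and conversions report the width of their operand (ops[2]),
// since their result type (bool or float) carries no useful integer width; everything else falls
// back to the result type, or 32 if the result type is not integral.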
10319 | uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const |
10320 | { |
10321 | if (instr.length < 3) |
10322 | return 32; |
10323 | |
10324 | auto *ops = stream(instr); |
10325 | |
10326 | switch (instr.op) |
10327 | { |
10328 | case OpSConvert: |
10329 | case OpConvertSToF: |
10330 | case OpUConvert: |
10331 | case OpConvertUToF: |
10332 | case OpIEqual: |
10333 | case OpINotEqual: |
10334 | case OpSLessThan: |
10335 | case OpSLessThanEqual: |
10336 | case OpSGreaterThan: |
10337 | case OpSGreaterThanEqual: |
10338 | case OpULessThan: |
10339 | case OpULessThanEqual: |
10340 | case OpUGreaterThan: |
10341 | case OpUGreaterThanEqual: |
10342 | return expression_type(id: ops[2]).width; |
10343 | |
10344 | default: |
10345 | { |
10346 | // We can look at result type which is more robust. |
10347 | auto *type = maybe_get<SPIRType>(id: ops[0]); |
10348 | if (type && type_is_integral(type: *type)) |
10349 | return type->width; |
10350 | else |
10351 | return 32; |
10352 | } |
10353 | } |
10354 | } |
10355 | |
10356 | uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const |
10357 | { |
10358 | if (length < 1) |
10359 | return 32; |
10360 | |
10361 | switch (op) |
10362 | { |
10363 | case GLSLstd450SAbs: |
10364 | case GLSLstd450SSign: |
10365 | case GLSLstd450UMin: |
10366 | case GLSLstd450SMin: |
10367 | case GLSLstd450UMax: |
10368 | case GLSLstd450SMax: |
10369 | case GLSLstd450UClamp: |
10370 | case GLSLstd450SClamp: |
10371 | case GLSLstd450FindSMsb: |
10372 | case GLSLstd450FindUMsb: |
10373 | return expression_type(id: ops[0]).width; |
10374 | |
10375 | default: |
10376 | { |
10377 | // We don't need to care about other opcodes, just return 32. |
10378 | return 32; |
10379 | } |
10380 | } |
10381 | } |
10382 | |
10383 | void CompilerGLSL::forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length) |
10384 | { |
10385 | // Only GLSL supports RelaxedPrecision directly. |
10386 | // We cannot implement this in HLSL or MSL because it is tied to the type system. |
10387 | // In SPIR-V, everything must masquerade as 32-bit. |
10388 | if (!backend.requires_relaxed_precision_analysis) |
10389 | return; |
10390 | |
10391 | auto input_precision = analyze_expression_precision(args, length); |
10392 | |
10393 | // For expressions which are loaded or directly forwarded, we inherit mediump implicitly. |
10394 | // For dst_id to be analyzed properly, it must inherit any relaxed precision decoration from src_id. |
10395 | if (input_precision == Options::Mediump) |
10396 | set_decoration(id: dst_id, decoration: DecorationRelaxedPrecision); |
10397 | } |
10398 | |
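// Classifies the precision an expression would be evaluated at, following the GLSL rule that
// the highest operand precision wins. Illustrative example: with "mediump float a; highp float b;"
// the product a * b evaluates at highp, a * a at mediump, and an all-constant expression yields
// DontCare, since constants carry no precision of their own.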
10399 | CompilerGLSL::Options::Precision CompilerGLSL::analyze_expression_precision(const uint32_t *args, uint32_t length) const |
10400 | { |
10401 | // Now, analyze the precision at which the arguments would run. |
10402 | // GLSL rules are such that the precision used to evaluate an expression is equal to the highest precision |
10403 | // for the inputs. Constants do not have inherent precision and do not contribute to this decision. |
10404 | // If all inputs are constants, they inherit precision from outer expressions, including an l-value. |
10405 | // In this case, we'll have to force a temporary for dst_id so that we can bind the constant expression with |
10406 | // correct precision. |
10407 | bool expression_has_highp = false; |
10408 | bool expression_has_mediump = false; |
10409 | |
10410 | for (uint32_t i = 0; i < length; i++) |
10411 | { |
10412 | uint32_t arg = args[i]; |
10413 | |
10414 | auto handle_type = ir.ids[arg].get_type(); |
10415 | if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef) |
10416 | continue; |
10417 | |
10418 | if (has_decoration(id: arg, decoration: DecorationRelaxedPrecision)) |
10419 | expression_has_mediump = true; |
10420 | else |
10421 | expression_has_highp = true; |
10422 | } |
10423 | |
10424 | if (expression_has_highp) |
10425 | return Options::Highp; |
10426 | else if (expression_has_mediump) |
10427 | return Options::Mediump; |
10428 | else |
10429 | return Options::DontCare; |
10430 | } |
10431 | |
10432 | void CompilerGLSL::analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length) |
10433 | { |
10434 | if (!backend.requires_relaxed_precision_analysis) |
10435 | return; |
10436 | |
10437 | auto &type = get<SPIRType>(id: type_id); |
10438 | |
10439 | // RelaxedPrecision only applies to 32-bit values. |
10440 | if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt) |
10441 | return; |
10442 | |
10443 | bool operation_is_highp = !has_decoration(id: dst_id, decoration: DecorationRelaxedPrecision); |
10444 | |
10445 | auto input_precision = analyze_expression_precision(args, length); |
10446 | if (input_precision == Options::DontCare) |
10447 | { |
10448 | consume_temporary_in_precision_context(type_id, id: dst_id, precision: input_precision); |
10449 | return; |
10450 | } |
10451 | |
10452 | // In SPIR-V and GLSL, the semantics are flipped for how relaxed precision is determined. |
10453 | // In SPIR-V, the operation itself marks RelaxedPrecision, meaning that inputs can be truncated to 16-bit. |
10454 | // However, if the expression is not, inputs must be expanded to 32-bit first, |
10455 | // since the operation must run at high precision. |
10456 | // This is the awkward part, because if we have mediump inputs, or expressions which derived from mediump, |
10457 | // we might have to forcefully bind the source IDs to highp temporaries. This is done by clearing decorations |
10458 | // and forcing temporaries. Similarly for mediump operations. We bind highp expressions to mediump variables. |
10459 | if ((operation_is_highp && input_precision == Options::Mediump) || |
10460 | (!operation_is_highp && input_precision == Options::Highp)) |
10461 | { |
10462 | auto precision = operation_is_highp ? Options::Highp : Options::Mediump; |
10463 | for (uint32_t i = 0; i < length; i++) |
10464 | { |
10465 | // Rewrites the opcode so that we consume an ID in correct precision context. |
			// This is pretty hacky, but it's the most straightforward way of implementing this without adding
10467 | // lots of extra passes to rewrite all code blocks. |
10468 | args[i] = consume_temporary_in_precision_context(type_id: expression_type_id(id: args[i]), id: args[i], precision); |
10469 | } |
10470 | } |
10471 | } |
10472 | |
10473 | // This is probably not exhaustive ... |
10474 | static bool opcode_is_precision_sensitive_operation(Op op) |
10475 | { |
10476 | switch (op) |
10477 | { |
10478 | case OpFAdd: |
10479 | case OpFSub: |
10480 | case OpFMul: |
10481 | case OpFNegate: |
10482 | case OpIAdd: |
10483 | case OpISub: |
10484 | case OpIMul: |
10485 | case OpSNegate: |
10486 | case OpFMod: |
10487 | case OpFDiv: |
10488 | case OpFRem: |
10489 | case OpSMod: |
10490 | case OpSDiv: |
10491 | case OpSRem: |
10492 | case OpUMod: |
10493 | case OpUDiv: |
10494 | case OpVectorTimesMatrix: |
10495 | case OpMatrixTimesVector: |
10496 | case OpMatrixTimesMatrix: |
10497 | case OpDPdx: |
10498 | case OpDPdy: |
10499 | case OpDPdxCoarse: |
10500 | case OpDPdyCoarse: |
10501 | case OpDPdxFine: |
10502 | case OpDPdyFine: |
10503 | case OpFwidth: |
10504 | case OpFwidthCoarse: |
10505 | case OpFwidthFine: |
10506 | case OpVectorTimesScalar: |
10507 | case OpMatrixTimesScalar: |
10508 | case OpOuterProduct: |
10509 | case OpFConvert: |
10510 | case OpSConvert: |
10511 | case OpUConvert: |
10512 | case OpConvertSToF: |
10513 | case OpConvertUToF: |
10514 | case OpConvertFToU: |
10515 | case OpConvertFToS: |
10516 | return true; |
10517 | |
10518 | default: |
10519 | return false; |
10520 | } |
10521 | } |
10522 | |
10523 | // Instructions which just load data but don't do any arithmetic operation should just inherit the decoration. |
// SPIR-V doesn't require this, but it's somewhat implied it has to work this way: relaxed precision is only
// relevant when operating on the IDs, not when shuffling things around.
10526 | static bool opcode_is_precision_forwarding_instruction(Op op, uint32_t &arg_count) |
10527 | { |
10528 | switch (op) |
10529 | { |
10530 | case OpLoad: |
10531 | case OpAccessChain: |
10532 | case OpInBoundsAccessChain: |
10533 | case OpCompositeExtract: |
10534 | case OpVectorExtractDynamic: |
10535 | case OpSampledImage: |
10536 | case OpImage: |
10537 | case OpCopyObject: |
10538 | |
10539 | case OpImageRead: |
10540 | case OpImageFetch: |
10541 | case OpImageSampleImplicitLod: |
10542 | case OpImageSampleProjImplicitLod: |
10543 | case OpImageSampleDrefImplicitLod: |
10544 | case OpImageSampleProjDrefImplicitLod: |
10545 | case OpImageSampleExplicitLod: |
10546 | case OpImageSampleProjExplicitLod: |
10547 | case OpImageSampleDrefExplicitLod: |
10548 | case OpImageSampleProjDrefExplicitLod: |
10549 | case OpImageGather: |
10550 | case OpImageDrefGather: |
10551 | case OpImageSparseRead: |
10552 | case OpImageSparseFetch: |
10553 | case OpImageSparseSampleImplicitLod: |
10554 | case OpImageSparseSampleProjImplicitLod: |
10555 | case OpImageSparseSampleDrefImplicitLod: |
10556 | case OpImageSparseSampleProjDrefImplicitLod: |
10557 | case OpImageSparseSampleExplicitLod: |
10558 | case OpImageSparseSampleProjExplicitLod: |
10559 | case OpImageSparseSampleDrefExplicitLod: |
10560 | case OpImageSparseSampleProjDrefExplicitLod: |
10561 | case OpImageSparseGather: |
10562 | case OpImageSparseDrefGather: |
10563 | arg_count = 1; |
10564 | return true; |
10565 | |
10566 | case OpVectorShuffle: |
10567 | arg_count = 2; |
10568 | return true; |
10569 | |
10570 | case OpCompositeConstruct: |
10571 | return true; |
10572 | |
10573 | default: |
10574 | break; |
10575 | } |
10576 | |
10577 | return false; |
10578 | } |
10579 | |
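// Pre-pass run before emit_instruction (see emit_block_instructions). For backends doing
// relaxed-precision analysis this may rewrite operand IDs so they are consumed at the right
// precision, and it returns a TemporaryCopy when the result also needs a mirrored-precision
// alias; the caller then emits an extra OpCopyObject for that alias.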
10580 | CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Instruction &instruction) |
10581 | { |
10582 | auto ops = stream_mutable(instr: instruction); |
10583 | auto opcode = static_cast<Op>(instruction.op); |
10584 | uint32_t length = instruction.length; |
10585 | |
10586 | if (backend.requires_relaxed_precision_analysis) |
10587 | { |
10588 | if (length > 2) |
10589 | { |
10590 | uint32_t forwarding_length = length - 2; |
10591 | |
10592 | if (opcode_is_precision_sensitive_operation(op: opcode)) |
10593 | analyze_precision_requirements(type_id: ops[0], dst_id: ops[1], args: &ops[2], length: forwarding_length); |
10594 | else if (opcode == OpExtInst && length >= 5 && get<SPIRExtension>(id: ops[2]).ext == SPIRExtension::GLSL) |
10595 | analyze_precision_requirements(type_id: ops[0], dst_id: ops[1], args: &ops[4], length: forwarding_length - 2); |
10596 | else if (opcode_is_precision_forwarding_instruction(op: opcode, arg_count&: forwarding_length)) |
10597 | forward_relaxed_precision(dst_id: ops[1], args: &ops[2], length: forwarding_length); |
10598 | } |
10599 | |
10600 | uint32_t result_type = 0, result_id = 0; |
10601 | if (instruction_to_result_type(result_type, result_id, op: opcode, args: ops, length)) |
10602 | { |
10603 | auto itr = temporary_to_mirror_precision_alias.find(x: ops[1]); |
10604 | if (itr != temporary_to_mirror_precision_alias.end()) |
10605 | return { .dst_id: itr->second, .src_id: itr->first }; |
10606 | } |
10607 | } |
10608 | |
10609 | return {}; |
10610 | } |
10611 | |
10612 | void CompilerGLSL::emit_instruction(const Instruction &instruction) |
10613 | { |
10614 | auto ops = stream(instr: instruction); |
10615 | auto opcode = static_cast<Op>(instruction.op); |
10616 | uint32_t length = instruction.length; |
10617 | |
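// Convenience macros for the common emit_*_op helpers. They all take ops[0] as the result
// type, ops[1] as the result ID, and the remaining operands as arguments; #op stringizes
// the GLSL operator or builtin function name.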
10618 | #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) |
10619 | #define GLSL_BOP_CAST(op, type) \ |
10620 | emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) |
10621 | #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) |
10622 | #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) |
10623 | #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) |
10624 | #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) |
10625 | #define GLSL_BFOP_CAST(op, type) \ |
10626 | emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) |
10628 | #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) |
10629 | |
10630 | // If we need to do implicit bitcasts, make sure we do it with the correct type. |
10631 | uint32_t integer_width = get_integer_width_for_instruction(instr: instruction); |
10632 | auto int_type = to_signed_basetype(width: integer_width); |
10633 | auto uint_type = to_unsigned_basetype(width: integer_width); |
10634 | |
10635 | opcode = get_remapped_spirv_op(op: opcode); |
10636 | |
10637 | switch (opcode) |
10638 | { |
10639 | // Dealing with memory |
10640 | case OpLoad: |
10641 | { |
10642 | uint32_t result_type = ops[0]; |
10643 | uint32_t id = ops[1]; |
10644 | uint32_t ptr = ops[2]; |
10645 | |
10646 | flush_variable_declaration(id: ptr); |
10647 | |
10648 | // If we're loading from memory that cannot be changed by the shader, |
10649 | // just forward the expression directly to avoid needless temporaries. |
10650 | // If an expression is mutable and forwardable, we speculate that it is immutable. |
10651 | bool forward = should_forward(id: ptr) && forced_temporaries.find(x: id) == end(cont&: forced_temporaries); |
10652 | |
10653 | // If loading a non-native row-major matrix, mark the expression as need_transpose. |
10654 | bool need_transpose = false; |
10655 | bool old_need_transpose = false; |
10656 | |
10657 | auto *ptr_expression = maybe_get<SPIRExpression>(id: ptr); |
10658 | |
10659 | if (forward) |
10660 | { |
10661 | // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while |
10662 | // taking the expression. |
10663 | if (ptr_expression && ptr_expression->need_transpose) |
10664 | { |
10665 | old_need_transpose = true; |
10666 | ptr_expression->need_transpose = false; |
10667 | need_transpose = true; |
10668 | } |
10669 | else if (is_non_native_row_major_matrix(id: ptr)) |
10670 | need_transpose = true; |
10671 | } |
10672 | |
10673 | // If we are forwarding this load, |
10674 | // don't register the read to access chain here, defer that to when we actually use the expression, |
10675 | // using the add_implied_read_expression mechanism. |
10676 | string expr; |
10677 | |
10678 | bool is_packed = has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
10679 | bool is_remapped = has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID); |
10680 | if (forward || (!is_packed && !is_remapped)) |
10681 | { |
10682 | // For the simple case, we do not need to deal with repacking. |
10683 | expr = to_dereferenced_expression(id: ptr, register_expression_read: false); |
10684 | } |
10685 | else |
10686 | { |
10687 | // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before |
10688 | // storing the expression to a temporary. |
10689 | expr = to_unpacked_expression(id: ptr); |
10690 | } |
10691 | |
10692 | auto &type = get<SPIRType>(id: result_type); |
10693 | auto &expr_type = expression_type(id: ptr); |
10694 | |
10695 | // If the expression has more vector components than the result type, insert |
10696 | // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might |
10697 | // happen with e.g. the MSL backend replacing the type of an input variable. |
10698 | if (expr_type.vecsize > type.vecsize) |
10699 | expr = enclose_expression(expr: expr + vector_swizzle(vecsize: type.vecsize, index: 0)); |
10700 | |
10701 | // We might need to cast in order to load from a builtin. |
10702 | cast_from_variable_load(source_id: ptr, expr, expr_type: type); |
10703 | |
10704 | // We might be trying to load a gl_Position[N], where we should be |
10705 | // doing float4[](gl_in[i].gl_Position, ...) instead. |
10706 | // Similar workarounds are required for input arrays in tessellation. |
10707 | // Also, loading from gl_SampleMask array needs special unroll. |
10708 | unroll_array_from_complex_load(target_id: id, source_id: ptr, expr); |
10709 | |
10710 | if (!type_is_opaque_value(type) && has_decoration(id: ptr, decoration: DecorationNonUniform)) |
10711 | { |
10712 | // If we're loading something non-opaque, we need to handle non-uniform descriptor access. |
10713 | convert_non_uniform_expression(expr, ptr_id: ptr); |
10714 | } |
10715 | |
10716 | if (forward && ptr_expression) |
10717 | ptr_expression->need_transpose = old_need_transpose; |
10718 | |
10719 | bool flattened = ptr_expression && flattened_buffer_blocks.count(x: ptr_expression->loaded_from) != 0; |
10720 | |
10721 | if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(id: ptr) && !flattened) |
10722 | rewrite_load_for_wrapped_row_major(expr, loaded_type: result_type, ptr); |
10723 | |
10724 | // By default, suppress usage tracking since using same expression multiple times does not imply any extra work. |
10725 | // However, if we try to load a complex, composite object from a flattened buffer, |
10726 | // we should avoid emitting the same code over and over and lower the result to a temporary. |
10727 | bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1)); |
10728 | |
10729 | SPIRExpression *e = nullptr; |
10730 | if (!forward && expression_is_non_value_type_array(ptr)) |
10731 | { |
10732 | // Complicated load case where we need to make a copy of ptr, but we cannot, because |
10733 | // it is an array, and our backend does not support arrays as value types. |
10734 | // Emit the temporary, and copy it explicitly. |
10735 | e = &emit_uninitialized_temporary_expression(type: result_type, id); |
10736 | emit_array_copy(lhs: to_expression(id), lhs_id: id, rhs_id: ptr, lhs_storage: StorageClassFunction, rhs_storage: get_expression_effective_storage_class(ptr)); |
10737 | } |
10738 | else |
10739 | e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: forward, suppress_usage_tracking: !usage_tracking); |
10740 | |
10741 | e->need_transpose = need_transpose; |
10742 | register_read(expr: id, chain: ptr, forwarded: forward); |
10743 | |
10744 | if (forward) |
10745 | { |
10746 | // Pass through whether the result is of a packed type and the physical type ID. |
10747 | if (has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypePacked)) |
10748 | set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
10749 | if (has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID)) |
10750 | { |
10751 | set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID, |
10752 | value: get_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID)); |
10753 | } |
10754 | } |
10755 | else |
10756 | { |
10757 | // This might have been set on an earlier compilation iteration, force it to be unset. |
10758 | unset_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
10759 | unset_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID); |
10760 | } |
10761 | |
10762 | inherit_expression_dependencies(dst: id, source: ptr); |
10763 | if (forward) |
10764 | add_implied_read_expression(e&: *e, source: ptr); |
10765 | break; |
10766 | } |
10767 | |
10768 | case OpInBoundsAccessChain: |
10769 | case OpAccessChain: |
10770 | case OpPtrAccessChain: |
10771 | { |
10772 | auto *var = maybe_get<SPIRVariable>(id: ops[2]); |
10773 | if (var) |
10774 | flush_variable_declaration(id: var->self); |
10775 | |
10776 | // If the base is immutable, the access chain pointer must also be. |
10777 | // If an expression is mutable and forwardable, we speculate that it is immutable. |
10778 | AccessChainMeta meta; |
10779 | bool ptr_chain = opcode == OpPtrAccessChain; |
10780 | auto &target_type = get<SPIRType>(id: ops[0]); |
10781 | auto e = access_chain(base: ops[2], indices: &ops[3], count: length - 3, target_type, meta: &meta, ptr_chain); |
10782 | |
10783 | // If the base is flattened UBO of struct type, the expression has to be a composite. |
10784 | // In that case, backends which do not support inline syntax need it to be bound to a temporary. |
10785 | // Otherwise, invalid expressions like ({UBO[0].xyz, UBO[0].w, UBO[1]}).member are emitted. |
10786 | bool requires_temporary = false; |
10787 | if (flattened_buffer_blocks.count(x: ops[2]) && target_type.basetype == SPIRType::Struct) |
10788 | requires_temporary = !backend.can_declare_struct_inline; |
10789 | |
10790 | auto &expr = requires_temporary ? |
10791 | emit_op(result_type: ops[0], result_id: ops[1], rhs: std::move(e), forwarding: false) : |
10792 | set<SPIRExpression>(id: ops[1], args: std::move(e), args: ops[0], args: should_forward(id: ops[2])); |
10793 | |
10794 | auto *backing_variable = maybe_get_backing_variable(chain: ops[2]); |
10795 | expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]); |
10796 | expr.need_transpose = meta.need_transpose; |
10797 | expr.access_chain = true; |
10798 | |
10799 | // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed. |
10800 | if (meta.storage_is_packed) |
10801 | set_extended_decoration(id: ops[1], decoration: SPIRVCrossDecorationPhysicalTypePacked); |
10802 | if (meta.storage_physical_type != 0) |
10803 | set_extended_decoration(id: ops[1], decoration: SPIRVCrossDecorationPhysicalTypeID, value: meta.storage_physical_type); |
10804 | if (meta.storage_is_invariant) |
10805 | set_decoration(id: ops[1], decoration: DecorationInvariant); |
10806 | if (meta.flattened_struct) |
10807 | flattened_structs[ops[1]] = true; |
10808 | if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis) |
10809 | set_decoration(id: ops[1], decoration: DecorationRelaxedPrecision); |
10810 | |
10811 | // If we have some expression dependencies in our access chain, this access chain is technically a forwarded |
10812 | // temporary which could be subject to invalidation. |
		// Need to assume we're forwarded while calling inherit_expression_dependencies.
10814 | forwarded_temporaries.insert(x: ops[1]); |
10815 | // The access chain itself is never forced to a temporary, but its dependencies might. |
10816 | suppressed_usage_tracking.insert(x: ops[1]); |
10817 | |
10818 | for (uint32_t i = 2; i < length; i++) |
10819 | { |
10820 | inherit_expression_dependencies(dst: ops[1], source: ops[i]); |
10821 | add_implied_read_expression(e&: expr, source: ops[i]); |
10822 | } |
10823 | |
10824 | // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries, |
10825 | // we're not forwarded after all. |
10826 | if (expr.expression_dependencies.empty()) |
10827 | forwarded_temporaries.erase(x: ops[1]); |
10828 | |
10829 | break; |
10830 | } |
10831 | |
10832 | case OpStore: |
10833 | { |
10834 | auto *var = maybe_get<SPIRVariable>(id: ops[0]); |
10835 | |
10836 | if (var && var->statically_assigned) |
10837 | var->static_expression = ops[1]; |
10838 | else if (var && var->loop_variable && !var->loop_variable_enable) |
10839 | var->static_expression = ops[1]; |
10840 | else if (var && var->remapped_variable && var->static_expression) |
10841 | { |
10842 | // Skip the write. |
10843 | } |
10844 | else if (flattened_structs.count(x: ops[0])) |
10845 | { |
10846 | store_flattened_struct(lhs_id: ops[0], value: ops[1]); |
10847 | register_write(chain: ops[0]); |
10848 | } |
10849 | else |
10850 | { |
10851 | emit_store_statement(lhs_expression: ops[0], rhs_expression: ops[1]); |
10852 | } |
10853 | |
10854 | // Storing a pointer results in a variable pointer, so we must conservatively assume |
10855 | // we can write through it. |
10856 | if (expression_type(id: ops[1]).pointer) |
10857 | register_write(chain: ops[1]); |
10858 | break; |
10859 | } |
10860 | |
10861 | case OpArrayLength: |
10862 | { |
10863 | uint32_t result_type = ops[0]; |
10864 | uint32_t id = ops[1]; |
10865 | auto e = access_chain_internal(base: ops[2], indices: &ops[3], count: length - 3, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr); |
10866 | if (has_decoration(id: ops[2], decoration: DecorationNonUniform)) |
10867 | convert_non_uniform_expression(expr&: e, ptr_id: ops[2]); |
10868 | set<SPIRExpression>(id, args: join(ts: type_to_glsl(type: get<SPIRType>(id: result_type)), ts: "(" , ts&: e, ts: ".length())" ), args&: result_type, |
10869 | args: true); |
10870 | break; |
10871 | } |
10872 | |
10873 | // Function calls |
10874 | case OpFunctionCall: |
10875 | { |
10876 | uint32_t result_type = ops[0]; |
10877 | uint32_t id = ops[1]; |
10878 | uint32_t func = ops[2]; |
10879 | const auto *arg = &ops[3]; |
10880 | length -= 3; |
10881 | |
10882 | auto &callee = get<SPIRFunction>(id: func); |
10883 | auto &return_type = get<SPIRType>(id: callee.return_type); |
10884 | bool pure = function_is_pure(func: callee); |
10885 | |
10886 | bool callee_has_out_variables = false; |
10887 | bool emit_return_value_as_argument = false; |
10888 | |
10889 | // Invalidate out variables passed to functions since they can be OpStore'd to. |
10890 | for (uint32_t i = 0; i < length; i++) |
10891 | { |
10892 | if (callee.arguments[i].write_count) |
10893 | { |
10894 | register_call_out_argument(id: arg[i]); |
10895 | callee_has_out_variables = true; |
10896 | } |
10897 | |
10898 | flush_variable_declaration(id: arg[i]); |
10899 | } |
10900 | |
10901 | if (!return_type.array.empty() && !backend.can_return_array) |
10902 | { |
10903 | callee_has_out_variables = true; |
10904 | emit_return_value_as_argument = true; |
10905 | } |
10906 | |
10907 | if (!pure) |
10908 | register_impure_function_call(); |
10909 | |
10910 | string funexpr; |
10911 | SmallVector<string> arglist; |
10912 | funexpr += to_name(id: func) + "(" ; |
10913 | |
10914 | if (emit_return_value_as_argument) |
10915 | { |
10916 | statement(ts: type_to_glsl(type: return_type), ts: " " , ts: to_name(id), ts: type_to_array_glsl(type: return_type), ts: ";" ); |
10917 | arglist.push_back(t: to_name(id)); |
10918 | } |
10919 | |
10920 | for (uint32_t i = 0; i < length; i++) |
10921 | { |
10922 | // Do not pass in separate images or samplers if we're remapping |
10923 | // to combined image samplers. |
10924 | if (skip_argument(id: arg[i])) |
10925 | continue; |
10926 | |
10927 | arglist.push_back(t: to_func_call_arg(callee.arguments[i], id: arg[i])); |
10928 | } |
10929 | |
10930 | for (auto &combined : callee.combined_parameters) |
10931 | { |
10932 | auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]); |
10933 | auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]); |
10934 | arglist.push_back(t: to_combined_image_sampler(image_id, samp_id: sampler_id)); |
10935 | } |
10936 | |
10937 | append_global_func_args(func: callee, index: length, arglist); |
10938 | |
10939 | funexpr += merge(list: arglist); |
10940 | funexpr += ")" ; |
10941 | |
10942 | // Check for function call constraints. |
10943 | check_function_call_constraints(args: arg, length); |
10944 | |
10945 | if (return_type.basetype != SPIRType::Void) |
10946 | { |
10947 | // If the function actually writes to an out variable, |
10948 | // take the conservative route and do not forward. |
10949 | // The problem is that we might not read the function |
10950 | // result (and emit the function) before an out variable |
10951 | // is read (a common case when the return value is ignored!). |
10952 | // To avoid having to start tracking invalid variables, |
10953 | // just avoid the forwarding problem altogether. |
10954 | bool forward = args_will_forward(id, args: arg, num_args: length, pure) && !callee_has_out_variables && pure && |
10955 | (forced_temporaries.find(x: id) == end(cont&: forced_temporaries)); |
10956 | |
10957 | if (emit_return_value_as_argument) |
10958 | { |
10959 | statement(ts&: funexpr, ts: ";" ); |
10960 | set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true); |
10961 | } |
10962 | else |
10963 | emit_op(result_type, result_id: id, rhs: funexpr, forwarding: forward); |
10964 | |
10965 | // Function calls are implicit loads from all variables in question. |
10966 | // Set dependencies for them. |
10967 | for (uint32_t i = 0; i < length; i++) |
10968 | register_read(expr: id, chain: arg[i], forwarded: forward); |
10969 | |
10970 | // If we're going to forward the temporary result, |
10971 | // put dependencies on every variable that must not change. |
10972 | if (forward) |
10973 | register_global_read_dependencies(func: callee, id); |
10974 | } |
10975 | else |
10976 | statement(ts&: funexpr, ts: ";" ); |
10977 | |
10978 | break; |
10979 | } |
10980 | |
10981 | // Composite munging |
10982 | case OpCompositeConstruct: |
10983 | { |
10984 | uint32_t result_type = ops[0]; |
10985 | uint32_t id = ops[1]; |
10986 | const auto *const elems = &ops[2]; |
10987 | length -= 2; |
10988 | |
10989 | bool forward = true; |
10990 | for (uint32_t i = 0; i < length; i++) |
10991 | forward = forward && should_forward(id: elems[i]); |
10992 | |
10993 | auto &out_type = get<SPIRType>(id: result_type); |
10994 | auto *in_type = length > 0 ? &expression_type(id: elems[0]) : nullptr; |
10995 | |
10996 | // Only splat if we have vector constructors. |
10997 | // Arrays and structs must be initialized properly in full. |
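// E.g. a vec4 built from a single scalar f can be splatted to vec4(f) instead of vec4(f, f, f, f)
// when the backend opts into constructor splatting (illustrative sketch).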
10998 | bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct; |
10999 | |
11000 | bool splat = false; |
11001 | bool swizzle_splat = false; |
11002 | |
11003 | if (in_type) |
11004 | { |
11005 | splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting; |
11006 | swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar; |
11007 | |
11008 | if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(type: *in_type)) |
11009 | { |
11010 | // Special case: literal integers cannot be swizzled. |
11011 | swizzle_splat = false; |
11012 | } |
11013 | } |
11014 | |
11015 | if (splat || swizzle_splat) |
11016 | { |
11017 | uint32_t input = elems[0]; |
11018 | for (uint32_t i = 0; i < length; i++) |
11019 | { |
11020 | if (input != elems[i]) |
11021 | { |
11022 | splat = false; |
11023 | swizzle_splat = false; |
11024 | } |
11025 | } |
11026 | } |
11027 | |
11028 | if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) |
11029 | forward = false; |
11030 | if (!out_type.array.empty() && !backend.can_declare_arrays_inline) |
11031 | forward = false; |
11032 | if (type_is_empty(type: out_type) && !backend.supports_empty_struct) |
11033 | forward = false; |
11034 | |
11035 | string constructor_op; |
11036 | if (backend.use_initializer_list && composite) |
11037 | { |
11038 | bool needs_trailing_bracket = false; |
11039 | // Only use this path if we are building composites. |
11040 | // This path cannot be used for arithmetic. |
11041 | if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty()) |
11042 | constructor_op += type_to_glsl_constructor(type: get<SPIRType>(id: result_type)); |
11043 | else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty()) |
11044 | { |
11045 | // MSL path. Array constructor is baked into type here, do not use _constructor variant. |
11046 | constructor_op += type_to_glsl_constructor(type: get<SPIRType>(id: result_type)) + "(" ; |
11047 | needs_trailing_bracket = true; |
11048 | } |
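// Sketch of the resulting text: "Foo{ a, b }" for typed initializer lists, "{ a, b }" for plain ones,
// and "T({ a, b })" for MSL value-type arrays; the exact spelling depends on the backend flags above.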
11049 | constructor_op += "{ " ; |
11050 | |
11051 | if (type_is_empty(type: out_type) && !backend.supports_empty_struct) |
11052 | constructor_op += "0" ; |
11053 | else if (splat) |
11054 | constructor_op += to_unpacked_expression(id: elems[0]); |
11055 | else |
11056 | constructor_op += build_composite_combiner(return_type: result_type, elems, length); |
11057 | constructor_op += " }" ; |
11058 | if (needs_trailing_bracket) |
11059 | constructor_op += ")" ; |
11060 | } |
11061 | else if (swizzle_splat && !composite) |
11062 | { |
11063 | constructor_op = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 1, expr: to_unpacked_expression(id: elems[0])); |
11064 | } |
11065 | else |
11066 | { |
11067 | constructor_op = type_to_glsl_constructor(type: get<SPIRType>(id: result_type)) + "(" ; |
11068 | if (type_is_empty(type: out_type) && !backend.supports_empty_struct) |
11069 | constructor_op += "0" ; |
11070 | else if (splat) |
11071 | constructor_op += to_unpacked_expression(id: elems[0]); |
11072 | else |
11073 | constructor_op += build_composite_combiner(return_type: result_type, elems, length); |
11074 | constructor_op += ")" ; |
11075 | } |
11076 | |
11077 | if (!constructor_op.empty()) |
11078 | { |
11079 | emit_op(result_type, result_id: id, rhs: constructor_op, forwarding: forward); |
11080 | for (uint32_t i = 0; i < length; i++) |
11081 | inherit_expression_dependencies(dst: id, source: elems[i]); |
11082 | } |
11083 | break; |
11084 | } |
11085 | |
11086 | case OpVectorInsertDynamic: |
11087 | { |
11088 | uint32_t result_type = ops[0]; |
11089 | uint32_t id = ops[1]; |
11090 | uint32_t vec = ops[2]; |
11091 | uint32_t comp = ops[3]; |
11092 | uint32_t index = ops[4]; |
11093 | |
11094 | flush_variable_declaration(id: vec); |
11095 | |
11096 | // Make a copy, then use access chain to store the variable. |
11097 | statement(ts: declare_temporary(result_type, result_id: id), ts: to_expression(id: vec), ts: ";" ); |
11098 | set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true); |
11099 | auto chain = access_chain_internal(base: id, indices: &index, count: 1, flags: 0, meta: nullptr); |
11100 | statement(ts&: chain, ts: " = " , ts: to_unpacked_expression(id: comp), ts: ";" ); |
11101 | break; |
11102 | } |
11103 | |
11104 | case OpVectorExtractDynamic: |
11105 | { |
11106 | uint32_t result_type = ops[0]; |
11107 | uint32_t id = ops[1]; |
11108 | |
11109 | auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: 1, flags: 0, meta: nullptr); |
11110 | emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: ops[2])); |
11111 | inherit_expression_dependencies(dst: id, source: ops[2]); |
11112 | inherit_expression_dependencies(dst: id, source: ops[3]); |
11113 | break; |
11114 | } |
11115 | |
11116 | case OpCompositeExtract: |
11117 | { |
11118 | uint32_t result_type = ops[0]; |
11119 | uint32_t id = ops[1]; |
11120 | length -= 3; |
11121 | |
11122 | auto &type = get<SPIRType>(id: result_type); |
11123 | |
11124 | // We can only split the expression here if our expression is forwarded as a temporary. |
11125 | bool allow_base_expression = forced_temporaries.find(x: id) == end(cont&: forced_temporaries); |
11126 | |
11127 | // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case. |
11128 | auto &composite_type = expression_type(id: ops[2]); |
11129 | bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty(); |
11130 | if (composite_type_is_complex) |
11131 | allow_base_expression = false; |
11132 | |
11133 | // Packed expressions or physical ID mapped expressions cannot be split up. |
11134 | if (has_extended_decoration(id: ops[2], decoration: SPIRVCrossDecorationPhysicalTypePacked) || |
11135 | has_extended_decoration(id: ops[2], decoration: SPIRVCrossDecorationPhysicalTypeID)) |
11136 | allow_base_expression = false; |
11137 | |
11138 | // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern |
11139 | // into the base expression. |
11140 | if (is_non_native_row_major_matrix(id: ops[2])) |
11141 | allow_base_expression = false; |
11142 | |
11143 | AccessChainMeta meta; |
11144 | SPIRExpression *e = nullptr; |
11145 | auto *c = maybe_get<SPIRConstant>(id: ops[2]); |
11146 | |
11147 | if (c && !c->specialization && !composite_type_is_complex) |
11148 | { |
11149 | auto expr = to_extract_constant_composite_expression(result_type, c: *c, chain: ops + 3, length); |
11150 | e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: true, suppress_usage_tracking: true); |
11151 | } |
11152 | else if (allow_base_expression && should_forward(id: ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) |
11153 | { |
11154 | // Only apply this optimization if result is scalar. |
11155 | |
11156 | // We want to split the access chain from the base. |
11157 | // This is so we can later combine different CompositeExtract results |
11158 | // with CompositeConstruct without emitting code like |
11159 | // |
11160 | // vec3 temp = texture(...).xyz |
11161 | // vec4(temp.x, temp.y, temp.z, 1.0). |
11162 | // |
11163 | // when we actually wanted to emit this |
11164 | // vec4(texture(...).xyz, 1.0). |
11165 | // |
11166 | // Including the base would prevent this and trigger multiple reads |
11167 | // from the expression, forcing it out to an actual temporary in GLSL. |
11168 | auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: length, |
11169 | flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT | |
11170 | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, meta: &meta); |
11171 | e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: true, suppress_usage_tracking: should_suppress_usage_tracking(id: ops[2])); |
11172 | inherit_expression_dependencies(dst: id, source: ops[2]); |
11173 | e->base_expression = ops[2]; |
11174 | |
11175 | if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis) |
11176 | set_decoration(id: ops[1], decoration: DecorationRelaxedPrecision); |
11177 | } |
11178 | else |
11179 | { |
11180 | auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: length, |
11181 | flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, meta: &meta); |
11182 | e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: ops[2]), suppress_usage_tracking: should_suppress_usage_tracking(id: ops[2])); |
11183 | inherit_expression_dependencies(dst: id, source: ops[2]); |
11184 | } |
11185 | |
11186 | // Pass through some meta information to the loaded expression. |
11187 | // We can still end up loading a buffer type to a variable, then CompositeExtract from it |
11188 | // instead of loading everything through an access chain. |
11189 | e->need_transpose = meta.need_transpose; |
11190 | if (meta.storage_is_packed) |
11191 | set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
11192 | if (meta.storage_physical_type != 0) |
11193 | set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID, value: meta.storage_physical_type); |
11194 | if (meta.storage_is_invariant) |
11195 | set_decoration(id, decoration: DecorationInvariant); |
11196 | |
11197 | break; |
11198 | } |
11199 | |
11200 | case OpCompositeInsert: |
11201 | { |
11202 | uint32_t result_type = ops[0]; |
11203 | uint32_t id = ops[1]; |
11204 | uint32_t obj = ops[2]; |
11205 | uint32_t composite = ops[3]; |
11206 | const auto *elems = &ops[4]; |
11207 | length -= 4; |
11208 | |
11209 | flush_variable_declaration(id: composite); |
11210 | |
11211 | // CompositeInsert requires a copy + modification, but this is very awkward code in HLL. |
11212 | // Speculate that the input composite is no longer used, and we can modify it in-place. |
11213 | // There are various scenarios where this is not possible to satisfy. |
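// When in-place modification is legal, the insert lowers to a plain element store on the
// existing temporary, e.g. "tmp.member = obj;" (sketch), instead of a full copy followed by a write.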
11214 | bool can_modify_in_place = true; |
11215 | forced_temporaries.insert(x: id); |
11216 | |
11217 | // Cannot safely RMW PHI variables since they have no way to be invalidated; |
11218 | // forcing temporaries is not going to help. |
11219 | // This is similar for Constant and Undef inputs. |
11220 | // The only safe thing to RMW is SPIRExpression. |
11221 | if (invalid_expressions.count(x: composite) || |
11222 | block_composite_insert_overwrite.count(x: composite) || |
11223 | maybe_get<SPIRExpression>(id: composite) == nullptr) |
11224 | { |
11225 | can_modify_in_place = false; |
11226 | } |
11227 | else if (backend.requires_relaxed_precision_analysis && |
11228 | has_decoration(id: composite, decoration: DecorationRelaxedPrecision) != |
11229 | has_decoration(id, decoration: DecorationRelaxedPrecision) && |
11230 | get<SPIRType>(id: result_type).basetype != SPIRType::Struct) |
11231 | { |
11232 | // Similarly, if precision does not match for input and output, |
11233 | // we cannot alias them. If we write a composite into a relaxed precision |
11234 | // ID, we might get a false truncation. |
11235 | can_modify_in_place = false; |
11236 | } |
11237 | |
11238 | if (can_modify_in_place) |
11239 | { |
11240 | // Have to make sure the modified SSA value is bound to a temporary so we can modify it in-place. |
11241 | if (!forced_temporaries.count(x: composite)) |
11242 | force_temporary_and_recompile(id: composite); |
11243 | |
11244 | auto chain = access_chain_internal(base: composite, indices: elems, count: length, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr); |
11245 | statement(ts&: chain, ts: " = " , ts: to_unpacked_expression(id: obj), ts: ";" ); |
11246 | set<SPIRExpression>(id, args: to_expression(id: composite), args&: result_type, args: true); |
11247 | invalid_expressions.insert(x: composite); |
11248 | composite_insert_overwritten.insert(x: composite); |
11249 | } |
11250 | else |
11251 | { |
11252 | if (maybe_get<SPIRUndef>(id: composite) != nullptr) |
11253 | { |
11254 | emit_uninitialized_temporary_expression(type: result_type, id); |
11255 | } |
11256 | else |
11257 | { |
11258 | // Make a copy, then use access chain to store the variable. |
11259 | statement(ts: declare_temporary(result_type, result_id: id), ts: to_expression(id: composite), ts: ";" ); |
11260 | set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true); |
11261 | } |
11262 | |
11263 | auto chain = access_chain_internal(base: id, indices: elems, count: length, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr); |
11264 | statement(ts&: chain, ts: " = " , ts: to_unpacked_expression(id: obj), ts: ";" ); |
11265 | } |
11266 | |
11267 | break; |
11268 | } |
11269 | |
11270 | case OpCopyMemory: |
11271 | { |
11272 | uint32_t lhs = ops[0]; |
11273 | uint32_t rhs = ops[1]; |
11274 | if (lhs != rhs) |
11275 | { |
11276 | uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET]; |
11277 | if (!tmp_id) |
11278 | tmp_id = ir.increase_bound_by(count: 1); |
11279 | uint32_t tmp_type_id = expression_type(id: rhs).parent_type; |
11280 | |
11281 | EmbeddedInstruction fake_load, fake_store; |
11282 | fake_load.op = OpLoad; |
11283 | fake_load.length = 3; |
11284 | fake_load.ops.push_back(t: tmp_type_id); |
11285 | fake_load.ops.push_back(t: tmp_id); |
11286 | fake_load.ops.push_back(t: rhs); |
11287 | |
11288 | fake_store.op = OpStore; |
11289 | fake_store.length = 2; |
11290 | fake_store.ops.push_back(t: lhs); |
11291 | fake_store.ops.push_back(t: tmp_id); |
11292 | |
11293 | // Load and Store implement a *lot* of workarounds, and we'd like to reuse them as much as possible. |
11294 | // Synthesize a fake Load and Store pair for CopyMemory. |
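// Net effect (sketch, temporary name illustrative): "T _tmp = <src>; <dst> = _tmp;",
// expressed through the regular Load/Store code paths so all their workarounds apply.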
11295 | emit_instruction(instruction: fake_load); |
11296 | emit_instruction(instruction: fake_store); |
11297 | } |
11298 | break; |
11299 | } |
11300 | |
11301 | case OpCopyLogical: |
11302 | { |
11303 | // This is used for copying objects of different types, arrays, and structs. |
11304 | // We need to unroll the copy, element-by-element. |
11305 | uint32_t result_type = ops[0]; |
11306 | uint32_t id = ops[1]; |
11307 | uint32_t rhs = ops[2]; |
11308 | |
11309 | emit_uninitialized_temporary_expression(type: result_type, id); |
11310 | emit_copy_logical_type(lhs_id: id, lhs_type_id: result_type, rhs_id: rhs, rhs_type_id: expression_type_id(id: rhs), chain: {}); |
11311 | break; |
11312 | } |
11313 | |
11314 | case OpCopyObject: |
11315 | { |
11316 | uint32_t result_type = ops[0]; |
11317 | uint32_t id = ops[1]; |
11318 | uint32_t rhs = ops[2]; |
11319 | bool pointer = get<SPIRType>(id: result_type).pointer; |
11320 | |
11321 | auto *chain = maybe_get<SPIRAccessChain>(id: rhs); |
11322 | auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(id: rhs); |
11323 | if (chain) |
11324 | { |
11325 | // Cannot lower to a SPIRExpression, just copy the object. |
11326 | auto &e = set<SPIRAccessChain>(id, args&: *chain); |
11327 | e.self = id; |
11328 | } |
11329 | else if (imgsamp) |
11330 | { |
11331 | // Cannot lower to a SPIRExpression, just copy the object. |
11332 | // GLSL does not currently use this type and will never get here, but MSL does. |
11333 | // Handled here instead of CompilerMSL for better integration and general handling, |
11334 | // and in case GLSL or other subclasses require it in the future. |
11335 | auto &e = set<SPIRCombinedImageSampler>(id, args&: *imgsamp); |
11336 | e.self = id; |
11337 | } |
11338 | else if (expression_is_lvalue(id: rhs) && !pointer) |
11339 | { |
11340 | // Need a copy. |
11341 | // For pointer types, we copy the pointer itself. |
11342 | emit_op(result_type, result_id: id, rhs: to_unpacked_expression(id: rhs), forwarding: false); |
11343 | } |
11344 | else |
11345 | { |
11346 | // RHS expression is immutable, so just forward it. |
11347 | // Copying these things really makes no sense, but |
11348 | // it seems to be allowed anyway. |
11349 | auto &e = set<SPIRExpression>(id, args: to_expression(id: rhs), args&: result_type, args: true); |
11350 | if (pointer) |
11351 | { |
11352 | auto *var = maybe_get_backing_variable(chain: rhs); |
11353 | e.loaded_from = var ? var->self : ID(0); |
11354 | } |
11355 | |
11356 | // If we're copying an access chain, need to inherit the read expressions. |
11357 | auto *rhs_expr = maybe_get<SPIRExpression>(id: rhs); |
11358 | if (rhs_expr) |
11359 | { |
11360 | e.implied_read_expressions = rhs_expr->implied_read_expressions; |
11361 | e.expression_dependencies = rhs_expr->expression_dependencies; |
11362 | } |
11363 | } |
11364 | break; |
11365 | } |
11366 | |
11367 | case OpVectorShuffle: |
11368 | { |
11369 | uint32_t result_type = ops[0]; |
11370 | uint32_t id = ops[1]; |
11371 | uint32_t vec0 = ops[2]; |
11372 | uint32_t vec1 = ops[3]; |
11373 | const auto *elems = &ops[4]; |
11374 | length -= 4; |
11375 | |
11376 | auto &type0 = expression_type(id: vec0); |
11377 | |
11378 | // If we have the undefined swizzle index -1, we need to swizzle in undefined data, |
11379 | // or in our case, T(0). |
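// E.g. shuffling vec4 a with indices (0, 1, 0xffffffff) emits roughly vec3(a.x, a.y, 0.0);
// the undefined lane becomes a zero constant of the component type.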
11380 | bool shuffle = false; |
11381 | for (uint32_t i = 0; i < length; i++) |
11382 | if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu) |
11383 | shuffle = true; |
11384 | |
11385 | // Cannot use swizzles with packed expressions, force shuffle path. |
11386 | if (!shuffle && has_extended_decoration(id: vec0, decoration: SPIRVCrossDecorationPhysicalTypePacked)) |
11387 | shuffle = true; |
11388 | |
11389 | string expr; |
11390 | bool should_fwd, trivial_forward; |
11391 | |
11392 | if (shuffle) |
11393 | { |
11394 | should_fwd = should_forward(id: vec0) && should_forward(id: vec1); |
11395 | trivial_forward = should_suppress_usage_tracking(id: vec0) && should_suppress_usage_tracking(id: vec1); |
11396 | |
11397 | // Constructor style and shuffling from two different vectors. |
11398 | SmallVector<string> args; |
11399 | for (uint32_t i = 0; i < length; i++) |
11400 | { |
11401 | if (elems[i] == 0xffffffffu) |
11402 | { |
11403 | // Use a constant 0 here. |
11404 | // We could use the first component or similar, but then we risk propagating |
11405 | // a value we might not need and bogging down codegen. |
11406 | SPIRConstant c; |
11407 | c.constant_type = type0.parent_type; |
11408 | assert(type0.parent_type != ID(0)); |
11409 | args.push_back(t: constant_expression(c)); |
11410 | } |
11411 | else if (elems[i] >= type0.vecsize) |
11412 | args.push_back(t: to_extract_component_expression(id: vec1, index: elems[i] - type0.vecsize)); |
11413 | else |
11414 | args.push_back(t: to_extract_component_expression(id: vec0, index: elems[i])); |
11415 | } |
11416 | expr += join(ts: type_to_glsl_constructor(type: get<SPIRType>(id: result_type)), ts: "(" , ts: merge(list: args), ts: ")" ); |
11417 | } |
11418 | else |
11419 | { |
11420 | should_fwd = should_forward(id: vec0); |
11421 | trivial_forward = should_suppress_usage_tracking(id: vec0); |
11422 | |
11423 | // We only source from the first vector, so we can use a swizzle. |
11424 | // If the vector is packed, unpack it before applying a swizzle (needed for MSL). |
11425 | expr += to_enclosed_unpacked_expression(id: vec0); |
11426 | expr += "." ; |
11427 | for (uint32_t i = 0; i < length; i++) |
11428 | { |
11429 | assert(elems[i] != 0xffffffffu); |
11430 | expr += index_to_swizzle(index: elems[i]); |
11431 | } |
11432 | |
11433 | if (backend.swizzle_is_function && length > 1) |
11434 | expr += "()" ; |
11435 | } |
11436 | |
11437 | // A shuffle is trivial in that it doesn't actually *do* anything. |
11438 | // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed. |
11439 | |
11440 | emit_op(result_type, result_id: id, rhs: expr, forwarding: should_fwd, suppress_usage_tracking: trivial_forward); |
11441 | |
11442 | inherit_expression_dependencies(dst: id, source: vec0); |
11443 | if (vec0 != vec1) |
11444 | inherit_expression_dependencies(dst: id, source: vec1); |
11445 | break; |
11446 | } |
11447 | |
11448 | // ALU |
11449 | case OpIsNan: |
11450 | GLSL_UFOP(isnan); |
11451 | break; |
11452 | |
11453 | case OpIsInf: |
11454 | GLSL_UFOP(isinf); |
11455 | break; |
11456 | |
11457 | case OpSNegate: |
11458 | case OpFNegate: |
11459 | GLSL_UOP(-); |
11460 | break; |
11461 | |
11462 | case OpIAdd: |
11463 | { |
11464 | // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts. |
11465 | auto type = get<SPIRType>(id: ops[0]).basetype; |
11466 | GLSL_BOP_CAST(+, type); |
11467 | break; |
11468 | } |
11469 | |
11470 | case OpFAdd: |
11471 | GLSL_BOP(+); |
11472 | break; |
11473 | |
11474 | case OpISub: |
11475 | { |
11476 | auto type = get<SPIRType>(id: ops[0]).basetype; |
11477 | GLSL_BOP_CAST(-, type); |
11478 | break; |
11479 | } |
11480 | |
11481 | case OpFSub: |
11482 | GLSL_BOP(-); |
11483 | break; |
11484 | |
11485 | case OpIMul: |
11486 | { |
11487 | auto type = get<SPIRType>(id: ops[0]).basetype; |
11488 | GLSL_BOP_CAST(*, type); |
11489 | break; |
11490 | } |
11491 | |
11492 | case OpVectorTimesMatrix: |
11493 | case OpMatrixTimesVector: |
11494 | { |
11495 | // If the matrix needs transpose, just flip the multiply order. |
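// This relies on transpose(M) * v == v * M (and v * transpose(M) == M * v),
// so no explicit transpose() needs to be emitted.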
11496 | auto *e = maybe_get<SPIRExpression>(id: ops[opcode == OpMatrixTimesVector ? 2 : 3]); |
11497 | if (e && e->need_transpose) |
11498 | { |
11499 | e->need_transpose = false; |
11500 | string expr; |
11501 | |
11502 | if (opcode == OpMatrixTimesVector) |
11503 | expr = join(ts: to_enclosed_unpacked_expression(id: ops[3]), ts: " * " , |
11504 | ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[2]))); |
11505 | else |
11506 | expr = join(ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[3])), ts: " * " , |
11507 | ts: to_enclosed_unpacked_expression(id: ops[2])); |
11508 | |
11509 | bool forward = should_forward(id: ops[2]) && should_forward(id: ops[3]); |
11510 | emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: forward); |
11511 | e->need_transpose = true; |
11512 | inherit_expression_dependencies(dst: ops[1], source: ops[2]); |
11513 | inherit_expression_dependencies(dst: ops[1], source: ops[3]); |
11514 | } |
11515 | else |
11516 | GLSL_BOP(*); |
11517 | break; |
11518 | } |
11519 | |
11520 | case OpMatrixTimesMatrix: |
11521 | { |
11522 | auto *a = maybe_get<SPIRExpression>(id: ops[2]); |
11523 | auto *b = maybe_get<SPIRExpression>(id: ops[3]); |
11524 | |
11525 | // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed. |
11526 | // a^T * b^T = (b * a)^T. |
11527 | if (a && b && a->need_transpose && b->need_transpose) |
11528 | { |
11529 | a->need_transpose = false; |
11530 | b->need_transpose = false; |
11531 | auto expr = join(ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[3])), ts: " * " , |
11532 | ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[2]))); |
11533 | bool forward = should_forward(id: ops[2]) && should_forward(id: ops[3]); |
11534 | auto &e = emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: forward); |
11535 | e.need_transpose = true; |
11536 | a->need_transpose = true; |
11537 | b->need_transpose = true; |
11538 | inherit_expression_dependencies(dst: ops[1], source: ops[2]); |
11539 | inherit_expression_dependencies(dst: ops[1], source: ops[3]); |
11540 | } |
11541 | else |
11542 | GLSL_BOP(*); |
11543 | |
11544 | break; |
11545 | } |
11546 | |
11547 | case OpFMul: |
11548 | case OpMatrixTimesScalar: |
11549 | case OpVectorTimesScalar: |
11550 | GLSL_BOP(*); |
11551 | break; |
11552 | |
11553 | case OpOuterProduct: |
11554 | GLSL_BFOP(outerProduct); |
11555 | break; |
11556 | |
11557 | case OpDot: |
11558 | GLSL_BFOP(dot); |
11559 | break; |
11560 | |
11561 | case OpTranspose: |
11562 | if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00 |
11563 | { |
11564 | // transpose() is not available, so instead, flip need_transpose, |
11565 | // which can later be turned into an emulated transpose op by |
11566 | // convert_row_major_matrix(), if necessary. |
11567 | uint32_t result_type = ops[0]; |
11568 | uint32_t result_id = ops[1]; |
11569 | uint32_t input = ops[2]; |
11570 | |
11571 | // Force need_transpose to false temporarily to prevent |
11572 | // to_expression() from doing the transpose. |
11573 | bool need_transpose = false; |
11574 | auto *input_e = maybe_get<SPIRExpression>(id: input); |
11575 | if (input_e) |
11576 | swap(a&: need_transpose, b&: input_e->need_transpose); |
11577 | |
11578 | bool forward = should_forward(id: input); |
11579 | auto &e = emit_op(result_type, result_id, rhs: to_expression(id: input), forwarding: forward); |
11580 | e.need_transpose = !need_transpose; |
11581 | |
11582 | // Restore the old need_transpose flag. |
11583 | if (input_e) |
11584 | input_e->need_transpose = need_transpose; |
11585 | } |
11586 | else |
11587 | GLSL_UFOP(transpose); |
11588 | break; |
11589 | |
11590 | case OpSRem: |
11591 | { |
11592 | uint32_t result_type = ops[0]; |
11593 | uint32_t result_id = ops[1]; |
11594 | uint32_t op0 = ops[2]; |
11595 | uint32_t op1 = ops[3]; |
11596 | |
11597 | // Needs special handling. |
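// SRem is the remainder of truncated division with the sign following the first operand,
// so the emitted form is a - b * (a / b); e.g. operands 7 and -3 give 7 - (-3) * (-2) = 1.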
11598 | bool forward = should_forward(id: op0) && should_forward(id: op1); |
11599 | auto expr = join(ts: to_enclosed_expression(id: op0), ts: " - " , ts: to_enclosed_expression(id: op1), ts: " * " , ts: "(" , |
11600 | ts: to_enclosed_expression(id: op0), ts: " / " , ts: to_enclosed_expression(id: op1), ts: ")" ); |
11601 | |
11602 | emit_op(result_type, result_id, rhs: expr, forwarding: forward); |
11603 | inherit_expression_dependencies(dst: result_id, source: op0); |
11604 | inherit_expression_dependencies(dst: result_id, source: op1); |
11605 | break; |
11606 | } |
11607 | |
11608 | case OpSDiv: |
11609 | GLSL_BOP_CAST(/, int_type); |
11610 | break; |
11611 | |
11612 | case OpUDiv: |
11613 | GLSL_BOP_CAST(/, uint_type); |
11614 | break; |
11615 | |
11616 | case OpIAddCarry: |
11617 | case OpISubBorrow: |
11618 | { |
11619 | if (options.es && options.version < 310) |
11620 | SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310." ); |
11621 | else if (!options.es && options.version < 400) |
11622 | SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400." ); |
11623 | |
11624 | uint32_t result_type = ops[0]; |
11625 | uint32_t result_id = ops[1]; |
11626 | uint32_t op0 = ops[2]; |
11627 | uint32_t op1 = ops[3]; |
11628 | auto &type = get<SPIRType>(id: result_type); |
11629 | emit_uninitialized_temporary_expression(type: result_type, id: result_id); |
11630 | const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow" ; |
11631 | |
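// The result is a two-member struct; the statement below is shaped like
// "res._m0 = uaddCarry(a, b, res._m1);" (member names are illustrative).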
11632 | statement(ts: to_expression(id: result_id), ts: "." , ts: to_member_name(type, index: 0), ts: " = " , ts&: op, ts: "(" , ts: to_expression(id: op0), ts: ", " , |
11633 | ts: to_expression(id: op1), ts: ", " , ts: to_expression(id: result_id), ts: "." , ts: to_member_name(type, index: 1), ts: ");" ); |
11634 | break; |
11635 | } |
11636 | |
11637 | case OpUMulExtended: |
11638 | case OpSMulExtended: |
11639 | { |
11640 | if (options.es && options.version < 310) |
11641 | SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310." ); |
11642 | else if (!options.es && options.version < 400) |
11643 | SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400." ); |
11644 | |
11645 | uint32_t result_type = ops[0]; |
11646 | uint32_t result_id = ops[1]; |
11647 | uint32_t op0 = ops[2]; |
11648 | uint32_t op1 = ops[3]; |
11649 | auto &type = get<SPIRType>(id: result_type); |
11650 | emit_uninitialized_temporary_expression(type: result_type, id: result_id); |
11651 | const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended" ; |
11652 | |
11653 | statement(ts&: op, ts: "(" , ts: to_expression(id: op0), ts: ", " , ts: to_expression(id: op1), ts: ", " , ts: to_expression(id: result_id), ts: "." , |
11654 | ts: to_member_name(type, index: 1), ts: ", " , ts: to_expression(id: result_id), ts: "." , ts: to_member_name(type, index: 0), ts: ");" ); |
11655 | break; |
11656 | } |
11657 | |
11658 | case OpFDiv: |
11659 | GLSL_BOP(/); |
11660 | break; |
11661 | |
11662 | case OpShiftRightLogical: |
11663 | GLSL_BOP_CAST(>>, uint_type); |
11664 | break; |
11665 | |
11666 | case OpShiftRightArithmetic: |
11667 | GLSL_BOP_CAST(>>, int_type); |
11668 | break; |
11669 | |
11670 | case OpShiftLeftLogical: |
11671 | { |
11672 | auto type = get<SPIRType>(id: ops[0]).basetype; |
11673 | GLSL_BOP_CAST(<<, type); |
11674 | break; |
11675 | } |
11676 | |
11677 | case OpBitwiseOr: |
11678 | { |
11679 | auto type = get<SPIRType>(id: ops[0]).basetype; |
11680 | GLSL_BOP_CAST(|, type); |
11681 | break; |
11682 | } |
11683 | |
11684 | case OpBitwiseXor: |
11685 | { |
11686 | auto type = get<SPIRType>(id: ops[0]).basetype; |
11687 | GLSL_BOP_CAST(^, type); |
11688 | break; |
11689 | } |
11690 | |
11691 | case OpBitwiseAnd: |
11692 | { |
11693 | auto type = get<SPIRType>(id: ops[0]).basetype; |
11694 | GLSL_BOP_CAST(&, type); |
11695 | break; |
11696 | } |
11697 | |
11698 | case OpNot: |
11699 | GLSL_UOP(~); |
11700 | break; |
11701 | |
11702 | case OpUMod: |
11703 | GLSL_BOP_CAST(%, uint_type); |
11704 | break; |
11705 | |
11706 | case OpSMod: |
11707 | GLSL_BOP_CAST(%, int_type); |
11708 | break; |
11709 | |
11710 | case OpFMod: |
11711 | GLSL_BFOP(mod); |
11712 | break; |
11713 | |
11714 | case OpFRem: |
11715 | { |
11716 | if (is_legacy()) |
11717 | SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is " |
11718 | "needed for legacy." ); |
11719 | |
11720 | uint32_t result_type = ops[0]; |
11721 | uint32_t result_id = ops[1]; |
11722 | uint32_t op0 = ops[2]; |
11723 | uint32_t op1 = ops[3]; |
11724 | |
11725 | // Needs special handling. |
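// FRem is a - b * trunc(a / b); contrast with OpFMod and GLSL mod(), which use floor() instead of trunc().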
11726 | bool forward = should_forward(id: op0) && should_forward(id: op1); |
11727 | auto expr = join(ts: to_enclosed_expression(id: op0), ts: " - " , ts: to_enclosed_expression(id: op1), ts: " * " , ts: "trunc(" , |
11728 | ts: to_enclosed_expression(id: op0), ts: " / " , ts: to_enclosed_expression(id: op1), ts: ")" ); |
11729 | |
11730 | emit_op(result_type, result_id, rhs: expr, forwarding: forward); |
11731 | inherit_expression_dependencies(dst: result_id, source: op0); |
11732 | inherit_expression_dependencies(dst: result_id, source: op1); |
11733 | break; |
11734 | } |
11735 | |
11736 | // Relational |
11737 | case OpAny: |
11738 | GLSL_UFOP(any); |
11739 | break; |
11740 | |
11741 | case OpAll: |
11742 | GLSL_UFOP(all); |
11743 | break; |
11744 | |
11745 | case OpSelect: |
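// OpSelect operands are (condition, object-if-true, object-if-false), so the false object is passed
// as "left" and the condition last to match emit_mix_op's (left, right, lerp) parameter order.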
11746 | emit_mix_op(result_type: ops[0], id: ops[1], left: ops[4], right: ops[3], lerp: ops[2]); |
11747 | break; |
11748 | |
11749 | case OpLogicalOr: |
11750 | { |
11751 | // No vector variant in GLSL for logical OR. |
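// The unrolled form is roughly bvec2(a.x || b.x, a.y || b.y) for a two-component input (sketch).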
11752 | auto result_type = ops[0]; |
11753 | auto id = ops[1]; |
11754 | auto &type = get<SPIRType>(id: result_type); |
11755 | |
11756 | if (type.vecsize > 1) |
11757 | emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "||" , negate: false, expected_type: SPIRType::Unknown); |
11758 | else |
11759 | GLSL_BOP(||); |
11760 | break; |
11761 | } |
11762 | |
11763 | case OpLogicalAnd: |
11764 | { |
11765 | // No vector variant in GLSL for logical AND. |
11766 | auto result_type = ops[0]; |
11767 | auto id = ops[1]; |
11768 | auto &type = get<SPIRType>(id: result_type); |
11769 | |
11770 | if (type.vecsize > 1) |
11771 | emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "&&" , negate: false, expected_type: SPIRType::Unknown); |
11772 | else |
11773 | GLSL_BOP(&&); |
11774 | break; |
11775 | } |
11776 | |
11777 | case OpLogicalNot: |
11778 | { |
11779 | auto &type = get<SPIRType>(id: ops[0]); |
11780 | if (type.vecsize > 1) |
11781 | GLSL_UFOP(not ); |
11782 | else |
11783 | GLSL_UOP(!); |
11784 | break; |
11785 | } |
11786 | |
11787 | case OpIEqual: |
11788 | { |
11789 | if (expression_type(id: ops[2]).vecsize > 1) |
11790 | GLSL_BFOP_CAST(equal, int_type); |
11791 | else |
11792 | GLSL_BOP_CAST(==, int_type); |
11793 | break; |
11794 | } |
11795 | |
11796 | case OpLogicalEqual: |
11797 | case OpFOrdEqual: |
11798 | { |
11799 | if (expression_type(id: ops[2]).vecsize > 1) |
11800 | GLSL_BFOP(equal); |
11801 | else |
11802 | GLSL_BOP(==); |
11803 | break; |
11804 | } |
11805 | |
11806 | case OpINotEqual: |
11807 | { |
11808 | if (expression_type(id: ops[2]).vecsize > 1) |
11809 | GLSL_BFOP_CAST(notEqual, int_type); |
11810 | else |
11811 | GLSL_BOP_CAST(!=, int_type); |
11812 | break; |
11813 | } |
11814 | |
11815 | case OpLogicalNotEqual: |
11816 | case OpFOrdNotEqual: |
11817 | case OpFUnordNotEqual: |
11818 | { |
11819 | // GLSL is fuzzy on what to do with ordered vs unordered not equal. |
11820 | // glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE, |
11821 | // but this means we have no easy way of implementing ordered not equal. |
11822 | if (expression_type(id: ops[2]).vecsize > 1) |
11823 | GLSL_BFOP(notEqual); |
11824 | else |
11825 | GLSL_BOP(!=); |
11826 | break; |
11827 | } |
11828 | |
11829 | case OpUGreaterThan: |
11830 | case OpSGreaterThan: |
11831 | { |
11832 | auto type = opcode == OpUGreaterThan ? uint_type : int_type; |
11833 | if (expression_type(id: ops[2]).vecsize > 1) |
11834 | GLSL_BFOP_CAST(greaterThan, type); |
11835 | else |
11836 | GLSL_BOP_CAST(>, type); |
11837 | break; |
11838 | } |
11839 | |
11840 | case OpFOrdGreaterThan: |
11841 | { |
11842 | if (expression_type(id: ops[2]).vecsize > 1) |
11843 | GLSL_BFOP(greaterThan); |
11844 | else |
11845 | GLSL_BOP(>); |
11846 | break; |
11847 | } |
11848 | |
11849 | case OpUGreaterThanEqual: |
11850 | case OpSGreaterThanEqual: |
11851 | { |
11852 | auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type; |
11853 | if (expression_type(id: ops[2]).vecsize > 1) |
11854 | GLSL_BFOP_CAST(greaterThanEqual, type); |
11855 | else |
11856 | GLSL_BOP_CAST(>=, type); |
11857 | break; |
11858 | } |
11859 | |
11860 | case OpFOrdGreaterThanEqual: |
11861 | { |
11862 | if (expression_type(id: ops[2]).vecsize > 1) |
11863 | GLSL_BFOP(greaterThanEqual); |
11864 | else |
11865 | GLSL_BOP(>=); |
11866 | break; |
11867 | } |
11868 | |
11869 | case OpULessThan: |
11870 | case OpSLessThan: |
11871 | { |
11872 | auto type = opcode == OpULessThan ? uint_type : int_type; |
11873 | if (expression_type(id: ops[2]).vecsize > 1) |
11874 | GLSL_BFOP_CAST(lessThan, type); |
11875 | else |
11876 | GLSL_BOP_CAST(<, type); |
11877 | break; |
11878 | } |
11879 | |
11880 | case OpFOrdLessThan: |
11881 | { |
11882 | if (expression_type(id: ops[2]).vecsize > 1) |
11883 | GLSL_BFOP(lessThan); |
11884 | else |
11885 | GLSL_BOP(<); |
11886 | break; |
11887 | } |
11888 | |
11889 | case OpULessThanEqual: |
11890 | case OpSLessThanEqual: |
11891 | { |
11892 | auto type = opcode == OpULessThanEqual ? uint_type : int_type; |
11893 | if (expression_type(id: ops[2]).vecsize > 1) |
11894 | GLSL_BFOP_CAST(lessThanEqual, type); |
11895 | else |
11896 | GLSL_BOP_CAST(<=, type); |
11897 | break; |
11898 | } |
11899 | |
11900 | case OpFOrdLessThanEqual: |
11901 | { |
11902 | if (expression_type(id: ops[2]).vecsize > 1) |
11903 | GLSL_BFOP(lessThanEqual); |
11904 | else |
11905 | GLSL_BOP(<=); |
11906 | break; |
11907 | } |
11908 | |
11909 | // Conversion |
11910 | case OpSConvert: |
11911 | case OpConvertSToF: |
11912 | case OpUConvert: |
11913 | case OpConvertUToF: |
11914 | { |
11915 | auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type; |
11916 | uint32_t result_type = ops[0]; |
11917 | uint32_t id = ops[1]; |
11918 | |
11919 | auto &type = get<SPIRType>(id: result_type); |
11920 | auto &arg_type = expression_type(id: ops[2]); |
11921 | auto func = type_to_glsl_constructor(type); |
11922 | |
11923 | if (arg_type.width < type.width || type_is_floating_point(type)) |
11924 | emit_unary_func_op_cast(result_type, result_id: id, op0: ops[2], op: func.c_str(), input_type, expected_result_type: type.basetype); |
11925 | else |
11926 | emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: func.c_str()); |
11927 | break; |
11928 | } |
11929 | |
11930 | case OpConvertFToU: |
11931 | case OpConvertFToS: |
11932 | { |
11933 | // Cast to expected arithmetic type, then potentially bitcast away to desired signedness. |
11934 | uint32_t result_type = ops[0]; |
11935 | uint32_t id = ops[1]; |
11936 | auto &type = get<SPIRType>(id: result_type); |
11937 | auto expected_type = type; |
11938 | auto &float_type = expression_type(id: ops[2]); |
11939 | expected_type.basetype = |
11940 | opcode == OpConvertFToS ? to_signed_basetype(width: type.width) : to_unsigned_basetype(width: type.width); |
11941 | |
11942 | auto func = type_to_glsl_constructor(type: expected_type); |
11943 | emit_unary_func_op_cast(result_type, result_id: id, op0: ops[2], op: func.c_str(), input_type: float_type.basetype, expected_result_type: expected_type.basetype); |
11944 | break; |
11945 | } |
11946 | |
11947 | case OpFConvert: |
11948 | { |
11949 | uint32_t result_type = ops[0]; |
11950 | uint32_t id = ops[1]; |
11951 | |
11952 | auto func = type_to_glsl_constructor(type: get<SPIRType>(id: result_type)); |
11953 | emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: func.c_str()); |
11954 | break; |
11955 | } |
11956 | |
11957 | case OpBitcast: |
11958 | { |
11959 | uint32_t result_type = ops[0]; |
11960 | uint32_t id = ops[1]; |
11961 | uint32_t arg = ops[2]; |
11962 | |
11963 | if (!emit_complex_bitcast(result_type, id, op0: arg)) |
11964 | { |
11965 | auto op = bitcast_glsl_op(out_type: get<SPIRType>(id: result_type), in_type: expression_type(id: arg)); |
11966 | emit_unary_func_op(result_type, result_id: id, op0: arg, op: op.c_str()); |
11967 | } |
11968 | break; |
11969 | } |
11970 | |
11971 | case OpQuantizeToF16: |
11972 | { |
11973 | uint32_t result_type = ops[0]; |
11974 | uint32_t id = ops[1]; |
11975 | uint32_t arg = ops[2]; |
11976 | |
11977 | string op; |
11978 | auto &type = get<SPIRType>(id: result_type); |
11979 | |
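// Emulated by round-tripping through fp16 with pack/unpack; e.g. a vec2 v becomes
// unpackHalf2x16(packHalf2x16(v)).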
11980 | switch (type.vecsize) |
11981 | { |
11982 | case 1: |
11983 | op = join(ts: "unpackHalf2x16(packHalf2x16(vec2(" , ts: to_expression(id: arg), ts: "))).x" ); |
11984 | break; |
11985 | case 2: |
11986 | op = join(ts: "unpackHalf2x16(packHalf2x16(" , ts: to_expression(id: arg), ts: "))" ); |
11987 | break; |
11988 | case 3: |
11989 | { |
11990 | auto op0 = join(ts: "unpackHalf2x16(packHalf2x16(" , ts: to_expression(id: arg), ts: ".xy))" ); |
11991 | auto op1 = join(ts: "unpackHalf2x16(packHalf2x16(" , ts: to_expression(id: arg), ts: ".zz)).x" ); |
11992 | op = join(ts: "vec3(" , ts&: op0, ts: ", " , ts&: op1, ts: ")" ); |
11993 | break; |
11994 | } |
11995 | case 4: |
11996 | { |
11997 | auto op0 = join(ts: "unpackHalf2x16(packHalf2x16(" , ts: to_expression(id: arg), ts: ".xy))" ); |
11998 | auto op1 = join(ts: "unpackHalf2x16(packHalf2x16(" , ts: to_expression(id: arg), ts: ".zw))" ); |
11999 | op = join(ts: "vec4(" , ts&: op0, ts: ", " , ts&: op1, ts: ")" ); |
12000 | break; |
12001 | } |
12002 | default: |
12003 | SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16." ); |
12004 | } |
12005 | |
12006 | emit_op(result_type, result_id: id, rhs: op, forwarding: should_forward(id: arg)); |
12007 | inherit_expression_dependencies(dst: id, source: arg); |
12008 | break; |
12009 | } |
12010 | |
12011 | // Derivatives |
12012 | case OpDPdx: |
12013 | GLSL_UFOP(dFdx); |
12014 | if (is_legacy_es()) |
12015 | require_extension_internal(ext: "GL_OES_standard_derivatives" ); |
12016 | register_control_dependent_expression(expr: ops[1]); |
12017 | break; |
12018 | |
12019 | case OpDPdy: |
12020 | GLSL_UFOP(dFdy); |
12021 | if (is_legacy_es()) |
12022 | require_extension_internal(ext: "GL_OES_standard_derivatives" ); |
12023 | register_control_dependent_expression(expr: ops[1]); |
12024 | break; |
12025 | |
12026 | case OpDPdxFine: |
12027 | GLSL_UFOP(dFdxFine); |
12028 | if (options.es) |
12029 | { |
12030 | SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES." ); |
12031 | } |
12032 | if (options.version < 450) |
12033 | require_extension_internal(ext: "GL_ARB_derivative_control" ); |
12034 | register_control_dependent_expression(expr: ops[1]); |
12035 | break; |
12036 | |
12037 | case OpDPdyFine: |
12038 | GLSL_UFOP(dFdyFine); |
12039 | if (options.es) |
12040 | { |
12041 | SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES." ); |
12042 | } |
12043 | if (options.version < 450) |
12044 | require_extension_internal(ext: "GL_ARB_derivative_control" ); |
12045 | register_control_dependent_expression(expr: ops[1]); |
12046 | break; |
12047 | |
12048 | case OpDPdxCoarse: |
12049 | if (options.es) |
12050 | { |
12051 | SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES." ); |
12052 | } |
12053 | GLSL_UFOP(dFdxCoarse); |
12054 | if (options.version < 450) |
12055 | require_extension_internal(ext: "GL_ARB_derivative_control" ); |
12056 | register_control_dependent_expression(expr: ops[1]); |
12057 | break; |
12058 | |
12059 | case OpDPdyCoarse: |
12060 | GLSL_UFOP(dFdyCoarse); |
12061 | if (options.es) |
12062 | { |
12063 | SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES." ); |
12064 | } |
12065 | if (options.version < 450) |
12066 | require_extension_internal(ext: "GL_ARB_derivative_control" ); |
12067 | register_control_dependent_expression(expr: ops[1]); |
12068 | break; |
12069 | |
12070 | case OpFwidth: |
12071 | GLSL_UFOP(fwidth); |
12072 | if (is_legacy_es()) |
12073 | require_extension_internal(ext: "GL_OES_standard_derivatives" ); |
12074 | register_control_dependent_expression(expr: ops[1]); |
12075 | break; |
12076 | |
12077 | case OpFwidthCoarse: |
12078 | GLSL_UFOP(fwidthCoarse); |
12079 | if (options.es) |
12080 | { |
12081 | SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES." ); |
12082 | } |
12083 | if (options.version < 450) |
12084 | require_extension_internal(ext: "GL_ARB_derivative_control" ); |
12085 | register_control_dependent_expression(expr: ops[1]); |
12086 | break; |
12087 | |
12088 | case OpFwidthFine: |
12089 | GLSL_UFOP(fwidthFine); |
12090 | if (options.es) |
12091 | { |
12092 | SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES." ); |
12093 | } |
12094 | if (options.version < 450) |
12095 | require_extension_internal(ext: "GL_ARB_derivative_control" ); |
12096 | register_control_dependent_expression(expr: ops[1]); |
12097 | break; |
12098 | |
12099 | // Bitfield |
12100 | case OpBitFieldInsert: |
12101 | { |
12102 | emit_bitfield_insert_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op3: ops[5], op: "bitfieldInsert" , offset_count_type: SPIRType::Int); |
12103 | break; |
12104 | } |
12105 | |
12106 | case OpBitFieldSExtract: |
12107 | { |
12108 | emit_trinary_func_op_bitextract(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op: "bitfieldExtract" , expected_result_type: int_type, input_type0: int_type, |
12109 | input_type1: SPIRType::Int, input_type2: SPIRType::Int); |
12110 | break; |
12111 | } |
12112 | |
12113 | case OpBitFieldUExtract: |
12114 | { |
12115 | emit_trinary_func_op_bitextract(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op: "bitfieldExtract" , expected_result_type: uint_type, input_type0: uint_type, |
12116 | input_type1: SPIRType::Int, input_type2: SPIRType::Int); |
12117 | break; |
12118 | } |
12119 | |
12120 | case OpBitReverse: |
12121 | // BitReverse does not have issues with sign since result type must match input type. |
12122 | GLSL_UFOP(bitfieldReverse); |
12123 | break; |
12124 | |
12125 | case OpBitCount: |
12126 | { |
12127 | auto basetype = expression_type(id: ops[2]).basetype; |
12128 | emit_unary_func_op_cast(result_type: ops[0], result_id: ops[1], op0: ops[2], op: "bitCount" , input_type: basetype, expected_result_type: int_type); |
12129 | break; |
12130 | } |
12131 | |
12132 | // Atomics |
12133 | case OpAtomicExchange: |
12134 | { |
12135 | uint32_t result_type = ops[0]; |
12136 | uint32_t id = ops[1]; |
12137 | uint32_t ptr = ops[2]; |
12138 | // Ignore semantics for now, probably only relevant to CL. |
12139 | uint32_t val = ops[5]; |
12140 | const char *op = check_atomic_image(id: ptr) ? "imageAtomicExchange" : "atomicExchange" ; |
12141 | |
12142 | emit_atomic_func_op(result_type, result_id: id, op0: ptr, op1: val, op); |
12143 | break; |
12144 | } |
12145 | |
12146 | case OpAtomicCompareExchange: |
12147 | { |
12148 | uint32_t result_type = ops[0]; |
12149 | uint32_t id = ops[1]; |
12150 | uint32_t ptr = ops[2]; |
12151 | uint32_t val = ops[6]; |
12152 | uint32_t comp = ops[7]; |
12153 | const char *op = check_atomic_image(id: ptr) ? "imageAtomicCompSwap" : "atomicCompSwap" ; |
12154 | |
12155 | emit_atomic_func_op(result_type, result_id: id, op0: ptr, op1: comp, op2: val, op); |
12156 | break; |
12157 | } |
12158 | |
12159 | case OpAtomicLoad: |
12160 | { |
12161 | // In plain GLSL, we have no atomic loads, so emulate this by fetch-adding 0 and hoping the compiler figures it out. |
12162 | // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. |
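// E.g. a load from a uint atomic becomes atomicAdd(x, 0u), or imageAtomicAdd(...) for image atomics.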
12163 | auto &type = expression_type(id: ops[2]); |
12164 | forced_temporaries.insert(x: ops[1]); |
12165 | bool atomic_image = check_atomic_image(id: ops[2]); |
12166 | bool unsigned_type = (type.basetype == SPIRType::UInt) || |
12167 | (atomic_image && get<SPIRType>(id: type.image.type).basetype == SPIRType::UInt); |
12168 | const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd" ; |
12169 | const char *increment = unsigned_type ? "0u" : "0" ; |
12170 | emit_op(result_type: ops[0], result_id: ops[1], |
12171 | rhs: join(ts&: op, ts: "(" , |
12172 | ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", " , ts&: increment, ts: ")" ), forwarding: false); |
12173 | flush_all_atomic_capable_variables(); |
12174 | break; |
12175 | } |
12176 | |
12177 | case OpAtomicStore: |
12178 | { |
12179 | // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result. |
12180 | // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. |
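// E.g. an atomic store becomes a bare "atomicExchange(ptr, value);" statement with the old value discarded.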
12181 | uint32_t ptr = ops[0]; |
12182 | // Ignore semantics for now, probably only relevant to CL. |
12183 | uint32_t val = ops[3]; |
12184 | const char *op = check_atomic_image(id: ptr) ? "imageAtomicExchange" : "atomicExchange" ; |
12185 | statement(ts&: op, ts: "(" , ts: to_non_uniform_aware_expression(id: ptr), ts: ", " , ts: to_expression(id: val), ts: ");" ); |
12186 | flush_all_atomic_capable_variables(); |
12187 | break; |
12188 | } |
12189 | |
12190 | case OpAtomicIIncrement: |
12191 | case OpAtomicIDecrement: |
12192 | { |
12193 | forced_temporaries.insert(x: ops[1]); |
12194 | auto &type = expression_type(id: ops[2]); |
12195 | if (type.storage == StorageClassAtomicCounter) |
12196 | { |
12197 | // Legacy GLSL stuff, not sure if this is relevant to support. |
12198 | if (opcode == OpAtomicIIncrement) |
12199 | GLSL_UFOP(atomicCounterIncrement); |
12200 | else |
12201 | GLSL_UFOP(atomicCounterDecrement); |
12202 | } |
12203 | else |
12204 | { |
12205 | bool atomic_image = check_atomic_image(id: ops[2]); |
12206 | bool unsigned_type = (type.basetype == SPIRType::UInt) || |
12207 | (atomic_image && get<SPIRType>(id: type.image.type).basetype == SPIRType::UInt); |
12208 | const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd" ; |
12209 | |
12210 | const char *increment = nullptr; |
12211 | if (opcode == OpAtomicIIncrement && unsigned_type) |
12212 | increment = "1u" ; |
12213 | else if (opcode == OpAtomicIIncrement) |
12214 | increment = "1" ; |
12215 | else if (unsigned_type) |
12216 | increment = "uint(-1)" ; |
12217 | else |
12218 | increment = "-1" ; |
12219 | |
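// E.g. decrementing a uint emits atomicAdd(x, uint(-1)), relying on unsigned wraparound to subtract one.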
12220 | emit_op(result_type: ops[0], result_id: ops[1], |
12221 | rhs: join(ts&: op, ts: "(" , ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", " , ts&: increment, ts: ")" ), forwarding: false); |
12222 | } |
12223 | |
12224 | flush_all_atomic_capable_variables(); |
12225 | break; |
12226 | } |
12227 | |
12228 | case OpAtomicIAdd: |
12229 | case OpAtomicFAddEXT: |
12230 | { |
12231 | const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicAdd" : "atomicAdd" ; |
12232 | emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op); |
12233 | break; |
12234 | } |
12235 | |
12236 | case OpAtomicISub: |
12237 | { |
12238 | const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicAdd" : "atomicAdd" ; |
12239 | forced_temporaries.insert(x: ops[1]); |
12240 | auto expr = join(ts&: op, ts: "(" , ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", -" , ts: to_enclosed_expression(id: ops[5]), ts: ")" ); |
12241 | emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: should_forward(id: ops[2]) && should_forward(id: ops[5])); |
12242 | flush_all_atomic_capable_variables(); |
12243 | break; |
12244 | } |
12245 | |
12246 | case OpAtomicSMin: |
12247 | case OpAtomicUMin: |
12248 | { |
12249 | const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicMin" : "atomicMin" ; |
12250 | emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op); |
12251 | break; |
12252 | } |
12253 | |
12254 | case OpAtomicSMax: |
12255 | case OpAtomicUMax: |
12256 | { |
12257 | const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicMax" : "atomicMax" ; |
12258 | emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op); |
12259 | break; |
12260 | } |
12261 | |
12262 | case OpAtomicAnd: |
12263 | { |
12264 | const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicAnd" : "atomicAnd" ; |
12265 | emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op); |
12266 | break; |
12267 | } |
12268 | |
12269 | case OpAtomicOr: |
12270 | { |
12271 | const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicOr" : "atomicOr" ; |
12272 | emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op); |
12273 | break; |
12274 | } |
12275 | |
12276 | case OpAtomicXor: |
12277 | { |
12278 | const char *op = check_atomic_image(id: ops[2]) ? "imageAtomicXor" : "atomicXor" ; |
12279 | emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op); |
12280 | break; |
12281 | } |
12282 | |
12283 | // Geometry shaders |
12284 | case OpEmitVertex: |
12285 | statement(ts: "EmitVertex();" ); |
12286 | break; |
12287 | |
12288 | case OpEndPrimitive: |
12289 | statement(ts: "EndPrimitive();" ); |
12290 | break; |
12291 | |
12292 | case OpEmitStreamVertex: |
12293 | { |
12294 | if (options.es) |
12295 | SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES." ); |
12296 | else if (!options.es && options.version < 400) |
12297 | SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400." ); |
12298 | |
12299 | auto stream_expr = to_expression(id: ops[0]); |
12300 | if (expression_type(id: ops[0]).basetype != SPIRType::Int) |
12301 | stream_expr = join(ts: "int(" , ts&: stream_expr, ts: ")" ); |
12302 | statement(ts: "EmitStreamVertex(" , ts&: stream_expr, ts: ");" ); |
12303 | break; |
12304 | } |
12305 | |
12306 | case OpEndStreamPrimitive: |
12307 | { |
12308 | if (options.es) |
12309 | SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES." ); |
12310 | else if (!options.es && options.version < 400) |
12311 | SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400." ); |
12312 | |
12313 | auto stream_expr = to_expression(id: ops[0]); |
12314 | if (expression_type(id: ops[0]).basetype != SPIRType::Int) |
12315 | stream_expr = join(ts: "int(" , ts&: stream_expr, ts: ")" ); |
12316 | statement(ts: "EndStreamPrimitive(" , ts&: stream_expr, ts: ");" ); |
12317 | break; |
12318 | } |
12319 | |
12320 | // Textures |
12321 | case OpImageSampleExplicitLod: |
12322 | case OpImageSampleProjExplicitLod: |
12323 | case OpImageSampleDrefExplicitLod: |
12324 | case OpImageSampleProjDrefExplicitLod: |
12325 | case OpImageSampleImplicitLod: |
12326 | case OpImageSampleProjImplicitLod: |
12327 | case OpImageSampleDrefImplicitLod: |
12328 | case OpImageSampleProjDrefImplicitLod: |
12329 | case OpImageFetch: |
12330 | case OpImageGather: |
12331 | case OpImageDrefGather: |
12332 | // Gets a bit hairy, so this is handled in a separate function. |
12333 | emit_texture_op(i: instruction, sparse: false); |
12334 | break; |
12335 | |
12336 | case OpImageSparseSampleExplicitLod: |
12337 | case OpImageSparseSampleProjExplicitLod: |
12338 | case OpImageSparseSampleDrefExplicitLod: |
12339 | case OpImageSparseSampleProjDrefExplicitLod: |
12340 | case OpImageSparseSampleImplicitLod: |
12341 | case OpImageSparseSampleProjImplicitLod: |
12342 | case OpImageSparseSampleDrefImplicitLod: |
12343 | case OpImageSparseSampleProjDrefImplicitLod: |
12344 | case OpImageSparseFetch: |
12345 | case OpImageSparseGather: |
12346 | case OpImageSparseDrefGather: |
12347 | // Gets a bit hairy, so move this to a separate instruction. |
12348 | emit_texture_op(i: instruction, sparse: true); |
12349 | break; |
12350 | |
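	// Maps a residency code from a sparse fetch to a bool via sparseTexelsResidentARB().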
	case OpImageSparseTexelsResident:
		if (options.es)
			SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL.");
		require_extension_internal("GL_ARB_sparse_texture2");
		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean);
		break;

	case OpImage:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];

		// Suppress usage tracking.
		auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);

		// When using the image, we need to know which variable it is actually loaded from.
		auto *var = maybe_get_backing_variable(ops[2]);
		e.loaded_from = var ? var->self : ID(0);
		break;
	}
12371 | |
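	// textureQueryLod() requires GLSL 400. Older desktop GL can fall back to GL_ARB_texture_query_lod
	// (spelled textureQueryLOD); ES has no equivalent.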
	case OpImageQueryLod:
	{
		const char *op = nullptr;
		if (!options.es && options.version < 400)
		{
			require_extension_internal("GL_ARB_texture_query_lod");
			// For some reason, the ARB spec is all-caps.
			op = "textureQueryLOD";
		}
		else if (options.es)
			SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
		else
			op = "textureQueryLod";

		auto sampler_expr = to_expression(ops[2]);
		if (has_decoration(ops[2], DecorationNonUniform))
		{
			if (maybe_get_backing_variable(ops[2]))
				convert_non_uniform_expression(sampler_expr, ops[2]);
			else if (*backend.nonuniform_qualifier != '\0')
				sampler_expr = join(backend.nonuniform_qualifier, "(", sampler_expr, ")");
		}

		bool forward = should_forward(ops[3]);
		emit_op(ops[0], ops[1],
		        join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"),
		        forward);
		inherit_expression_dependencies(ops[1], ops[2]);
		inherit_expression_dependencies(ops[1], ops[3]);
		register_control_dependent_expression(ops[1]);
		break;
	}
12404 | |
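	// textureQueryLevels() requires GLSL 430 or GL_ARB_texture_query_levels; ES has no equivalent.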
	case OpImageQueryLevels:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];

		if (!options.es && options.version < 430)
			require_extension_internal("GL_ARB_texture_query_levels");
		if (options.es)
			SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");

		auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")");
		auto &restype = get<SPIRType>(ops[0]);
		expr = bitcast_expression(restype, SPIRType::Int, expr);
		emit_op(result_type, id, expr, true);
		break;
	}

	case OpImageQuerySamples:
	{
		auto &type = expression_type(ops[2]);
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];

		string expr;
		if (type.image.sampled == 2)
			expr = join("imageSamples(", to_non_uniform_aware_expression(ops[2]), ")");
		else
			expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");

		auto &restype = get<SPIRType>(ops[0]);
		expr = bitcast_expression(restype, SPIRType::Int, expr);
		emit_op(result_type, id, expr, true);
		break;
	}

	case OpSampledImage:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_sampled_image_op(result_type, id, ops[2], ops[3]);
		inherit_expression_dependencies(id, ops[2]);
		inherit_expression_dependencies(id, ops[3]);
		break;
	}
12449 | |
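	// textureSize() with an explicit LOD argument; legacy desktop targets remap the call via legacy_tex_op().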
	case OpImageQuerySizeLod:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t img = ops[2];

		std::string fname = "textureSize";
		if (is_legacy_desktop())
		{
			auto &type = expression_type(img);
			auto &imgtype = get<SPIRType>(type.self);
			fname = legacy_tex_op(fname, imgtype, img);
		}
		else if (is_legacy_es())
			SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");

		auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ",
		                 bitcast_expression(SPIRType::Int, ops[3]), ")");
		auto &restype = get<SPIRType>(ops[0]);
		expr = bitcast_expression(restype, SPIRType::Int, expr);
		emit_op(result_type, id, expr, true);
		break;
	}
12473 | |
12474 | // Image load/store |
12475 | case OpImageRead: |
12476 | case OpImageSparseRead: |
12477 | { |
12478 | // We added Nonreadable speculatively to the OpImage variable due to glslangValidator |
12479 | // not adding the proper qualifiers. |
12480 | // If it turns out we need to read the image after all, remove the qualifier and recompile. |
12481 | auto *var = maybe_get_backing_variable(chain: ops[2]); |
12482 | if (var) |
12483 | { |
12484 | auto &flags = get_decoration_bitset(id: var->self); |
12485 | if (flags.get(bit: DecorationNonReadable)) |
12486 | { |
12487 | unset_decoration(id: var->self, decoration: DecorationNonReadable); |
12488 | force_recompile(); |
12489 | } |
12490 | } |
12491 | |
12492 | uint32_t result_type = ops[0]; |
12493 | uint32_t id = ops[1]; |
12494 | |
12495 | bool pure; |
12496 | string imgexpr; |
12497 | auto &type = expression_type(id: ops[2]); |
12498 | |
12499 | if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code |
12500 | { |
12501 | if (type.image.ms) |
12502 | SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible." ); |
12503 | |
12504 | auto itr = |
12505 | find_if(first: begin(cont&: pls_inputs), last: end(cont&: pls_inputs), pred: [var](const PlsRemap &pls) { return pls.id == var->self; }); |
12506 | |
12507 | if (itr == end(cont&: pls_inputs)) |
12508 | { |
12509 | // For non-PLS inputs, we rely on subpass type remapping information to get it right |
12510 | // since ImageRead always returns 4-component vectors and the backing type is opaque. |
12511 | if (!var->remapped_components) |
12512 | SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly." ); |
12513 | imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: var->remapped_components, expr: to_expression(id: ops[2])); |
12514 | } |
12515 | else |
12516 | { |
12517 | // PLS input could have different number of components than what the SPIR expects, swizzle to |
12518 | // the appropriate vector size. |
12519 | uint32_t components = pls_format_to_components(format: itr->format); |
12520 | imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: components, expr: to_expression(id: ops[2])); |
12521 | } |
12522 | pure = true; |
12523 | } |
12524 | else if (type.image.dim == DimSubpassData) |
12525 | { |
12526 | if (var && subpass_input_is_framebuffer_fetch(id: var->self)) |
12527 | { |
12528 | imgexpr = to_expression(id: var->self); |
12529 | } |
12530 | else if (options.vulkan_semantics) |
12531 | { |
12532 | // With Vulkan semantics, use the proper Vulkan GLSL construct. |
12533 | if (type.image.ms) |
12534 | { |
12535 | uint32_t operands = ops[4]; |
12536 | if (operands != ImageOperandsSampleMask || length != 6) |
12537 | SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " |
12538 | "operand mask was used." ); |
12539 | |
12540 | uint32_t samples = ops[5]; |
12541 | imgexpr = join(ts: "subpassLoad(" , ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", " , ts: to_expression(id: samples), ts: ")" ); |
12542 | } |
12543 | else |
12544 | imgexpr = join(ts: "subpassLoad(" , ts: to_non_uniform_aware_expression(id: ops[2]), ts: ")" ); |
12545 | } |
12546 | else |
12547 | { |
12548 | if (type.image.ms) |
12549 | { |
12550 | uint32_t operands = ops[4]; |
12551 | if (operands != ImageOperandsSampleMask || length != 6) |
12552 | SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " |
12553 | "operand mask was used." ); |
12554 | |
12555 | uint32_t samples = ops[5]; |
12556 | imgexpr = join(ts: "texelFetch(" , ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ivec2(gl_FragCoord.xy), " , |
12557 | ts: to_expression(id: samples), ts: ")" ); |
12558 | } |
12559 | else |
12560 | { |
12561 | // Implement subpass loads via texture barrier style sampling. |
12562 | imgexpr = join(ts: "texelFetch(" , ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ivec2(gl_FragCoord.xy), 0)" ); |
12563 | } |
12564 | } |
12565 | imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 4, expr: imgexpr); |
12566 | pure = true; |
12567 | } |
12568 | else |
12569 | { |
12570 | bool sparse = opcode == OpImageSparseRead; |
12571 | uint32_t sparse_code_id = 0; |
12572 | uint32_t sparse_texel_id = 0; |
12573 | if (sparse) |
12574 | emit_sparse_feedback_temporaries(result_type_id: ops[0], id: ops[1], feedback_id&: sparse_code_id, texel_id&: sparse_texel_id); |
12575 | |
12576 | // imageLoad only accepts int coords, not uint. |
12577 | auto coord_expr = to_expression(id: ops[3]); |
12578 | auto target_coord_type = expression_type(id: ops[3]); |
12579 | target_coord_type.basetype = SPIRType::Int; |
12580 | coord_expr = bitcast_expression(target_type: target_coord_type, expr_type: expression_type(id: ops[3]).basetype, expr: coord_expr); |
12581 | |
12582 | // ES needs to emulate 1D images as 2D. |
12583 | if (type.image.dim == Dim1D && options.es) |
12584 | coord_expr = join(ts: "ivec2(" , ts&: coord_expr, ts: ", 0)" ); |
12585 | |
12586 | // Plain image load/store. |
12587 | if (sparse) |
12588 | { |
12589 | if (type.image.ms) |
12590 | { |
12591 | uint32_t operands = ops[4]; |
12592 | if (operands != ImageOperandsSampleMask || length != 6) |
12593 | SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " |
12594 | "operand mask was used." ); |
12595 | |
12596 | uint32_t samples = ops[5]; |
12597 | statement(ts: to_expression(id: sparse_code_id), ts: " = sparseImageLoadARB(" , ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", " , |
12598 | ts&: coord_expr, ts: ", " , ts: to_expression(id: samples), ts: ", " , ts: to_expression(id: sparse_texel_id), ts: ");" ); |
12599 | } |
12600 | else |
12601 | { |
12602 | statement(ts: to_expression(id: sparse_code_id), ts: " = sparseImageLoadARB(" , ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", " , |
12603 | ts&: coord_expr, ts: ", " , ts: to_expression(id: sparse_texel_id), ts: ");" ); |
12604 | } |
12605 | imgexpr = join(ts: type_to_glsl(type: get<SPIRType>(id: result_type)), ts: "(" , ts: to_expression(id: sparse_code_id), ts: ", " , |
12606 | ts: to_expression(id: sparse_texel_id), ts: ")" ); |
12607 | } |
12608 | else |
12609 | { |
12610 | if (type.image.ms) |
12611 | { |
12612 | uint32_t operands = ops[4]; |
12613 | if (operands != ImageOperandsSampleMask || length != 6) |
12614 | SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " |
12615 | "operand mask was used." ); |
12616 | |
12617 | uint32_t samples = ops[5]; |
12618 | imgexpr = |
12619 | join(ts: "imageLoad(" , ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", " , ts&: coord_expr, ts: ", " , ts: to_expression(id: samples), ts: ")" ); |
12620 | } |
12621 | else |
12622 | imgexpr = join(ts: "imageLoad(" , ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", " , ts&: coord_expr, ts: ")" ); |
12623 | } |
12624 | |
12625 | if (!sparse) |
12626 | imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 4, expr: imgexpr); |
12627 | pure = false; |
12628 | } |
12629 | |
12630 | if (var) |
12631 | { |
12632 | bool forward = forced_temporaries.find(x: id) == end(cont&: forced_temporaries); |
12633 | auto &e = emit_op(result_type, result_id: id, rhs: imgexpr, forwarding: forward); |
12634 | |
12635 | // We only need to track dependencies if we're reading from image load/store. |
12636 | if (!pure) |
12637 | { |
12638 | e.loaded_from = var->self; |
12639 | if (forward) |
12640 | var->dependees.push_back(t: id); |
12641 | } |
12642 | } |
12643 | else |
12644 | emit_op(result_type, result_id: id, rhs: imgexpr, forwarding: false); |
12645 | |
12646 | inherit_expression_dependencies(dst: id, source: ops[2]); |
12647 | if (type.image.ms) |
12648 | inherit_expression_dependencies(dst: id, source: ops[5]); |
12649 | break; |
12650 | } |
12651 | |
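	// GLSL has no pointers, so just record the "image, coord" expression pair here and let the
	// atomic op which later consumes the texel pointer emit the appropriate imageAtomic*() call.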
	case OpImageTexelPointer:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];

		auto coord_expr = to_expression(ops[3]);
		auto target_coord_type = expression_type(ops[3]);
		target_coord_type.basetype = SPIRType::Int;
		coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);

		auto expr = join(to_expression(ops[2]), ", ", coord_expr);
		auto &e = set<SPIRExpression>(id, expr, result_type, true);

		// When using the pointer, we need to know which variable it is actually loaded from.
		auto *var = maybe_get_backing_variable(ops[2]);
		e.loaded_from = var ? var->self : ID(0);
		inherit_expression_dependencies(id, ops[3]);
		break;
	}

	case OpImageWrite:
	{
		// We added Nonwritable speculatively to the OpImage variable due to glslangValidator
		// not adding the proper qualifiers.
		// If it turns out we need to write to the image after all, remove the qualifier and recompile.
		auto *var = maybe_get_backing_variable(ops[0]);
		if (var)
		{
			if (has_decoration(var->self, DecorationNonWritable))
			{
				unset_decoration(var->self, DecorationNonWritable);
				force_recompile();
			}
		}

		auto &type = expression_type(ops[0]);
		auto &value_type = expression_type(ops[2]);
		auto store_type = value_type;
		store_type.vecsize = 4;

		// imageStore only accepts int coords, not uint.
		auto coord_expr = to_expression(ops[1]);
		auto target_coord_type = expression_type(ops[1]);
		target_coord_type.basetype = SPIRType::Int;
		coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);

		// ES needs to emulate 1D images as 2D.
		if (type.image.dim == Dim1D && options.es)
			coord_expr = join("ivec2(", coord_expr, ", 0)");

		if (type.image.ms)
		{
			uint32_t operands = ops[3];
			if (operands != ImageOperandsSampleMask || length != 5)
				SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
			uint32_t samples = ops[4];
			statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
			          remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
		}
		else
			statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ",
			          remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");

		if (var && variable_storage_is_aliased(*var))
			flush_all_aliased_variables();
		break;
	}
12719 | |
	case OpImageQuerySize:
	{
		auto &type = expression_type(ops[2]);
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];

		if (type.basetype == SPIRType::Image)
		{
			string expr;
			if (type.image.sampled == 2)
			{
				if (!options.es && options.version < 430)
					require_extension_internal("GL_ARB_shader_image_size");
				else if (options.es && options.version < 310)
					SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");

				// The size of an image is always constant.
				expr = join("imageSize(", to_non_uniform_aware_expression(ops[2]), ")");
			}
			else
			{
				// This path is hit for samplerBuffers and multisampled images which do not have LOD.
				std::string fname = "textureSize";
				if (is_legacy())
				{
					auto &imgtype = get<SPIRType>(type.self);
					fname = legacy_tex_op(fname, imgtype, ops[2]);
				}
				expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")");
			}

			auto &restype = get<SPIRType>(ops[0]);
			expr = bitcast_expression(restype, SPIRType::Int, expr);
			emit_op(result_type, id, expr, true);
		}
		else
			SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
		break;
	}
12759 | |
12760 | // Compute |
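	// Translate SPIR-V scope/semantics into the closest GLSL barrier built-ins,
	// and elide memory barriers that an immediately following control barrier already covers.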
12761 | case OpControlBarrier: |
12762 | case OpMemoryBarrier: |
12763 | { |
12764 | uint32_t execution_scope = 0; |
12765 | uint32_t memory; |
12766 | uint32_t semantics; |
12767 | |
12768 | if (opcode == OpMemoryBarrier) |
12769 | { |
12770 | memory = evaluate_constant_u32(id: ops[0]); |
12771 | semantics = evaluate_constant_u32(id: ops[1]); |
12772 | } |
12773 | else |
12774 | { |
12775 | execution_scope = evaluate_constant_u32(id: ops[0]); |
12776 | memory = evaluate_constant_u32(id: ops[1]); |
12777 | semantics = evaluate_constant_u32(id: ops[2]); |
12778 | } |
12779 | |
12780 | if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup) |
12781 | { |
12782 | // OpControlBarrier with ScopeSubgroup is subgroupBarrier() |
12783 | if (opcode != OpControlBarrier) |
12784 | { |
12785 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMemBarrier); |
12786 | } |
12787 | else |
12788 | { |
12789 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBarrier); |
12790 | } |
12791 | } |
12792 | |
12793 | if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl) |
12794 | { |
12795 | // Control shaders only have barriers, and it implies memory barriers. |
12796 | if (opcode == OpControlBarrier) |
12797 | statement(ts: "barrier();" ); |
12798 | break; |
12799 | } |
12800 | |
12801 | // We only care about these flags, acquire/release and friends are not relevant to GLSL. |
12802 | semantics = mask_relevant_memory_semantics(semantics); |
12803 | |
12804 | if (opcode == OpMemoryBarrier) |
12805 | { |
12806 | // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier |
12807 | // does what we need, so we avoid redundant barriers. |
12808 | const Instruction *next = get_next_instruction_in_block(instr: instruction); |
12809 | if (next && next->op == OpControlBarrier) |
12810 | { |
12811 | auto *next_ops = stream(instr: *next); |
12812 | uint32_t next_memory = evaluate_constant_u32(id: next_ops[1]); |
12813 | uint32_t next_semantics = evaluate_constant_u32(id: next_ops[2]); |
12814 | next_semantics = mask_relevant_memory_semantics(semantics: next_semantics); |
12815 | |
12816 | bool memory_scope_covered = false; |
12817 | if (next_memory == memory) |
12818 | memory_scope_covered = true; |
12819 | else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) |
12820 | { |
12821 | // If we only care about workgroup memory, either Device or Workgroup scope is fine, |
12822 | // scope does not have to match. |
12823 | if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && |
12824 | (memory == ScopeDevice || memory == ScopeWorkgroup)) |
12825 | { |
12826 | memory_scope_covered = true; |
12827 | } |
12828 | } |
12829 | else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) |
12830 | { |
12831 | // The control barrier has device scope, but the memory barrier just has workgroup scope. |
12832 | memory_scope_covered = true; |
12833 | } |
12834 | |
12835 | // If we have the same memory scope, and all memory types are covered, we're good. |
12836 | if (memory_scope_covered && (semantics & next_semantics) == semantics) |
12837 | break; |
12838 | } |
12839 | } |
12840 | |
12841 | // We are synchronizing some memory or syncing execution, |
12842 | // so we cannot forward any loads beyond the memory barrier. |
12843 | if (semantics || opcode == OpControlBarrier) |
12844 | { |
12845 | assert(current_emitting_block); |
12846 | flush_control_dependent_expressions(block: current_emitting_block->self); |
12847 | flush_all_active_variables(); |
12848 | } |
12849 | |
12850 | if (memory == ScopeWorkgroup) // Only need to consider memory within a group |
12851 | { |
12852 | if (semantics == MemorySemanticsWorkgroupMemoryMask) |
12853 | { |
12854 | // OpControlBarrier implies a memory barrier for shared memory as well. |
12855 | bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup; |
12856 | if (!implies_shared_barrier) |
12857 | statement(ts: "memoryBarrierShared();" ); |
12858 | } |
12859 | else if (semantics != 0) |
12860 | statement(ts: "groupMemoryBarrier();" ); |
12861 | } |
12862 | else if (memory == ScopeSubgroup) |
12863 | { |
12864 | const uint32_t all_barriers = |
12865 | MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; |
12866 | |
12867 | if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) |
12868 | { |
12869 | // These are not relevant for GLSL, but assume it means memoryBarrier(). |
12870 | // memoryBarrier() does everything, so no need to test anything else. |
12871 | statement(ts: "subgroupMemoryBarrier();" ); |
12872 | } |
12873 | else if ((semantics & all_barriers) == all_barriers) |
12874 | { |
12875 | // Short-hand instead of emitting 3 barriers. |
12876 | statement(ts: "subgroupMemoryBarrier();" ); |
12877 | } |
12878 | else |
12879 | { |
12880 | // Pick out individual barriers. |
12881 | if (semantics & MemorySemanticsWorkgroupMemoryMask) |
12882 | statement(ts: "subgroupMemoryBarrierShared();" ); |
12883 | if (semantics & MemorySemanticsUniformMemoryMask) |
12884 | statement(ts: "subgroupMemoryBarrierBuffer();" ); |
12885 | if (semantics & MemorySemanticsImageMemoryMask) |
12886 | statement(ts: "subgroupMemoryBarrierImage();" ); |
12887 | } |
12888 | } |
12889 | else |
12890 | { |
12891 | const uint32_t all_barriers = |
12892 | MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; |
12893 | |
12894 | if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) |
12895 | { |
12896 | // These are not relevant for GLSL, but assume it means memoryBarrier(). |
12897 | // memoryBarrier() does everything, so no need to test anything else. |
12898 | statement(ts: "memoryBarrier();" ); |
12899 | } |
12900 | else if ((semantics & all_barriers) == all_barriers) |
12901 | { |
12902 | // Short-hand instead of emitting 4 barriers. |
12903 | statement(ts: "memoryBarrier();" ); |
12904 | } |
12905 | else |
12906 | { |
12907 | // Pick out individual barriers. |
12908 | if (semantics & MemorySemanticsWorkgroupMemoryMask) |
12909 | statement(ts: "memoryBarrierShared();" ); |
12910 | if (semantics & MemorySemanticsUniformMemoryMask) |
12911 | statement(ts: "memoryBarrierBuffer();" ); |
12912 | if (semantics & MemorySemanticsImageMemoryMask) |
12913 | statement(ts: "memoryBarrierImage();" ); |
12914 | } |
12915 | } |
12916 | |
12917 | if (opcode == OpControlBarrier) |
12918 | { |
12919 | if (execution_scope == ScopeSubgroup) |
12920 | statement(ts: "subgroupBarrier();" ); |
12921 | else |
12922 | statement(ts: "barrier();" ); |
12923 | } |
12924 | break; |
12925 | } |
12926 | |
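	// Extended instruction sets: GLSL.std.450, the AMD shader extensions, debug info and debug printf.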
12927 | case OpExtInst: |
12928 | { |
12929 | uint32_t extension_set = ops[2]; |
12930 | auto ext = get<SPIRExtension>(id: extension_set).ext; |
12931 | |
12932 | if (ext == SPIRExtension::GLSL) |
12933 | { |
12934 | emit_glsl_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length: length - 4); |
12935 | } |
12936 | else if (ext == SPIRExtension::SPV_AMD_shader_ballot) |
12937 | { |
12938 | emit_spv_amd_shader_ballot_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4); |
12939 | } |
12940 | else if (ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) |
12941 | { |
12942 | emit_spv_amd_shader_explicit_vertex_parameter_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4); |
12943 | } |
12944 | else if (ext == SPIRExtension::SPV_AMD_shader_trinary_minmax) |
12945 | { |
12946 | emit_spv_amd_shader_trinary_minmax_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4); |
12947 | } |
12948 | else if (ext == SPIRExtension::SPV_AMD_gcn_shader) |
12949 | { |
12950 | emit_spv_amd_gcn_shader_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4); |
12951 | } |
12952 | else if (ext == SPIRExtension::SPV_debug_info) |
12953 | { |
12954 | break; // Ignore SPIR-V debug information extended instructions. |
12955 | } |
12956 | else if (ext == SPIRExtension::NonSemanticDebugPrintf) |
12957 | { |
12958 | // Operation 1 is printf. |
12959 | if (ops[3] == 1) |
12960 | { |
12961 | if (!options.vulkan_semantics) |
12962 | SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.\n" ); |
12963 | require_extension_internal(ext: "GL_EXT_debug_printf" ); |
12964 | auto &format_string = get<SPIRString>(id: ops[4]).str; |
12965 | string expr = join(ts: "debugPrintfEXT(\"" , ts&: format_string, ts: "\"" ); |
12966 | for (uint32_t i = 5; i < length; i++) |
12967 | { |
12968 | expr += ", " ; |
12969 | expr += to_expression(id: ops[i]); |
12970 | } |
12971 | statement(ts&: expr, ts: ");" ); |
12972 | } |
12973 | } |
12974 | else |
12975 | { |
12976 | statement(ts: "// unimplemented ext op " , ts: instruction.op); |
12977 | break; |
12978 | } |
12979 | |
12980 | break; |
12981 | } |
12982 | |
12983 | // Legacy sub-group stuff ... |
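	// These pre-Vulkan-1.1 opcodes map onto GL_ARB_shader_ballot and GL_ARB_shader_group_vote built-ins.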
	case OpSubgroupBallotKHR:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		string expr;
		expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
		emit_op(result_type, id, expr, should_forward(ops[2]));

		require_extension_internal("GL_ARB_shader_ballot");
		inherit_expression_dependencies(id, ops[2]);
		register_control_dependent_expression(ops[1]);
		break;
	}

	case OpSubgroupFirstInvocationKHR:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");

		require_extension_internal("GL_ARB_shader_ballot");
		register_control_dependent_expression(ops[1]);
		break;
	}

	case OpSubgroupReadInvocationKHR:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");

		require_extension_internal("GL_ARB_shader_ballot");
		register_control_dependent_expression(ops[1]);
		break;
	}

	case OpSubgroupAllKHR:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");

		require_extension_internal("GL_ARB_shader_group_vote");
		register_control_dependent_expression(ops[1]);
		break;
	}

	case OpSubgroupAnyKHR:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");

		require_extension_internal("GL_ARB_shader_group_vote");
		register_control_dependent_expression(ops[1]);
		break;
	}

	case OpSubgroupAllEqualKHR:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");

		require_extension_internal("GL_ARB_shader_group_vote");
		register_control_dependent_expression(ops[1]);
		break;
	}
13052 | |
13053 | case OpGroupIAddNonUniformAMD: |
13054 | case OpGroupFAddNonUniformAMD: |
13055 | { |
13056 | uint32_t result_type = ops[0]; |
13057 | uint32_t id = ops[1]; |
13058 | emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "addInvocationsNonUniformAMD" ); |
13059 | |
13060 | require_extension_internal(ext: "GL_AMD_shader_ballot" ); |
13061 | register_control_dependent_expression(expr: ops[1]); |
13062 | break; |
13063 | } |
13064 | |
13065 | case OpGroupFMinNonUniformAMD: |
13066 | case OpGroupUMinNonUniformAMD: |
13067 | case OpGroupSMinNonUniformAMD: |
13068 | { |
13069 | uint32_t result_type = ops[0]; |
13070 | uint32_t id = ops[1]; |
13071 | emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "minInvocationsNonUniformAMD" ); |
13072 | |
13073 | require_extension_internal(ext: "GL_AMD_shader_ballot" ); |
13074 | register_control_dependent_expression(expr: ops[1]); |
13075 | break; |
13076 | } |
13077 | |
13078 | case OpGroupFMaxNonUniformAMD: |
13079 | case OpGroupUMaxNonUniformAMD: |
13080 | case OpGroupSMaxNonUniformAMD: |
13081 | { |
13082 | uint32_t result_type = ops[0]; |
13083 | uint32_t id = ops[1]; |
13084 | emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "maxInvocationsNonUniformAMD" ); |
13085 | |
13086 | require_extension_internal(ext: "GL_AMD_shader_ballot" ); |
13087 | register_control_dependent_expression(expr: ops[1]); |
13088 | break; |
13089 | } |
13090 | |
13091 | case OpFragmentMaskFetchAMD: |
13092 | { |
13093 | auto &type = expression_type(id: ops[2]); |
13094 | uint32_t result_type = ops[0]; |
13095 | uint32_t id = ops[1]; |
13096 | |
13097 | if (type.image.dim == spv::DimSubpassData) |
13098 | { |
13099 | emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "fragmentMaskFetchAMD" ); |
13100 | } |
13101 | else |
13102 | { |
13103 | emit_binary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "fragmentMaskFetchAMD" ); |
13104 | } |
13105 | |
13106 | require_extension_internal(ext: "GL_AMD_shader_fragment_mask" ); |
13107 | break; |
13108 | } |
13109 | |
13110 | case OpFragmentFetchAMD: |
13111 | { |
13112 | auto &type = expression_type(id: ops[2]); |
13113 | uint32_t result_type = ops[0]; |
13114 | uint32_t id = ops[1]; |
13115 | |
13116 | if (type.image.dim == spv::DimSubpassData) |
13117 | { |
13118 | emit_binary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[4], op: "fragmentFetchAMD" ); |
13119 | } |
13120 | else |
13121 | { |
13122 | emit_trinary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op2: ops[4], op: "fragmentFetchAMD" ); |
13123 | } |
13124 | |
13125 | require_extension_internal(ext: "GL_AMD_shader_fragment_mask" ); |
13126 | break; |
13127 | } |
13128 | |
13129 | // Vulkan 1.1 sub-group stuff ... |
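	// All of these funnel through emit_subgroup_op(), which also requests the required subgroup features.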
13130 | case OpGroupNonUniformElect: |
13131 | case OpGroupNonUniformBroadcast: |
13132 | case OpGroupNonUniformBroadcastFirst: |
13133 | case OpGroupNonUniformBallot: |
13134 | case OpGroupNonUniformInverseBallot: |
13135 | case OpGroupNonUniformBallotBitExtract: |
13136 | case OpGroupNonUniformBallotBitCount: |
13137 | case OpGroupNonUniformBallotFindLSB: |
13138 | case OpGroupNonUniformBallotFindMSB: |
13139 | case OpGroupNonUniformShuffle: |
13140 | case OpGroupNonUniformShuffleXor: |
13141 | case OpGroupNonUniformShuffleUp: |
13142 | case OpGroupNonUniformShuffleDown: |
13143 | case OpGroupNonUniformAll: |
13144 | case OpGroupNonUniformAny: |
13145 | case OpGroupNonUniformAllEqual: |
13146 | case OpGroupNonUniformFAdd: |
13147 | case OpGroupNonUniformIAdd: |
13148 | case OpGroupNonUniformFMul: |
13149 | case OpGroupNonUniformIMul: |
13150 | case OpGroupNonUniformFMin: |
13151 | case OpGroupNonUniformFMax: |
13152 | case OpGroupNonUniformSMin: |
13153 | case OpGroupNonUniformSMax: |
13154 | case OpGroupNonUniformUMin: |
13155 | case OpGroupNonUniformUMax: |
13156 | case OpGroupNonUniformBitwiseAnd: |
13157 | case OpGroupNonUniformBitwiseOr: |
13158 | case OpGroupNonUniformBitwiseXor: |
13159 | case OpGroupNonUniformLogicalAnd: |
13160 | case OpGroupNonUniformLogicalOr: |
13161 | case OpGroupNonUniformLogicalXor: |
13162 | case OpGroupNonUniformQuadSwap: |
13163 | case OpGroupNonUniformQuadBroadcast: |
13164 | emit_subgroup_op(i: instruction); |
13165 | break; |
13166 | |
13167 | case OpFUnordEqual: |
13168 | case OpFUnordLessThan: |
13169 | case OpFUnordGreaterThan: |
13170 | case OpFUnordLessThanEqual: |
13171 | case OpFUnordGreaterThanEqual: |
13172 | { |
13173 | // GLSL doesn't specify if floating point comparisons are ordered or unordered, |
13174 | // but glslang always emits ordered floating point compares for GLSL. |
13175 | // To get unordered compares, we can test the opposite thing and invert the result. |
13176 | // This way, we force true when there is any NaN present. |
13177 | uint32_t op0 = ops[2]; |
13178 | uint32_t op1 = ops[3]; |
13179 | |
13180 | string expr; |
13181 | if (expression_type(id: op0).vecsize > 1) |
13182 | { |
13183 | const char *comp_op = nullptr; |
13184 | switch (opcode) |
13185 | { |
13186 | case OpFUnordEqual: |
13187 | comp_op = "notEqual" ; |
13188 | break; |
13189 | |
13190 | case OpFUnordLessThan: |
13191 | comp_op = "greaterThanEqual" ; |
13192 | break; |
13193 | |
13194 | case OpFUnordLessThanEqual: |
13195 | comp_op = "greaterThan" ; |
13196 | break; |
13197 | |
13198 | case OpFUnordGreaterThan: |
13199 | comp_op = "lessThanEqual" ; |
13200 | break; |
13201 | |
13202 | case OpFUnordGreaterThanEqual: |
13203 | comp_op = "lessThan" ; |
13204 | break; |
13205 | |
13206 | default: |
13207 | assert(0); |
13208 | break; |
13209 | } |
13210 | |
13211 | expr = join(ts: "not(" , ts&: comp_op, ts: "(" , ts: to_unpacked_expression(id: op0), ts: ", " , ts: to_unpacked_expression(id: op1), ts: "))" ); |
13212 | } |
13213 | else |
13214 | { |
13215 | const char *comp_op = nullptr; |
13216 | switch (opcode) |
13217 | { |
13218 | case OpFUnordEqual: |
13219 | comp_op = " != " ; |
13220 | break; |
13221 | |
13222 | case OpFUnordLessThan: |
13223 | comp_op = " >= " ; |
13224 | break; |
13225 | |
13226 | case OpFUnordLessThanEqual: |
13227 | comp_op = " > " ; |
13228 | break; |
13229 | |
13230 | case OpFUnordGreaterThan: |
13231 | comp_op = " <= " ; |
13232 | break; |
13233 | |
13234 | case OpFUnordGreaterThanEqual: |
13235 | comp_op = " < " ; |
13236 | break; |
13237 | |
13238 | default: |
13239 | assert(0); |
13240 | break; |
13241 | } |
13242 | |
13243 | expr = join(ts: "!(" , ts: to_enclosed_unpacked_expression(id: op0), ts&: comp_op, ts: to_enclosed_unpacked_expression(id: op1), ts: ")" ); |
13244 | } |
13245 | |
13246 | emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1)); |
13247 | inherit_expression_dependencies(dst: ops[1], source: op0); |
13248 | inherit_expression_dependencies(dst: ops[1], source: op1); |
13249 | break; |
13250 | } |
13251 | |
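	// Ray tracing (NV and KHR variants). The KHR trace/callable forms take a payload or callable
	// location decoration rather than a variable ID.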
	case OpReportIntersectionKHR:
		// NV is same opcode.
		forced_temporaries.insert(ops[1]);
		if (ray_tracing_is_khr)
			GLSL_BFOP(reportIntersectionEXT);
		else
			GLSL_BFOP(reportIntersectionNV);
		flush_control_dependent_expressions(current_emitting_block->self);
		break;
	case OpIgnoreIntersectionNV:
		// KHR variant is a terminator.
		statement("ignoreIntersectionNV();");
		flush_control_dependent_expressions(current_emitting_block->self);
		break;
	case OpTerminateRayNV:
		// KHR variant is a terminator.
		statement("terminateRayNV();");
		flush_control_dependent_expressions(current_emitting_block->self);
		break;
	case OpTraceNV:
		statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
		          to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
		          to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
		          to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
		flush_control_dependent_expressions(current_emitting_block->self);
		break;
	case OpTraceRayKHR:
		if (!has_decoration(ops[10], DecorationLocation))
			SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
		statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
		          to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
		          to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
		          to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");");
		flush_control_dependent_expressions(current_emitting_block->self);
		break;
	case OpExecuteCallableNV:
		statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
		flush_control_dependent_expressions(current_emitting_block->self);
		break;
	case OpExecuteCallableKHR:
		if (!has_decoration(ops[1], DecorationLocation))
			SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
		statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");");
		flush_control_dependent_expressions(current_emitting_block->self);
		break;

	// Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects.
	case OpRayQueryInitializeKHR:
		flush_variable_declaration(ops[0]);
		statement("rayQueryInitializeEXT(",
		          to_expression(ops[0]), ", ", to_expression(ops[1]), ", ",
		          to_expression(ops[2]), ", ", to_expression(ops[3]), ", ",
		          to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
		          to_expression(ops[6]), ", ", to_expression(ops[7]), ");");
		break;
	case OpRayQueryProceedKHR:
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join("rayQueryProceedEXT(", to_expression(ops[2]), ")"), false);
		break;
	case OpRayQueryTerminateKHR:
		flush_variable_declaration(ops[0]);
		statement("rayQueryTerminateEXT(", to_expression(ops[0]), ");");
		break;
	case OpRayQueryGenerateIntersectionKHR:
		flush_variable_declaration(ops[0]);
		statement("rayQueryGenerateIntersectionEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
		break;
	case OpRayQueryConfirmIntersectionKHR:
		flush_variable_declaration(ops[0]);
		statement("rayQueryConfirmIntersectionEXT(", to_expression(ops[0]), ");");
		break;
13323 | #define GLSL_RAY_QUERY_GET_OP(op) \ |
13324 | case OpRayQueryGet##op##KHR: \ |
13325 | flush_variable_declaration(ops[2]); \ |
13326 | emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \ |
13327 | break |
13328 | #define GLSL_RAY_QUERY_GET_OP2(op) \ |
13329 | case OpRayQueryGet##op##KHR: \ |
13330 | flush_variable_declaration(ops[2]); \ |
13331 | emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \ |
13332 | break |
13333 | GLSL_RAY_QUERY_GET_OP(RayTMin); |
13334 | GLSL_RAY_QUERY_GET_OP(RayFlags); |
13335 | GLSL_RAY_QUERY_GET_OP(WorldRayOrigin); |
13336 | GLSL_RAY_QUERY_GET_OP(WorldRayDirection); |
13337 | GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque); |
13338 | GLSL_RAY_QUERY_GET_OP2(IntersectionType); |
13339 | GLSL_RAY_QUERY_GET_OP2(IntersectionT); |
13340 | GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex); |
13341 | GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId); |
13342 | GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset); |
13343 | GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex); |
13344 | GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex); |
13345 | GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics); |
13346 | GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace); |
13347 | GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection); |
13348 | GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin); |
13349 | GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld); |
13350 | GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject); |
13351 | #undef GLSL_RAY_QUERY_GET_OP |
13352 | #undef GLSL_RAY_QUERY_GET_OP2 |
13353 | |
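	// Conversions between integers and opaque handles/pointers: acceleration structures and
	// GL_EXT_buffer_reference (PhysicalStorageBuffer) pointers.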
	case OpConvertUToAccelerationStructureKHR:
		require_extension_internal("GL_EXT_ray_tracing");
		GLSL_UFOP(accelerationStructureEXT);
		break;

	case OpConvertUToPtr:
	{
		auto &type = get<SPIRType>(ops[0]);
		if (type.storage != StorageClassPhysicalStorageBufferEXT)
			SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");

		auto &in_type = expression_type(ops[2]);
		if (in_type.vecsize == 2)
			require_extension_internal("GL_EXT_buffer_reference_uvec2");

		auto op = type_to_glsl(type);
		emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
		break;
	}

	case OpConvertPtrToU:
	{
		auto &type = get<SPIRType>(ops[0]);
		auto &ptr_type = expression_type(ops[2]);
		if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
			SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");

		if (type.vecsize == 2)
			require_extension_internal("GL_EXT_buffer_reference_uvec2");

		auto op = type_to_glsl(type);
		emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
		break;
	}
13388 | |
	case OpUndef:
		// Undefined value has been declared.
		break;

	case OpLine:
	{
		emit_line_directive(ops[0], ops[1]);
		break;
	}

	case OpNoLine:
		break;

	case OpDemoteToHelperInvocationEXT:
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
		require_extension_internal("GL_EXT_demote_to_helper_invocation");
		statement(backend.demote_literal, ";");
		break;

	case OpIsHelperInvocationEXT:
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
		require_extension_internal("GL_EXT_demote_to_helper_invocation");
		// Helper lane state with demote is volatile by nature.
		// Do not forward this.
		emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
		break;

	case OpBeginInvocationInterlockEXT:
		// If the interlock is complex, we emit this elsewhere.
		if (!interlocked_is_complex)
		{
			statement("SPIRV_Cross_beginInvocationInterlock();");
			flush_all_active_variables();
			// Make sure forwarding doesn't propagate outside interlock region.
		}
		break;

	case OpEndInvocationInterlockEXT:
		// If the interlock is complex, we emit this elsewhere.
		if (!interlocked_is_complex)
		{
			statement("SPIRV_Cross_endInvocationInterlock();");
			flush_all_active_variables();
			// Make sure forwarding doesn't propagate outside interlock region.
		}
		break;

	default:
		statement("// unimplemented op ", instruction.op);
		break;
	}
13442 | } |
13443 | |
13444 | // Appends function arguments, mapped from global variables, beyond the specified arg index. |
13445 | // This is used when a function call uses fewer arguments than the function defines. |
13446 | // This situation may occur if the function signature has been dynamically modified to |
13447 | // extract global variables referenced from within the function, and convert them to |
13448 | // function arguments. This is necessary for shader languages that do not support global |
13449 | // access to shader input content from within a function (eg. Metal). Each additional |
13450 | // function args uses the name of the global variable. Function nesting will modify the |
13451 | // functions and function calls all the way up the nesting chain. |
void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
{
	auto &args = func.arguments;
	uint32_t arg_cnt = uint32_t(args.size());
	for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
	{
		auto &arg = args[arg_idx];
		assert(arg.alias_global_variable);

		// If the underlying variable needs to be declared
		// (ie. a local variable with deferred declaration), do so now.
		uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
		if (var_id)
			flush_variable_declaration(var_id);

		arglist.push_back(to_func_call_arg(arg, arg.id));
	}
}
13470 | |
string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
{
	if (type.type_alias != TypeID(0) &&
	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
	{
		return to_member_name(get<SPIRType>(type.type_alias), index);
	}

	auto &memb = ir.meta[type.self].members;
	if (index < memb.size() && !memb[index].alias.empty())
		return memb[index].alias;
	else
		return join("_m", index);
}

string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
{
	return join(".", to_member_name(type, index));
}

string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
{
	string ret;
	auto *member_type = &type;
	for (auto &index : indices)
	{
		ret += join(".", to_member_name(*member_type, index));
		member_type = &get<SPIRType>(member_type->member_types[index]);
	}
	return ret;
}

void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
{
	auto &memb = ir.meta[type.self].members;
	if (index < memb.size() && !memb[index].alias.empty())
	{
		auto &name = memb[index].alias;
		if (name.empty())
			return;

		ParsedIR::sanitize_identifier(name, true, true);
		update_name_cache(type.member_name_cache, name);
	}
}
13516 | |
13517 | // Checks whether the ID is a row_major matrix that requires conversion before use |
13518 | bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id) |
13519 | { |
13520 | // Natively supported row-major matrices do not need to be converted. |
13521 | // Legacy targets do not support row major. |
13522 | if (backend.native_row_major_matrix && !is_legacy()) |
13523 | return false; |
13524 | |
13525 | auto *e = maybe_get<SPIRExpression>(id); |
13526 | if (e) |
13527 | return e->need_transpose; |
13528 | else |
13529 | return has_decoration(id, decoration: DecorationRowMajor); |
13530 | } |
13531 | |
13532 | // Checks whether the member is a row_major matrix that requires conversion before use |
13533 | bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) |
13534 | { |
13535 | // Natively supported row-major matrices do not need to be converted. |
13536 | if (backend.native_row_major_matrix && !is_legacy()) |
13537 | return false; |
13538 | |
13539 | // Non-matrix or column-major matrix types do not need to be converted. |
13540 | if (!has_member_decoration(id: type.self, index, decoration: DecorationRowMajor)) |
13541 | return false; |
13542 | |
13543 | // Only square row-major matrices can be converted at this time. |
13544 | // Converting non-square matrices will require defining custom GLSL function that |
13545 | // swaps matrix elements while retaining the original dimensional form of the matrix. |
13546 | const auto mbr_type = get<SPIRType>(id: type.member_types[index]); |
13547 | if (mbr_type.columns != mbr_type.vecsize) |
13548 | SPIRV_CROSS_THROW("Row-major matrices must be square on this platform." ); |
13549 | |
13550 | return true; |
13551 | } |
13552 | |
13553 | // Checks if we need to remap physical type IDs when declaring the type in a buffer. |
13554 | bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const |
13555 | { |
13556 | return has_extended_member_decoration(type: type.self, index, decoration: SPIRVCrossDecorationPhysicalTypeID); |
13557 | } |
13558 | |
13559 | // Checks whether the member is in packed data type, that might need to be unpacked. |
13560 | bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const |
13561 | { |
13562 | return has_extended_member_decoration(type: type.self, index, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
13563 | } |
13564 | |
13565 | // Wraps the expression string in a function call that converts the |
13566 | // row_major matrix result of the expression to a column_major matrix. |
13567 | // Base implementation uses the standard library transpose() function. |
13568 | // Subclasses may override to use a different function. |
13569 | string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */, |
13570 | bool /*is_packed*/) |
13571 | { |
13572 | strip_enclosed_expression(expr&: exp_str); |
13573 | if (!is_matrix(type: exp_type)) |
13574 | { |
13575 | auto column_index = exp_str.find_last_of(c: '['); |
13576 | if (column_index == string::npos) |
13577 | return exp_str; |
13578 | |
13579 | auto column_expr = exp_str.substr(pos: column_index); |
13580 | exp_str.resize(n: column_index); |
13581 | |
13582 | auto transposed_expr = type_to_glsl_constructor(type: exp_type) + "(" ; |
13583 | |
13584 | // Loading a column from a row-major matrix. Unroll the load. |
13585 | for (uint32_t c = 0; c < exp_type.vecsize; c++) |
13586 | { |
13587 | transposed_expr += join(ts&: exp_str, ts: '[', ts&: c, ts: ']', ts&: column_expr); |
13588 | if (c + 1 < exp_type.vecsize) |
13589 | transposed_expr += ", " ; |
13590 | } |
13591 | |
13592 | transposed_expr += ")" ; |
13593 | return transposed_expr; |
13594 | } |
13595 | else if (options.version < 120) |
13596 | { |
13597 | // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that |
13598 | // these GLSL versions do not support non-square matrices. |
13599 | if (exp_type.vecsize == 2 && exp_type.columns == 2) |
13600 | { |
13601 | if (!requires_transpose_2x2) |
13602 | { |
13603 | requires_transpose_2x2 = true; |
13604 | force_recompile(); |
13605 | } |
13606 | } |
13607 | else if (exp_type.vecsize == 3 && exp_type.columns == 3) |
13608 | { |
13609 | if (!requires_transpose_3x3) |
13610 | { |
13611 | requires_transpose_3x3 = true; |
13612 | force_recompile(); |
13613 | } |
13614 | } |
13615 | else if (exp_type.vecsize == 4 && exp_type.columns == 4) |
13616 | { |
13617 | if (!requires_transpose_4x4) |
13618 | { |
13619 | requires_transpose_4x4 = true; |
13620 | force_recompile(); |
13621 | } |
13622 | } |
13623 | else |
13624 | SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose." ); |
13625 | return join(ts: "spvTranspose(" , ts&: exp_str, ts: ")" ); |
13626 | } |
13627 | else |
13628 | return join(ts: "transpose(" , ts&: exp_str, ts: ")" ); |
13629 | } |
13630 | |
13631 | string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id) |
13632 | { |
13633 | string type_name = type_to_glsl(type, id); |
13634 | remap_variable_type_name(type, var_name: name, type_name); |
13635 | return join(ts&: type_name, ts: " " , ts: name, ts: type_to_array_glsl(type)); |
13636 | } |
13637 | |
13638 | bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const |
13639 | { |
13640 | return var.storage == storage; |
13641 | } |
13642 | |
13643 | // Emit a structure member. Subclasses may override to modify output, |
13644 | // or to dynamically add a padding member if needed. |
13645 | void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, |
13646 | const string &qualifier, uint32_t) |
13647 | { |
13648 | auto &membertype = get<SPIRType>(id: member_type_id); |
13649 | |
13650 | Bitset memberflags; |
13651 | auto &memb = ir.meta[type.self].members; |
13652 | if (index < memb.size()) |
13653 | memberflags = memb[index].decoration_flags; |
13654 | |
13655 | string qualifiers; |
13656 | bool is_block = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock) || |
13657 | ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock); |
13658 | |
13659 | if (is_block) |
13660 | qualifiers = to_interpolation_qualifiers(flags: memberflags); |
13661 | |
13662 | statement(ts: layout_for_member(type, index), ts&: qualifiers, ts: qualifier, ts: flags_to_qualifiers_glsl(type: membertype, flags: memberflags), |
13663 | ts: variable_decl(type: membertype, name: to_member_name(type, index)), ts: ";" ); |
13664 | } |
13665 | |
13666 | void CompilerGLSL::emit_struct_padding_target(const SPIRType &) |
13667 | { |
13668 | } |
13669 | |
13670 | string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags) |
13671 | { |
13672 | // GL_EXT_buffer_reference variables can be marked as restrict. |
13673 | if (flags.get(bit: DecorationRestrictPointerEXT)) |
13674 | return "restrict " ; |
13675 | |
13676 | string qual; |
13677 | |
13678 | if (type_is_floating_point(type) && flags.get(bit: DecorationNoContraction) && backend.support_precise_qualifier) |
13679 | qual = "precise " ; |
13680 | |
	// Structs do not have precision qualifiers, and neither do doubles (desktop only anyway, so no mediump/highp).
13682 | bool type_supports_precision = |
13683 | type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt || |
13684 | type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || |
13685 | type.basetype == SPIRType::Sampler; |
13686 | |
13687 | if (!type_supports_precision) |
13688 | return qual; |
13689 | |
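	// Worked example (illustrative): a RelaxedPrecision float in an ES fragment shader whose default
	// float precision is already mediump needs no explicit qualifier, while the same decoration
	// elsewhere emits "mediump "; undecorated values emit "highp " unless that is already implied.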
13690 | if (options.es) |
13691 | { |
13692 | auto &execution = get_entry_point(); |
13693 | |
13694 | if (flags.get(bit: DecorationRelaxedPrecision)) |
13695 | { |
13696 | bool implied_fmediump = type.basetype == SPIRType::Float && |
13697 | options.fragment.default_float_precision == Options::Mediump && |
13698 | execution.model == ExecutionModelFragment; |
13699 | |
13700 | bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && |
13701 | options.fragment.default_int_precision == Options::Mediump && |
13702 | execution.model == ExecutionModelFragment; |
13703 | |
13704 | qual += (implied_fmediump || implied_imediump) ? "" : "mediump " ; |
13705 | } |
13706 | else |
13707 | { |
13708 | bool implied_fhighp = |
13709 | type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp && |
13710 | execution.model == ExecutionModelFragment) || |
13711 | (execution.model != ExecutionModelFragment)); |
13712 | |
13713 | bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && |
13714 | ((options.fragment.default_int_precision == Options::Highp && |
13715 | execution.model == ExecutionModelFragment) || |
13716 | (execution.model != ExecutionModelFragment)); |
13717 | |
13718 | qual += (implied_fhighp || implied_ihighp) ? "" : "highp " ; |
13719 | } |
13720 | } |
13721 | else if (backend.allow_precision_qualifiers) |
13722 | { |
13723 | // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient. |
13724 | // The default is highp however, so only emit mediump in the rare case that a shader has these. |
13725 | if (flags.get(bit: DecorationRelaxedPrecision)) |
13726 | qual += "mediump " ; |
13727 | } |
13728 | |
13729 | return qual; |
13730 | } |
13731 | |
13732 | string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) |
13733 | { |
13734 | auto &type = expression_type(id); |
13735 | bool use_precision_qualifiers = backend.allow_precision_qualifiers; |
13736 | if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage)) |
13737 | { |
13738 | // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types. |
13739 | auto &result_type = get<SPIRType>(id: type.image.type); |
13740 | if (result_type.width < 32) |
13741 | return "mediump " ; |
13742 | } |
13743 | return flags_to_qualifiers_glsl(type, flags: ir.meta[id].decoration.decoration_flags); |
13744 | } |
13745 | |
13746 | void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var) |
13747 | { |
13748 | // Works around weird behavior in glslangValidator where |
13749 | // a patch out block is translated to just block members getting the decoration. |
13750 | // To make glslang not complain when we compile again, we have to transform this back to a case where |
13751 | // the variable itself has Patch decoration, and not members. |
13752 | auto &type = get<SPIRType>(id: var.basetype); |
13753 | if (has_decoration(id: type.self, decoration: DecorationBlock)) |
13754 | { |
13755 | uint32_t member_count = uint32_t(type.member_types.size()); |
13756 | for (uint32_t i = 0; i < member_count; i++) |
13757 | { |
13758 | if (has_member_decoration(id: type.self, index: i, decoration: DecorationPatch)) |
13759 | { |
13760 | set_decoration(id: var.self, decoration: DecorationPatch); |
13761 | break; |
13762 | } |
13763 | } |
13764 | |
13765 | if (has_decoration(id: var.self, decoration: DecorationPatch)) |
13766 | for (uint32_t i = 0; i < member_count; i++) |
13767 | unset_member_decoration(id: type.self, index: i, decoration: DecorationPatch); |
13768 | } |
13769 | } |
13770 | |
13771 | string CompilerGLSL::to_qualifiers_glsl(uint32_t id) |
13772 | { |
13773 | auto &flags = get_decoration_bitset(id); |
13774 | string res; |
13775 | |
13776 | auto *var = maybe_get<SPIRVariable>(id); |
13777 | |
13778 | if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) |
13779 | res += "shared " ; |
13780 | |
13781 | res += to_interpolation_qualifiers(flags); |
13782 | if (var) |
13783 | res += to_storage_qualifiers_glsl(var: *var); |
13784 | |
13785 | auto &type = expression_type(id); |
13786 | if (type.image.dim != DimSubpassData && type.image.sampled == 2) |
13787 | { |
13788 | if (flags.get(bit: DecorationCoherent)) |
13789 | res += "coherent " ; |
13790 | if (flags.get(bit: DecorationRestrict)) |
13791 | res += "restrict " ; |
13792 | |
13793 | if (flags.get(bit: DecorationNonWritable)) |
13794 | res += "readonly " ; |
13795 | |
13796 | bool formatted_load = type.image.format == ImageFormatUnknown; |
13797 | if (flags.get(bit: DecorationNonReadable)) |
13798 | { |
13799 | res += "writeonly " ; |
13800 | formatted_load = false; |
13801 | } |
13802 | |
13803 | if (formatted_load) |
13804 | { |
13805 | if (!options.es) |
13806 | require_extension_internal(ext: "GL_EXT_shader_image_load_formatted" ); |
13807 | else |
13808 | SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL." ); |
13809 | } |
13810 | } |
13811 | |
13812 | res += to_precision_qualifiers_glsl(id); |
13813 | |
13814 | return res; |
13815 | } |
13816 | |
13817 | string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) |
13818 | { |
	// glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
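	// A pointer parameter that is both read and written becomes "inout", a write-only one becomes
	// "out", and a read-only one gets no direction qualifier.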
13820 | auto &type = expression_type(id: arg.id); |
13821 | const char *direction = "" ; |
13822 | |
13823 | if (type.pointer) |
13824 | { |
13825 | if (arg.write_count && arg.read_count) |
13826 | direction = "inout " ; |
13827 | else if (arg.write_count) |
13828 | direction = "out " ; |
13829 | } |
13830 | |
13831 | return join(ts&: direction, ts: to_qualifiers_glsl(id: arg.id), ts: variable_decl(type, name: to_name(id: arg.id), id: arg.id)); |
13832 | } |
13833 | |
13834 | string CompilerGLSL::to_initializer_expression(const SPIRVariable &var) |
13835 | { |
13836 | return to_unpacked_expression(id: var.initializer); |
13837 | } |
13838 | |
13839 | string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id) |
13840 | { |
13841 | #ifndef NDEBUG |
13842 | auto &type = get<SPIRType>(id: type_id); |
13843 | assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction || |
13844 | type.storage == StorageClassGeneric); |
13845 | #endif |
13846 | uint32_t id = ir.increase_bound_by(count: 1); |
13847 | ir.make_constant_null(id, type: type_id, add_to_typed_id_set: false); |
13848 | return constant_expression(c: get<SPIRConstant>(id)); |
13849 | } |
13850 | |
13851 | bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const |
13852 | { |
13853 | if (type.pointer) |
13854 | return false; |
13855 | |
13856 | if (!type.array.empty() && options.flatten_multidimensional_arrays) |
13857 | return false; |
13858 | |
13859 | for (auto &literal : type.array_size_literal) |
13860 | if (!literal) |
13861 | return false; |
13862 | |
13863 | for (auto &memb : type.member_types) |
13864 | if (!type_can_zero_initialize(type: get<SPIRType>(id: memb))) |
13865 | return false; |
13866 | |
13867 | return true; |
13868 | } |
13869 | |
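// Declares a full variable, e.g. roughly "mediump vec4 color" plus an initializer when one is
// present. With options.force_zero_initialized_variables set and a type that supports it, an
// uninitialized (or OpUndef-initialized) variable receives an explicit zero initializer,
// e.g. roughly "ivec2 v = ivec2(0)" (illustrative output).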
13870 | string CompilerGLSL::variable_decl(const SPIRVariable &variable) |
13871 | { |
13872 | // Ignore the pointer type since GLSL doesn't have pointers. |
13873 | auto &type = get_variable_data_type(var: variable); |
13874 | |
13875 | if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer) |
13876 | SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types." ); |
13877 | |
13878 | auto res = join(ts: to_qualifiers_glsl(id: variable.self), ts: variable_decl(type, name: to_name(id: variable.self), id: variable.self)); |
13879 | |
13880 | if (variable.loop_variable && variable.static_expression) |
13881 | { |
13882 | uint32_t expr = variable.static_expression; |
13883 | if (ir.ids[expr].get_type() != TypeUndef) |
13884 | res += join(ts: " = " , ts: to_unpacked_expression(id: variable.static_expression)); |
13885 | else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) |
13886 | res += join(ts: " = " , ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var: variable))); |
13887 | } |
13888 | else if (variable.initializer && !variable_decl_is_remapped_storage(var: variable, storage: StorageClassWorkgroup)) |
13889 | { |
13890 | uint32_t expr = variable.initializer; |
13891 | if (ir.ids[expr].get_type() != TypeUndef) |
13892 | res += join(ts: " = " , ts: to_initializer_expression(var: variable)); |
13893 | else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) |
13894 | res += join(ts: " = " , ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var: variable))); |
13895 | } |
13896 | |
13897 | return res; |
13898 | } |
13899 | |
13900 | const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) |
13901 | { |
13902 | auto &flags = get_decoration_bitset(id: variable.self); |
13903 | if (flags.get(bit: DecorationRelaxedPrecision)) |
13904 | return "mediump " ; |
13905 | else |
13906 | return "highp " ; |
13907 | } |
13908 | |
13909 | string CompilerGLSL::pls_decl(const PlsRemap &var) |
13910 | { |
13911 | auto &variable = get<SPIRVariable>(id: var.id); |
13912 | |
13913 | SPIRType type; |
13914 | type.vecsize = pls_format_to_components(format: var.format); |
13915 | type.basetype = pls_format_to_basetype(format: var.format); |
13916 | |
13917 | return join(ts: to_pls_layout(format: var.format), ts: to_pls_qualifiers_glsl(variable), ts: type_to_glsl(type), ts: " " , |
13918 | ts: to_name(id: variable.self)); |
13919 | } |
13920 | |
13921 | uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const |
13922 | { |
13923 | return to_array_size_literal(type, index: uint32_t(type.array.size() - 1)); |
13924 | } |
13925 | |
13926 | uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const |
13927 | { |
13928 | assert(type.array.size() == type.array_size_literal.size()); |
13929 | |
13930 | if (type.array_size_literal[index]) |
13931 | { |
13932 | return type.array[index]; |
13933 | } |
13934 | else |
13935 | { |
13936 | // Use the default spec constant value. |
13937 | // This is the best we can do. |
13938 | return evaluate_constant_u32(id: type.array[index]); |
13939 | } |
13940 | } |
13941 | |
13942 | string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) |
13943 | { |
13944 | assert(type.array.size() == type.array_size_literal.size()); |
13945 | |
13946 | auto &size = type.array[index]; |
13947 | if (!type.array_size_literal[index]) |
13948 | return to_expression(id: size); |
13949 | else if (size) |
13950 | return convert_to_string(t: size); |
13951 | else if (!backend.unsized_array_supported) |
13952 | { |
13953 | // For runtime-sized arrays, we can work around |
13954 | // lack of standard support for this by simply having |
13955 | // a single element array. |
13956 | // |
13957 | // Runtime length arrays must always be the last element |
13958 | // in an interface block. |
13959 | return "1" ; |
13960 | } |
13961 | else |
13962 | return "" ; |
13963 | } |
13964 | |
13965 | string CompilerGLSL::type_to_array_glsl(const SPIRType &type) |
13966 | { |
13967 | if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) |
13968 | { |
13969 | // We are using a wrapped pointer type, and we should not emit any array declarations here. |
13970 | return "" ; |
13971 | } |
13972 | |
13973 | if (type.array.empty()) |
13974 | return "" ; |
13975 | |
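	// With flattening enabled, a multidimensional array such as float[2][3] collapses to a single
	// dimension, roughly "[2 * 3]"; otherwise each dimension is emitted outermost-first as "[2][3]".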
13976 | if (options.flatten_multidimensional_arrays) |
13977 | { |
13978 | string res; |
13979 | res += "[" ; |
13980 | for (auto i = uint32_t(type.array.size()); i; i--) |
13981 | { |
13982 | res += enclose_expression(expr: to_array_size(type, index: i - 1)); |
13983 | if (i > 1) |
13984 | res += " * " ; |
13985 | } |
13986 | res += "]" ; |
13987 | return res; |
13988 | } |
13989 | else |
13990 | { |
13991 | if (type.array.size() > 1) |
13992 | { |
13993 | if (!options.es && options.version < 430) |
13994 | require_extension_internal(ext: "GL_ARB_arrays_of_arrays" ); |
13995 | else if (options.es && options.version < 310) |
13996 | SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. " |
13997 | "Try using --flatten-multidimensional-arrays or set " |
13998 | "options.flatten_multidimensional_arrays to true." ); |
13999 | } |
14000 | |
14001 | string res; |
14002 | for (auto i = uint32_t(type.array.size()); i; i--) |
14003 | { |
14004 | res += "[" ; |
14005 | res += to_array_size(type, index: i - 1); |
14006 | res += "]" ; |
14007 | } |
14008 | return res; |
14009 | } |
14010 | } |
14011 | |
14012 | string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) |
14013 | { |
14014 | auto &imagetype = get<SPIRType>(id: type.image.type); |
14015 | string res; |
14016 | |
14017 | switch (imagetype.basetype) |
14018 | { |
14019 | case SPIRType::Int: |
14020 | case SPIRType::Short: |
14021 | case SPIRType::SByte: |
14022 | res = "i" ; |
14023 | break; |
14024 | case SPIRType::UInt: |
14025 | case SPIRType::UShort: |
14026 | case SPIRType::UByte: |
14027 | res = "u" ; |
14028 | break; |
14029 | default: |
14030 | break; |
14031 | } |
14032 | |
14033 | // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation. |
14034 | // We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter. |
14035 | |
14036 | if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) |
14037 | return res + "subpassInput" + (type.image.ms ? "MS" : "" ); |
14038 | else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && |
14039 | subpass_input_is_framebuffer_fetch(id)) |
14040 | { |
14041 | SPIRType sampled_type = get<SPIRType>(id: type.image.type); |
14042 | sampled_type.vecsize = 4; |
14043 | return type_to_glsl(type: sampled_type); |
14044 | } |
14045 | |
14046 | // If we're emulating subpassInput with samplers, force sampler2D |
14047 | // so we don't have to specify format. |
14048 | if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) |
14049 | { |
14050 | // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. |
14051 | if (type.image.dim == DimBuffer && type.image.sampled == 1) |
14052 | res += "sampler" ; |
14053 | else |
14054 | res += type.image.sampled == 2 ? "image" : "texture" ; |
14055 | } |
14056 | else |
14057 | res += "sampler" ; |
14058 | |
14059 | switch (type.image.dim) |
14060 | { |
14061 | case Dim1D: |
14062 | // ES doesn't support 1D. Fake it with 2D. |
14063 | res += options.es ? "2D" : "1D" ; |
14064 | break; |
14065 | case Dim2D: |
14066 | res += "2D" ; |
14067 | break; |
14068 | case Dim3D: |
14069 | res += "3D" ; |
14070 | break; |
14071 | case DimCube: |
14072 | res += "Cube" ; |
14073 | break; |
14074 | case DimRect: |
14075 | if (options.es) |
14076 | SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES." ); |
14077 | |
14078 | if (is_legacy_desktop()) |
14079 | require_extension_internal(ext: "GL_ARB_texture_rectangle" ); |
14080 | |
14081 | res += "2DRect" ; |
14082 | break; |
14083 | |
14084 | case DimBuffer: |
14085 | if (options.es && options.version < 320) |
14086 | require_extension_internal(ext: "GL_EXT_texture_buffer" ); |
14087 | else if (!options.es && options.version < 300) |
14088 | require_extension_internal(ext: "GL_EXT_texture_buffer_object" ); |
14089 | res += "Buffer" ; |
14090 | break; |
14091 | |
14092 | case DimSubpassData: |
14093 | res += "2D" ; |
14094 | break; |
14095 | default: |
14096 | SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported." ); |
14097 | } |
14098 | |
14099 | if (type.image.ms) |
14100 | res += "MS" ; |
14101 | if (type.image.arrayed) |
14102 | { |
14103 | if (is_legacy_desktop()) |
14104 | require_extension_internal(ext: "GL_EXT_texture_array" ); |
14105 | res += "Array" ; |
14106 | } |
14107 | |
14108 | // "Shadow" state in GLSL only exists for samplers and combined image samplers. |
14109 | if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && |
14110 | is_depth_image(type, id)) |
14111 | { |
14112 | res += "Shadow" ; |
14113 | } |
14114 | |
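	// Illustrative results: a sampled, arrayed, depth 2D image becomes "sampler2DArrayShadow",
	// while a 2D storage image with a uint component type becomes "uimage2D".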
14115 | return res; |
14116 | } |
14117 | |
14118 | string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) |
14119 | { |
14120 | if (backend.use_array_constructor && type.array.size() > 1) |
14121 | { |
14122 | if (options.flatten_multidimensional_arrays) |
14123 | SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, " |
14124 | "e.g. float[][]()." ); |
14125 | else if (!options.es && options.version < 430) |
14126 | require_extension_internal(ext: "GL_ARB_arrays_of_arrays" ); |
14127 | else if (options.es && options.version < 310) |
14128 | SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310." ); |
14129 | } |
14130 | |
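	// The constructor name is the plain type followed by one "[]" per array dimension,
	// e.g. roughly "float[][]" for a two-dimensional float array (illustrative).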
14131 | auto e = type_to_glsl(type); |
14132 | if (backend.use_array_constructor) |
14133 | { |
14134 | for (uint32_t i = 0; i < type.array.size(); i++) |
14135 | e += "[]" ; |
14136 | } |
14137 | return e; |
14138 | } |
14139 | |
// The optional id parameter indicates the object whose type we are trying to find the
// description for. Most type descriptions do not depend on a specific object's use of that type.
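// A few illustrative mappings: a 3-component unsigned integer vector becomes "uvec3", and a
// double matrix with 4 columns and 3 rows becomes "dmat4x3".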
14143 | string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) |
14144 | { |
14145 | if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) |
14146 | { |
14147 | // Need to create a magic type name which compacts the entire type information. |
14148 | string name = type_to_glsl(type: get_pointee_type(type)); |
14149 | for (size_t i = 0; i < type.array.size(); i++) |
14150 | { |
14151 | if (type.array_size_literal[i]) |
14152 | name += join(ts: type.array[i], ts: "_" ); |
14153 | else |
14154 | name += join(ts: "id" , ts: type.array[i], ts: "_" ); |
14155 | } |
14156 | name += "Pointer" ; |
14157 | return name; |
14158 | } |
14159 | |
14160 | switch (type.basetype) |
14161 | { |
14162 | case SPIRType::Struct: |
14163 | // Need OpName lookup here to get a "sensible" name for a struct. |
14164 | if (backend.explicit_struct_type) |
14165 | return join(ts: "struct " , ts: to_name(id: type.self)); |
14166 | else |
14167 | return to_name(id: type.self); |
14168 | |
14169 | case SPIRType::Image: |
14170 | case SPIRType::SampledImage: |
14171 | return image_type_glsl(type, id); |
14172 | |
14173 | case SPIRType::Sampler: |
14174 | // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing |
14175 | // this distinction into the type system. |
14176 | return comparison_ids.count(x: id) ? "samplerShadow" : "sampler" ; |
14177 | |
14178 | case SPIRType::AccelerationStructure: |
14179 | return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV" ; |
14180 | |
14181 | case SPIRType::RayQuery: |
14182 | return "rayQueryEXT" ; |
14183 | |
14184 | case SPIRType::Void: |
14185 | return "void" ; |
14186 | |
14187 | default: |
14188 | break; |
14189 | } |
14190 | |
14191 | if (type.basetype == SPIRType::UInt && is_legacy()) |
14192 | SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets." ); |
14193 | |
14194 | if (type.vecsize == 1 && type.columns == 1) // Scalar builtin |
14195 | { |
14196 | switch (type.basetype) |
14197 | { |
14198 | case SPIRType::Boolean: |
14199 | return "bool" ; |
14200 | case SPIRType::SByte: |
14201 | return backend.basic_int8_type; |
14202 | case SPIRType::UByte: |
14203 | return backend.basic_uint8_type; |
14204 | case SPIRType::Short: |
14205 | return backend.basic_int16_type; |
14206 | case SPIRType::UShort: |
14207 | return backend.basic_uint16_type; |
14208 | case SPIRType::Int: |
14209 | return backend.basic_int_type; |
14210 | case SPIRType::UInt: |
14211 | return backend.basic_uint_type; |
14212 | case SPIRType::AtomicCounter: |
14213 | return "atomic_uint" ; |
14214 | case SPIRType::Half: |
14215 | return "float16_t" ; |
14216 | case SPIRType::Float: |
14217 | return "float" ; |
14218 | case SPIRType::Double: |
14219 | return "double" ; |
14220 | case SPIRType::Int64: |
14221 | return "int64_t" ; |
14222 | case SPIRType::UInt64: |
14223 | return "uint64_t" ; |
14224 | default: |
14225 | return "???" ; |
14226 | } |
14227 | } |
14228 | else if (type.vecsize > 1 && type.columns == 1) // Vector builtin |
14229 | { |
14230 | switch (type.basetype) |
14231 | { |
14232 | case SPIRType::Boolean: |
14233 | return join(ts: "bvec" , ts: type.vecsize); |
14234 | case SPIRType::SByte: |
14235 | return join(ts: "i8vec" , ts: type.vecsize); |
14236 | case SPIRType::UByte: |
14237 | return join(ts: "u8vec" , ts: type.vecsize); |
14238 | case SPIRType::Short: |
14239 | return join(ts: "i16vec" , ts: type.vecsize); |
14240 | case SPIRType::UShort: |
14241 | return join(ts: "u16vec" , ts: type.vecsize); |
14242 | case SPIRType::Int: |
14243 | return join(ts: "ivec" , ts: type.vecsize); |
14244 | case SPIRType::UInt: |
14245 | return join(ts: "uvec" , ts: type.vecsize); |
14246 | case SPIRType::Half: |
14247 | return join(ts: "f16vec" , ts: type.vecsize); |
14248 | case SPIRType::Float: |
14249 | return join(ts: "vec" , ts: type.vecsize); |
14250 | case SPIRType::Double: |
14251 | return join(ts: "dvec" , ts: type.vecsize); |
14252 | case SPIRType::Int64: |
14253 | return join(ts: "i64vec" , ts: type.vecsize); |
14254 | case SPIRType::UInt64: |
14255 | return join(ts: "u64vec" , ts: type.vecsize); |
14256 | default: |
14257 | return "???" ; |
14258 | } |
14259 | } |
14260 | else if (type.vecsize == type.columns) // Simple Matrix builtin |
14261 | { |
14262 | switch (type.basetype) |
14263 | { |
14264 | case SPIRType::Boolean: |
14265 | return join(ts: "bmat" , ts: type.vecsize); |
14266 | case SPIRType::Int: |
14267 | return join(ts: "imat" , ts: type.vecsize); |
14268 | case SPIRType::UInt: |
14269 | return join(ts: "umat" , ts: type.vecsize); |
14270 | case SPIRType::Half: |
14271 | return join(ts: "f16mat" , ts: type.vecsize); |
14272 | case SPIRType::Float: |
14273 | return join(ts: "mat" , ts: type.vecsize); |
14274 | case SPIRType::Double: |
14275 | return join(ts: "dmat" , ts: type.vecsize); |
14276 | // Matrix types not supported for int64/uint64. |
14277 | default: |
14278 | return "???" ; |
14279 | } |
14280 | } |
14281 | else |
14282 | { |
14283 | switch (type.basetype) |
14284 | { |
14285 | case SPIRType::Boolean: |
14286 | return join(ts: "bmat" , ts: type.columns, ts: "x" , ts: type.vecsize); |
14287 | case SPIRType::Int: |
14288 | return join(ts: "imat" , ts: type.columns, ts: "x" , ts: type.vecsize); |
14289 | case SPIRType::UInt: |
14290 | return join(ts: "umat" , ts: type.columns, ts: "x" , ts: type.vecsize); |
14291 | case SPIRType::Half: |
14292 | return join(ts: "f16mat" , ts: type.columns, ts: "x" , ts: type.vecsize); |
14293 | case SPIRType::Float: |
14294 | return join(ts: "mat" , ts: type.columns, ts: "x" , ts: type.vecsize); |
14295 | case SPIRType::Double: |
14296 | return join(ts: "dmat" , ts: type.columns, ts: "x" , ts: type.vecsize); |
14297 | // Matrix types not supported for int64/uint64. |
14298 | default: |
14299 | return "???" ; |
14300 | } |
14301 | } |
14302 | } |
14303 | |
14304 | void CompilerGLSL::add_variable(unordered_set<string> &variables_primary, |
14305 | const unordered_set<string> &variables_secondary, string &name) |
14306 | { |
14307 | if (name.empty()) |
14308 | return; |
14309 | |
14310 | ParsedIR::sanitize_underscores(str&: name); |
14311 | if (ParsedIR::is_globally_reserved_identifier(str&: name, allow_reserved_prefixes: true)) |
14312 | { |
14313 | name.clear(); |
14314 | return; |
14315 | } |
14316 | |
14317 | update_name_cache(cache_primary&: variables_primary, cache_secondary: variables_secondary, name); |
14318 | } |
14319 | |
14320 | void CompilerGLSL::add_local_variable_name(uint32_t id) |
14321 | { |
14322 | add_variable(variables_primary&: local_variable_names, variables_secondary: block_names, name&: ir.meta[id].decoration.alias); |
14323 | } |
14324 | |
14325 | void CompilerGLSL::add_resource_name(uint32_t id) |
14326 | { |
14327 | add_variable(variables_primary&: resource_names, variables_secondary: block_names, name&: ir.meta[id].decoration.alias); |
14328 | } |
14329 | |
void CompilerGLSL::add_header_line(const std::string &line)
14331 | { |
14332 | header_lines.push_back(t: line); |
14333 | } |
14334 | |
14335 | bool CompilerGLSL::has_extension(const std::string &ext) const |
14336 | { |
14337 | auto itr = find(first: begin(cont: forced_extensions), last: end(cont: forced_extensions), val: ext); |
14338 | return itr != end(cont: forced_extensions); |
14339 | } |
14340 | |
14341 | void CompilerGLSL::require_extension(const std::string &ext) |
14342 | { |
14343 | if (!has_extension(ext)) |
14344 | forced_extensions.push_back(t: ext); |
14345 | } |
14346 | |
14347 | void CompilerGLSL::require_extension_internal(const string &ext) |
14348 | { |
14349 | if (backend.supports_extensions && !has_extension(ext)) |
14350 | { |
14351 | forced_extensions.push_back(t: ext); |
14352 | force_recompile(); |
14353 | } |
14354 | } |
14355 | |
14356 | void CompilerGLSL::flatten_buffer_block(VariableID id) |
14357 | { |
14358 | auto &var = get<SPIRVariable>(id); |
14359 | auto &type = get<SPIRType>(id: var.basetype); |
14360 | auto name = to_name(id: type.self, allow_alias: false); |
14361 | auto &flags = get_decoration_bitset(id: type.self); |
14362 | |
14363 | if (!type.array.empty()) |
14364 | SPIRV_CROSS_THROW(name + " is an array of UBOs." ); |
14365 | if (type.basetype != SPIRType::Struct) |
14366 | SPIRV_CROSS_THROW(name + " is not a struct." ); |
14367 | if (!flags.get(bit: DecorationBlock)) |
14368 | SPIRV_CROSS_THROW(name + " is not a block." ); |
14369 | if (type.member_types.empty()) |
14370 | SPIRV_CROSS_THROW(name + " is an empty struct." ); |
14371 | |
14372 | flattened_buffer_blocks.insert(x: id); |
14373 | } |
14374 | |
14375 | bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const |
14376 | { |
14377 | return false; // GLSL itself does not need to translate array builtin types to non-array builtin types |
14378 | } |
14379 | |
14380 | bool CompilerGLSL::check_atomic_image(uint32_t id) |
14381 | { |
14382 | auto &type = expression_type(id); |
14383 | if (type.storage == StorageClassImage) |
14384 | { |
14385 | if (options.es && options.version < 320) |
14386 | require_extension_internal(ext: "GL_OES_shader_image_atomic" ); |
14387 | |
14388 | auto *var = maybe_get_backing_variable(chain: id); |
14389 | if (var) |
14390 | { |
14391 | if (has_decoration(id: var->self, decoration: DecorationNonWritable) || has_decoration(id: var->self, decoration: DecorationNonReadable)) |
14392 | { |
14393 | unset_decoration(id: var->self, decoration: DecorationNonWritable); |
14394 | unset_decoration(id: var->self, decoration: DecorationNonReadable); |
14395 | force_recompile(); |
14396 | } |
14397 | } |
14398 | return true; |
14399 | } |
14400 | else |
14401 | return false; |
14402 | } |
14403 | |
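// GLSL cannot distinguish two functions that end up with the same name and parameter types, so each
// function name is paired with a hash of its (pointer-stripped) argument type IDs; on a collision,
// the later function is simply handed a fresh fallback name.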
14404 | void CompilerGLSL::add_function_overload(const SPIRFunction &func) |
14405 | { |
14406 | Hasher hasher; |
14407 | for (auto &arg : func.arguments) |
14408 | { |
14409 | // Parameters can vary with pointer type or not, |
14410 | // but that will not change the signature in GLSL/HLSL, |
14411 | // so strip the pointer type before hashing. |
14412 | uint32_t type_id = get_pointee_type_id(type_id: arg.type); |
14413 | auto &type = get<SPIRType>(id: type_id); |
14414 | |
14415 | if (!combined_image_samplers.empty()) |
14416 | { |
14417 | // If we have combined image samplers, we cannot really trust the image and sampler arguments |
14418 | // we pass down to callees, because they may be shuffled around. |
14419 | // Ignore these arguments, to make sure that functions need to differ in some other way |
14420 | // to be considered different overloads. |
14421 | if (type.basetype == SPIRType::SampledImage || |
14422 | (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler) |
14423 | { |
14424 | continue; |
14425 | } |
14426 | } |
14427 | |
14428 | hasher.u32(value: type_id); |
14429 | } |
14430 | uint64_t types_hash = hasher.get(); |
14431 | |
14432 | auto function_name = to_name(id: func.self); |
14433 | auto itr = function_overloads.find(x: function_name); |
14434 | if (itr != end(cont&: function_overloads)) |
14435 | { |
14436 | // There exists a function with this name already. |
14437 | auto &overloads = itr->second; |
14438 | if (overloads.count(x: types_hash) != 0) |
14439 | { |
14440 | // Overload conflict, assign a new name. |
14441 | add_resource_name(id: func.self); |
14442 | function_overloads[to_name(id: func.self)].insert(x: types_hash); |
14443 | } |
14444 | else |
14445 | { |
14446 | // Can reuse the name. |
14447 | overloads.insert(x: types_hash); |
14448 | } |
14449 | } |
14450 | else |
14451 | { |
14452 | // First time we see this function name. |
14453 | add_resource_name(id: func.self); |
14454 | function_overloads[to_name(id: func.self)].insert(x: types_hash); |
14455 | } |
14456 | } |
14457 | |
14458 | void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) |
14459 | { |
14460 | if (func.self != ir.default_entry_point) |
14461 | add_function_overload(func); |
14462 | |
14463 | // Avoid shadow declarations. |
14464 | local_variable_names = resource_names; |
14465 | |
14466 | string decl; |
14467 | |
14468 | auto &type = get<SPIRType>(id: func.return_type); |
14469 | decl += flags_to_qualifiers_glsl(type, flags: return_flags); |
14470 | decl += type_to_glsl(type); |
14471 | decl += type_to_array_glsl(type); |
14472 | decl += " " ; |
14473 | |
14474 | if (func.self == ir.default_entry_point) |
14475 | { |
14476 | // If we need complex fallback in GLSL, we just wrap main() in a function |
14477 | // and interlock the entire shader ... |
14478 | if (interlocked_is_complex) |
14479 | decl += "spvMainInterlockedBody" ; |
14480 | else |
14481 | decl += "main" ; |
14482 | |
14483 | processing_entry_point = true; |
14484 | } |
14485 | else |
14486 | decl += to_name(id: func.self); |
14487 | |
14488 | decl += "(" ; |
14489 | SmallVector<string> arglist; |
14490 | for (auto &arg : func.arguments) |
14491 | { |
14492 | // Do not pass in separate images or samplers if we're remapping |
14493 | // to combined image samplers. |
14494 | if (skip_argument(id: arg.id)) |
14495 | continue; |
14496 | |
14497 | // Might change the variable name if it already exists in this function. |
		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for variables.
14500 | // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. |
14501 | add_local_variable_name(id: arg.id); |
14502 | |
14503 | arglist.push_back(t: argument_decl(arg)); |
14504 | |
14505 | // Hold a pointer to the parameter so we can invalidate the readonly field if needed. |
14506 | auto *var = maybe_get<SPIRVariable>(id: arg.id); |
14507 | if (var) |
14508 | var->parameter = &arg; |
14509 | } |
14510 | |
14511 | for (auto &arg : func.shadow_arguments) |
14512 | { |
14513 | // Might change the variable name if it already exists in this function. |
		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for variables.
14516 | // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. |
14517 | add_local_variable_name(id: arg.id); |
14518 | |
14519 | arglist.push_back(t: argument_decl(arg)); |
14520 | |
14521 | // Hold a pointer to the parameter so we can invalidate the readonly field if needed. |
14522 | auto *var = maybe_get<SPIRVariable>(id: arg.id); |
14523 | if (var) |
14524 | var->parameter = &arg; |
14525 | } |
14526 | |
14527 | decl += merge(list: arglist); |
14528 | decl += ")" ; |
14529 | statement(ts&: decl); |
14530 | } |
14531 | |
14532 | void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) |
14533 | { |
14534 | // Avoid potential cycles. |
14535 | if (func.active) |
14536 | return; |
14537 | func.active = true; |
14538 | |
14539 | // If we depend on a function, emit that function before we emit our own function. |
14540 | for (auto block : func.blocks) |
14541 | { |
14542 | auto &b = get<SPIRBlock>(id: block); |
14543 | for (auto &i : b.ops) |
14544 | { |
14545 | auto ops = stream(instr: i); |
14546 | auto op = static_cast<Op>(i.op); |
14547 | |
14548 | if (op == OpFunctionCall) |
14549 | { |
14550 | // Recursively emit functions which are called. |
14551 | uint32_t id = ops[2]; |
14552 | emit_function(func&: get<SPIRFunction>(id), return_flags: ir.meta[ops[1]].decoration.decoration_flags); |
14553 | } |
14554 | } |
14555 | } |
14556 | |
14557 | if (func.entry_line.file_id != 0) |
14558 | emit_line_directive(file_id: func.entry_line.file_id, line_literal: func.entry_line.line_literal); |
14559 | emit_function_prototype(func, return_flags); |
14560 | begin_scope(); |
14561 | |
14562 | if (func.self == ir.default_entry_point) |
14563 | emit_entry_point_declarations(); |
14564 | |
14565 | current_function = &func; |
14566 | auto &entry_block = get<SPIRBlock>(id: func.entry_block); |
14567 | |
14568 | sort(first: begin(cont&: func.constant_arrays_needed_on_stack), last: end(cont&: func.constant_arrays_needed_on_stack)); |
14569 | for (auto &array : func.constant_arrays_needed_on_stack) |
14570 | { |
14571 | auto &c = get<SPIRConstant>(id: array); |
14572 | auto &type = get<SPIRType>(id: c.constant_type); |
14573 | statement(ts: variable_decl(type, name: join(ts: "_" , ts&: array, ts: "_array_copy" )), ts: " = " , ts: constant_expression(c), ts: ";" ); |
14574 | } |
14575 | |
14576 | for (auto &v : func.local_variables) |
14577 | { |
14578 | auto &var = get<SPIRVariable>(id: v); |
14579 | var.deferred_declaration = false; |
14580 | |
14581 | if (variable_decl_is_remapped_storage(var, storage: StorageClassWorkgroup)) |
14582 | { |
14583 | // Special variable type which cannot have initializer, |
14584 | // need to be declared as standalone variables. |
14585 | // Comes from MSL which can push global variables as local variables in main function. |
14586 | add_local_variable_name(id: var.self); |
14587 | statement(ts: variable_decl(variable: var), ts: ";" ); |
14588 | var.deferred_declaration = false; |
14589 | } |
14590 | else if (var.storage == StorageClassPrivate) |
14591 | { |
14592 | // These variables will not have had their CFG usage analyzed, so move it to the entry block. |
14593 | // Comes from MSL which can push global variables as local variables in main function. |
14594 | // We could just declare them right now, but we would miss out on an important initialization case which is |
14595 | // LUT declaration in MSL. |
14596 | // If we don't declare the variable when it is assigned we're forced to go through a helper function |
14597 | // which copies elements one by one. |
14598 | add_local_variable_name(id: var.self); |
14599 | |
14600 | if (var.initializer) |
14601 | { |
14602 | statement(ts: variable_decl(variable: var), ts: ";" ); |
14603 | var.deferred_declaration = false; |
14604 | } |
14605 | else |
14606 | { |
14607 | auto &dominated = entry_block.dominated_variables; |
14608 | if (find(first: begin(cont&: dominated), last: end(cont&: dominated), val: var.self) == end(cont&: dominated)) |
14609 | entry_block.dominated_variables.push_back(t: var.self); |
14610 | var.deferred_declaration = true; |
14611 | } |
14612 | } |
14613 | else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression) |
14614 | { |
14615 | // No need to declare this variable, it has a static expression. |
14616 | var.deferred_declaration = false; |
14617 | } |
14618 | else if (expression_is_lvalue(id: v)) |
14619 | { |
14620 | add_local_variable_name(id: var.self); |
14621 | |
14622 | // Loop variables should never be declared early, they are explicitly emitted in a loop. |
14623 | if (var.initializer && !var.loop_variable) |
14624 | statement(ts: variable_decl_function_local(var), ts: ";" ); |
14625 | else |
14626 | { |
14627 | // Don't declare variable until first use to declutter the GLSL output quite a lot. |
14628 | // If we don't touch the variable before first branch, |
14629 | // declare it then since we need variable declaration to be in top scope. |
14630 | var.deferred_declaration = true; |
14631 | } |
14632 | } |
14633 | else |
14634 | { |
14635 | // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this. |
14636 | // For these types (non-lvalue), we enforce forwarding through a shadowed variable. |
14637 | // This means that when we OpStore to these variables, we just write in the expression ID directly. |
14638 | // This breaks any kind of branching, since the variable must be statically assigned. |
14639 | // Branching on samplers and images would be pretty much impossible to fake in GLSL. |
14640 | var.statically_assigned = true; |
14641 | } |
14642 | |
14643 | var.loop_variable_enable = false; |
14644 | |
14645 | // Loop variables are never declared outside their for-loop, so block any implicit declaration. |
14646 | if (var.loop_variable) |
14647 | { |
14648 | var.deferred_declaration = false; |
14649 | // Need to reset the static expression so we can fallback to initializer if need be. |
14650 | var.static_expression = 0; |
14651 | } |
14652 | } |
14653 | |
14654 | // Enforce declaration order for regression testing purposes. |
14655 | for (auto &block_id : func.blocks) |
14656 | { |
14657 | auto &block = get<SPIRBlock>(id: block_id); |
14658 | sort(first: begin(cont&: block.dominated_variables), last: end(cont&: block.dominated_variables)); |
14659 | } |
14660 | |
14661 | for (auto &line : current_function->fixup_hooks_in) |
14662 | line(); |
14663 | |
14664 | emit_block_chain(block&: entry_block); |
14665 | |
14666 | end_scope(); |
14667 | processing_entry_point = false; |
14668 | statement(ts: "" ); |
14669 | |
14670 | // Make sure deferred declaration state for local variables is cleared when we are done with function. |
14671 | // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise. |
14672 | for (auto &v : func.local_variables) |
14673 | { |
14674 | auto &var = get<SPIRVariable>(id: v); |
14675 | var.deferred_declaration = false; |
14676 | } |
14677 | } |
14678 | |
14679 | void CompilerGLSL::emit_fixup() |
14680 | { |
14681 | if (is_vertex_like_shader()) |
14682 | { |
14683 | if (options.vertex.fixup_clipspace) |
14684 | { |
14685 | const char *suffix = backend.float_literal_suffix ? "f" : "" ; |
14686 | statement(ts: "gl_Position.z = 2.0" , ts&: suffix, ts: " * gl_Position.z - gl_Position.w;" ); |
14687 | } |
14688 | |
14689 | if (options.vertex.flip_vert_y) |
14690 | statement(ts: "gl_Position.y = -gl_Position.y;" ); |
14691 | } |
14692 | } |
14693 | |
14694 | void CompilerGLSL::flush_phi(BlockID from, BlockID to) |
14695 | { |
14696 | auto &child = get<SPIRBlock>(id: to); |
14697 | if (child.ignore_phi_from_block == from) |
14698 | return; |
14699 | |
14700 | unordered_set<uint32_t> temporary_phi_variables; |
14701 | |
14702 | for (auto itr = begin(cont&: child.phi_variables); itr != end(cont&: child.phi_variables); ++itr) |
14703 | { |
14704 | auto &phi = *itr; |
14705 | |
14706 | if (phi.parent == from) |
14707 | { |
14708 | auto &var = get<SPIRVariable>(id: phi.function_variable); |
14709 | |
14710 | // A Phi variable might be a loop variable, so flush to static expression. |
14711 | if (var.loop_variable && !var.loop_variable_enable) |
14712 | var.static_expression = phi.local_variable; |
14713 | else |
14714 | { |
14715 | flush_variable_declaration(id: phi.function_variable); |
14716 | |
14717 | // Check if we are going to write to a Phi variable that another statement will read from |
14718 | // as part of another Phi node in our target block. |
14719 | // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads. |
14720 | // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm. |
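				// Illustrative shape: "_42_copy = _42;" is emitted first, and later phi writes in
				// this block read from "_42_copy" rather than the now-overwritten variable.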
14721 | bool need_saved_temporary = |
14722 | find_if(first: itr + 1, last: end(cont&: child.phi_variables), pred: [&](const SPIRBlock::Phi &future_phi) -> bool { |
14723 | return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from; |
14724 | }) != end(cont&: child.phi_variables); |
14725 | |
14726 | if (need_saved_temporary) |
14727 | { |
14728 | // Need to make sure we declare the phi variable with a copy at the right scope. |
14729 | // We cannot safely declare a temporary here since we might be inside a continue block. |
14730 | if (!var.allocate_temporary_copy) |
14731 | { |
14732 | var.allocate_temporary_copy = true; |
14733 | force_recompile(); |
14734 | } |
14735 | statement(ts: "_" , ts&: phi.function_variable, ts: "_copy" , ts: " = " , ts: to_name(id: phi.function_variable), ts: ";" ); |
14736 | temporary_phi_variables.insert(x: phi.function_variable); |
14737 | } |
14738 | |
14739 | // This might be called in continue block, so make sure we |
14740 | // use this to emit ESSL 1.0 compliant increments/decrements. |
14741 | auto lhs = to_expression(id: phi.function_variable); |
14742 | |
14743 | string rhs; |
14744 | if (temporary_phi_variables.count(x: phi.local_variable)) |
14745 | rhs = join(ts: "_" , ts&: phi.local_variable, ts: "_copy" ); |
14746 | else |
14747 | rhs = to_pointer_expression(id: phi.local_variable); |
14748 | |
14749 | if (!optimize_read_modify_write(type: get<SPIRType>(id: var.basetype), lhs, rhs)) |
14750 | statement(ts&: lhs, ts: " = " , ts&: rhs, ts: ";" ); |
14751 | } |
14752 | |
14753 | register_write(chain: phi.function_variable); |
14754 | } |
14755 | } |
14756 | } |
14757 | |
14758 | void CompilerGLSL::branch_to_continue(BlockID from, BlockID to) |
14759 | { |
14760 | auto &to_block = get<SPIRBlock>(id: to); |
14761 | if (from == to) |
14762 | return; |
14763 | |
14764 | assert(is_continue(to)); |
14765 | if (to_block.complex_continue) |
14766 | { |
14767 | // Just emit the whole block chain as is. |
14768 | auto usage_counts = expression_usage_counts; |
14769 | |
14770 | emit_block_chain(block&: to_block); |
14771 | |
14772 | // Expression usage counts are moot after returning from the continue block. |
14773 | expression_usage_counts = usage_counts; |
14774 | } |
14775 | else |
14776 | { |
14777 | auto &from_block = get<SPIRBlock>(id: from); |
14778 | bool outside_control_flow = false; |
14779 | uint32_t loop_dominator = 0; |
14780 | |
14781 | // FIXME: Refactor this to not use the old loop_dominator tracking. |
14782 | if (from_block.merge_block) |
14783 | { |
14784 | // If we are a loop header, we don't set the loop dominator, |
14785 | // so just use "self" here. |
14786 | loop_dominator = from; |
14787 | } |
14788 | else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator)) |
14789 | { |
14790 | loop_dominator = from_block.loop_dominator; |
14791 | } |
14792 | |
14793 | if (loop_dominator != 0) |
14794 | { |
14795 | auto &cfg = get_cfg_for_current_function(); |
14796 | |
14797 | // For non-complex continue blocks, we implicitly branch to the continue block |
14798 | // by having the continue block be part of the loop header in for (; ; continue-block). |
14799 | outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from: loop_dominator, to: from); |
14800 | } |
14801 | |
14802 | // Some simplification for for-loops. We always end up with a useless continue; |
14803 | // statement since we branch to a loop block. |
14804 | // Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block, |
14805 | // we can avoid writing out an explicit continue statement. |
14806 | // Similar optimization to return statements if we know we're outside flow control. |
14807 | if (!outside_control_flow) |
14808 | statement(ts: "continue;" ); |
14809 | } |
14810 | } |
14811 | |
14812 | void CompilerGLSL::branch(BlockID from, BlockID to) |
14813 | { |
14814 | flush_phi(from, to); |
14815 | flush_control_dependent_expressions(block: from); |
14816 | |
14817 | bool to_is_continue = is_continue(next: to); |
14818 | |
14819 | // This is only a continue if we branch to our loop dominator. |
14820 | if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(id: from).loop_dominator == to) |
14821 | { |
14822 | // This can happen if we had a complex continue block which was emitted. |
14823 | // Once the continue block tries to branch to the loop header, just emit continue; |
14824 | // and end the chain here. |
14825 | statement(ts: "continue;" ); |
14826 | } |
14827 | else if (from != to && is_break(next: to)) |
14828 | { |
14829 | // We cannot break to ourselves, so check explicitly for from != to. |
14830 | // This case can trigger if a loop header is all three of these things: |
14831 | // - Continue block |
14832 | // - Loop header |
14833 | // - Break merge target all at once ... |
14834 | |
14835 | // Very dirty workaround. |
14836 | // Switch constructs are able to break, but they cannot break out of a loop at the same time. |
14837 | // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block, |
14838 | // write to the ladder here, and defer the break. |
14839 | // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case. |
14840 | if (current_emitting_switch && is_loop_break(next: to) && |
14841 | current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) && |
14842 | get<SPIRBlock>(id: current_emitting_switch->loop_dominator).merge_block == to) |
14843 | { |
14844 | if (!current_emitting_switch->need_ladder_break) |
14845 | { |
14846 | force_recompile(); |
14847 | current_emitting_switch->need_ladder_break = true; |
14848 | } |
14849 | |
14850 | statement(ts: "_" , ts&: current_emitting_switch->self, ts: "_ladder_break = true;" ); |
14851 | } |
14852 | statement(ts: "break;" ); |
14853 | } |
14854 | else if (to_is_continue || from == to) |
14855 | { |
		// The from == to case can happen for a do-while loop which branches into itself.
14857 | // We don't mark these cases as continue blocks, but the only possible way to branch into |
14858 | // ourselves is through means of continue blocks. |
14859 | |
14860 | // If we are merging to a continue block, there is no need to emit the block chain for continue here. |
14861 | // We can branch to the continue block after we merge execution. |
14862 | |
14863 | // Here we make use of structured control flow rules from spec: |
14864 | // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block |
14865 | // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG |
14866 | // If we are branching to a merge block, we must be inside a construct which dominates the merge block. |
14867 | auto &block_meta = ir.block_meta[to]; |
14868 | bool branching_to_merge = |
14869 | (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT | |
14870 | ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0; |
14871 | if (!to_is_continue || !branching_to_merge) |
14872 | branch_to_continue(from, to); |
14873 | } |
14874 | else if (!is_conditional(next: to)) |
14875 | emit_block_chain(block&: get<SPIRBlock>(id: to)); |
14876 | |
14877 | // It is important that we check for break before continue. |
14878 | // A block might serve two purposes, a break block for the inner scope, and |
14879 | // a continue block in the outer scope. |
14880 | // Inner scope always takes precedence. |
14881 | } |
14882 | |
14883 | void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block) |
14884 | { |
14885 | auto &from_block = get<SPIRBlock>(id: from); |
14886 | BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0); |
14887 | |
14888 | // If we branch directly to our selection merge target, we don't need a code path. |
14889 | bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, to: true_block); |
14890 | bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, to: false_block); |
14891 | |
14892 | if (!true_block_needs_code && !false_block_needs_code) |
14893 | return; |
14894 | |
14895 | // We might have a loop merge here. Only consider selection flattening constructs. |
14896 | // Loop hints are handled explicitly elsewhere. |
14897 | if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten) |
14898 | emit_block_hints(block: from_block); |
14899 | |
14900 | if (true_block_needs_code) |
14901 | { |
14902 | statement(ts: "if (" , ts: to_expression(id: cond), ts: ")" ); |
14903 | begin_scope(); |
14904 | branch(from, to: true_block); |
14905 | end_scope(); |
14906 | |
14907 | if (false_block_needs_code) |
14908 | { |
14909 | statement(ts: "else" ); |
14910 | begin_scope(); |
14911 | branch(from, to: false_block); |
14912 | end_scope(); |
14913 | } |
14914 | } |
14915 | else if (false_block_needs_code) |
14916 | { |
14917 | // Only need false path, use negative conditional. |
14918 | statement(ts: "if (!" , ts: to_enclosed_expression(id: cond), ts: ")" ); |
14919 | begin_scope(); |
14920 | branch(from, to: false_block); |
14921 | end_scope(); |
14922 | } |
14923 | } |
14924 | |
14925 | // FIXME: This currently cannot handle complex continue blocks |
14926 | // as in do-while. |
14927 | // This should be seen as a "trivial" continue block. |
14928 | string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block) |
14929 | { |
14930 | auto *block = &get<SPIRBlock>(id: continue_block); |
14931 | |
14932 | // While emitting the continue block, declare_temporary will check this |
14933 | // if we have to emit temporaries. |
14934 | current_continue_block = block; |
14935 | |
14936 | SmallVector<string> statements; |
14937 | |
14938 | // Capture all statements into our list. |
14939 | auto *old = redirect_statement; |
14940 | redirect_statement = &statements; |
14941 | |
14942 | // Stamp out all blocks one after each other. |
14943 | while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0) |
14944 | { |
14945 | // Write out all instructions we have in this block. |
14946 | emit_block_instructions(block&: *block); |
14947 | |
14948 | // For plain branchless for/while continue blocks. |
14949 | if (block->next_block) |
14950 | { |
14951 | flush_phi(from: continue_block, to: block->next_block); |
14952 | block = &get<SPIRBlock>(id: block->next_block); |
14953 | } |
		// For do-while blocks. The last block will be a select block.
14955 | else if (block->true_block && follow_true_block) |
14956 | { |
14957 | flush_phi(from: continue_block, to: block->true_block); |
14958 | block = &get<SPIRBlock>(id: block->true_block); |
14959 | } |
14960 | else if (block->false_block && follow_false_block) |
14961 | { |
14962 | flush_phi(from: continue_block, to: block->false_block); |
14963 | block = &get<SPIRBlock>(id: block->false_block); |
14964 | } |
14965 | else |
14966 | { |
14967 | SPIRV_CROSS_THROW("Invalid continue block detected!" ); |
14968 | } |
14969 | } |
14970 | |
14971 | // Restore old pointer. |
14972 | redirect_statement = old; |
14973 | |
14974 | // Somewhat ugly, strip off the last ';' since we use ',' instead. |
14975 | // Ideally, we should select this behavior in statement(). |
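	// e.g. a continue block holding "i++;" and "j += 2;" ends up as "i++, j += 2" inside the
	// for-loop header (illustrative).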
14976 | for (auto &s : statements) |
14977 | { |
14978 | if (!s.empty() && s.back() == ';') |
14979 | s.erase(pos: s.size() - 1, n: 1); |
14980 | } |
14981 | |
14982 | current_continue_block = nullptr; |
14983 | return merge(list: statements); |
14984 | } |
14985 | |
14986 | void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block) |
14987 | { |
14988 | // While loops do not take initializers, so declare all of them outside. |
14989 | for (auto &loop_var : block.loop_variables) |
14990 | { |
14991 | auto &var = get<SPIRVariable>(id: loop_var); |
14992 | statement(ts: variable_decl(variable: var), ts: ";" ); |
14993 | } |
14994 | } |
14995 | |
14996 | string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block) |
14997 | { |
14998 | if (block.loop_variables.empty()) |
14999 | return "" ; |
15000 | |
15001 | bool same_types = for_loop_initializers_are_same_type(block); |
15002 | // We can only declare for loop initializers if all variables are of same type. |
15003 | // If we cannot do this, declare individual variables before the loop header. |
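	// e.g. matching types yield a single "int i = 0, j = 10" initializer, while mismatched types
	// fall back to separate declarations emitted just before the loop (illustrative).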
15004 | |
15005 | // We might have a loop variable candidate which was not assigned to for some reason. |
15006 | uint32_t missing_initializers = 0; |
15007 | for (auto &variable : block.loop_variables) |
15008 | { |
15009 | uint32_t expr = get<SPIRVariable>(id: variable).static_expression; |
15010 | |
15011 | // Sometimes loop variables are initialized with OpUndef, but we can just declare |
15012 | // a plain variable without initializer in this case. |
15013 | if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) |
15014 | missing_initializers++; |
15015 | } |
15016 | |
15017 | if (block.loop_variables.size() == 1 && missing_initializers == 0) |
15018 | { |
15019 | return variable_decl(variable: get<SPIRVariable>(id: block.loop_variables.front())); |
15020 | } |
15021 | else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size())) |
15022 | { |
15023 | for (auto &loop_var : block.loop_variables) |
15024 | statement(ts: variable_decl(variable: get<SPIRVariable>(id: loop_var)), ts: ";" ); |
15025 | return "" ; |
15026 | } |
15027 | else |
15028 | { |
15029 | // We have a mix of loop variables, either ones with a clear initializer, or ones without. |
15030 | // Separate the two streams. |
15031 | string expr; |
15032 | |
15033 | for (auto &loop_var : block.loop_variables) |
15034 | { |
15035 | uint32_t static_expr = get<SPIRVariable>(id: loop_var).static_expression; |
15036 | if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef) |
15037 | { |
15038 | statement(ts: variable_decl(variable: get<SPIRVariable>(id: loop_var)), ts: ";" ); |
15039 | } |
15040 | else |
15041 | { |
15042 | auto &var = get<SPIRVariable>(id: loop_var); |
15043 | auto &type = get_variable_data_type(var); |
15044 | if (expr.empty()) |
15045 | { |
15046 | // For loop initializers are of the form <type id = value, id = value, id = value, etc ... |
15047 | expr = join(ts: to_qualifiers_glsl(id: var.self), ts: type_to_glsl(type), ts: " " ); |
15048 | } |
15049 | else |
15050 | { |
15051 | expr += ", " ; |
15052 | // In MSL, being based on C++, the asterisk marking a pointer |
15053 | // binds to the identifier, not the type. |
15054 | if (type.pointer) |
15055 | expr += "* " ; |
15056 | } |
15057 | |
15058 | expr += join(ts: to_name(id: loop_var), ts: " = " , ts: to_pointer_expression(id: var.static_expression)); |
15059 | } |
15060 | } |
15061 | return expr; |
15062 | } |
15063 | } |
15064 | |
15065 | bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block) |
15066 | { |
15067 | if (block.loop_variables.size() <= 1) |
15068 | return true; |
15069 | |
15070 | uint32_t expected = 0; |
15071 | Bitset expected_flags; |
15072 | for (auto &var : block.loop_variables) |
15073 | { |
15074 | // Don't care about uninitialized variables as they will not be part of the initializers. |
15075 | uint32_t expr = get<SPIRVariable>(id: var).static_expression; |
15076 | if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) |
15077 | continue; |
15078 | |
15079 | if (expected == 0) |
15080 | { |
15081 | expected = get<SPIRVariable>(id: var).basetype; |
15082 | expected_flags = get_decoration_bitset(id: var); |
15083 | } |
15084 | else if (expected != get<SPIRVariable>(id: var).basetype) |
15085 | return false; |
15086 | |
15087 | // Precision flags and things like that must also match. |
15088 | if (expected_flags != get_decoration_bitset(id: var)) |
15089 | return false; |
15090 | } |
15091 | |
15092 | return true; |
15093 | } |
15094 | |
bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
15096 | { |
15097 | SPIRBlock::ContinueBlockType continue_type = continue_block_type(continue_block: get<SPIRBlock>(id: block.continue_block)); |
15098 | |
15099 | if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop) |
15100 | { |
15101 | uint32_t current_count = statement_count; |
15102 | // If we're trying to create a true for loop, |
15103 | // we need to make sure that all opcodes before branch statement do not actually emit any code. |
15104 | // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. |
15105 | emit_block_instructions(block); |
15106 | |
15107 | bool condition_is_temporary = forced_temporaries.find(x: block.condition) == end(cont&: forced_temporaries); |
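// condition_is_temporary is true when the condition has not been forced out into a declared temporary; only then can it be folded into the loop header below.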
15108 | |
15109 | // This can work! We only did trivial things which could be forwarded in block body! |
15110 | if (current_count == statement_count && condition_is_temporary) |
15111 | { |
15112 | switch (continue_type) |
15113 | { |
15114 | case SPIRBlock::ForLoop: |
15115 | { |
15116 | // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. |
15117 | flush_undeclared_variables(block); |
15118 | |
15119 | // Important that we do this in this order because |
15120 | // emitting the continue block can invalidate the condition expression. |
15121 | auto initializer = emit_for_loop_initializers(block); |
15122 | auto condition = to_expression(id: block.condition); |
15123 | |
15124 | // Condition might have to be inverted. |
15125 | if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block))) |
15126 | condition = join(ts: "!" , ts: enclose_expression(expr: condition)); |
15127 | |
15128 | emit_block_hints(block); |
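// With MergeToSelectContinueForLoop the continue block itself acts as the loop body, so the increment slot of the for-header is left empty.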
15129 | if (method != SPIRBlock::MergeToSelectContinueForLoop) |
15130 | { |
15131 | auto continue_block = emit_continue_block(continue_block: block.continue_block, follow_true_block: false, follow_false_block: false); |
15132 | statement(ts: "for (" , ts&: initializer, ts: "; " , ts&: condition, ts: "; " , ts&: continue_block, ts: ")" ); |
15133 | } |
15134 | else |
15135 | statement(ts: "for (" , ts&: initializer, ts: "; " , ts&: condition, ts: "; )" ); |
15136 | break; |
15137 | } |
15138 | |
15139 | case SPIRBlock::WhileLoop: |
15140 | { |
15141 | // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header. |
15142 | flush_undeclared_variables(block); |
15143 | emit_while_loop_initializers(block); |
15144 | emit_block_hints(block); |
15145 | |
15146 | auto condition = to_expression(id: block.condition); |
15147 | // Condition might have to be inverted. |
15148 | if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block))) |
15149 | condition = join(ts: "!" , ts: enclose_expression(expr: condition)); |
15150 | |
15151 | statement(ts: "while (" , ts&: condition, ts: ")" ); |
15152 | break; |
15153 | } |
15154 | |
15155 | default: |
15156 | block.disable_block_optimization = true; |
15157 | force_recompile(); |
15158 | begin_scope(); // We'll see an end_scope() later. |
15159 | return false; |
15160 | } |
15161 | |
15162 | begin_scope(); |
15163 | return true; |
15164 | } |
15165 | else |
15166 | { |
15167 | block.disable_block_optimization = true; |
15168 | force_recompile(); |
15169 | begin_scope(); // We'll see an end_scope() later. |
15170 | return false; |
15171 | } |
15172 | } |
15173 | else if (method == SPIRBlock::MergeToDirectForLoop) |
15174 | { |
15175 | auto &child = get<SPIRBlock>(id: block.next_block); |
15176 | |
15177 | // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. |
15178 | flush_undeclared_variables(block&: child); |
15179 | |
15180 | uint32_t current_count = statement_count; |
15181 | |
15182 | // If we're trying to create a true for loop, |
15183 | // we need to make sure that all opcodes before branch statement do not actually emit any code. |
15184 | // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. |
15185 | emit_block_instructions(block&: child); |
15186 | |
15187 | bool condition_is_temporary = forced_temporaries.find(x: child.condition) == end(cont&: forced_temporaries); |
15188 | |
15189 | if (current_count == statement_count && condition_is_temporary) |
15190 | { |
15191 | uint32_t target_block = child.true_block; |
15192 | |
15193 | switch (continue_type) |
15194 | { |
15195 | case SPIRBlock::ForLoop: |
15196 | { |
15197 | // Important that we do this in this order because |
15198 | // emitting the continue block can invalidate the condition expression. |
15199 | auto initializer = emit_for_loop_initializers(block); |
15200 | auto condition = to_expression(id: child.condition); |
15201 | |
15202 | // Condition might have to be inverted. |
15203 | if (execution_is_noop(from: get<SPIRBlock>(id: child.true_block), to: get<SPIRBlock>(id: block.merge_block))) |
15204 | { |
15205 | condition = join(ts: "!" , ts: enclose_expression(expr: condition)); |
15206 | target_block = child.false_block; |
15207 | } |
15208 | |
15209 | auto continue_block = emit_continue_block(continue_block: block.continue_block, follow_true_block: false, follow_false_block: false); |
15210 | emit_block_hints(block); |
15211 | statement(ts: "for (" , ts&: initializer, ts: "; " , ts&: condition, ts: "; " , ts&: continue_block, ts: ")" ); |
15212 | break; |
15213 | } |
15214 | |
15215 | case SPIRBlock::WhileLoop: |
15216 | { |
15217 | emit_while_loop_initializers(block); |
15218 | emit_block_hints(block); |
15219 | |
15220 | auto condition = to_expression(id: child.condition); |
15221 | // Condition might have to be inverted. |
15222 | if (execution_is_noop(from: get<SPIRBlock>(id: child.true_block), to: get<SPIRBlock>(id: block.merge_block))) |
15223 | { |
15224 | condition = join(ts: "!" , ts: enclose_expression(expr: condition)); |
15225 | target_block = child.false_block; |
15226 | } |
15227 | |
15228 | statement(ts: "while (" , ts&: condition, ts: ")" ); |
15229 | break; |
15230 | } |
15231 | |
15232 | default: |
15233 | block.disable_block_optimization = true; |
15234 | force_recompile(); |
15235 | begin_scope(); // We'll see an end_scope() later. |
15236 | return false; |
15237 | } |
15238 | |
15239 | begin_scope(); |
15240 | branch(from: child.self, to: target_block); |
15241 | return true; |
15242 | } |
15243 | else |
15244 | { |
15245 | block.disable_block_optimization = true; |
15246 | force_recompile(); |
15247 | begin_scope(); // We'll see an end_scope() later. |
15248 | return false; |
15249 | } |
15250 | } |
15251 | else |
15252 | return false; |
15253 | } |
15254 | |
15255 | void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block) |
15256 | { |
15257 | for (auto &v : block.dominated_variables) |
15258 | flush_variable_declaration(id: v); |
15259 | } |
15260 | |
15261 | void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries) |
15262 | { |
15263 | // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header. |
15264 | // Need to sort these to ensure that reference output is stable. |
15265 | sort(first: begin(cont&: temporaries), last: end(cont&: temporaries), |
15266 | comp: [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; }); |
15267 | |
15268 | for (auto &tmp : temporaries) |
15269 | { |
15270 | auto &type = get<SPIRType>(id: tmp.first); |
15271 | |
15272 | // There are some rare scenarios where we are asked to declare pointer types as hoisted temporaries. |
15273 | // This should be ignored unless we're doing actual variable pointers and backend supports it. |
15274 | // Access chains cannot normally be lowered to temporaries in GLSL and HLSL. |
15275 | if (type.pointer && !backend.native_pointers) |
15276 | continue; |
15277 | |
15278 | add_local_variable_name(id: tmp.second); |
15279 | auto &flags = get_decoration_bitset(id: tmp.second); |
15280 | |
15281 | // Not all targets support pointer literals, so don't bother with that case. |
15282 | string initializer; |
15283 | if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) |
15284 | initializer = join(ts: " = " , ts: to_zero_initialized_expression(type_id: tmp.first)); |
15285 | |
15286 | statement(ts: flags_to_qualifiers_glsl(type, flags), ts: variable_decl(type, name: to_name(id: tmp.second)), ts&: initializer, ts: ";" ); |
15287 | |
15288 | hoisted_temporaries.insert(x: tmp.second); |
15289 | forced_temporaries.insert(x: tmp.second); |
15290 | |
15291 | // The temporary might be read from before it's assigned, set up the expression now. |
15292 | set<SPIRExpression>(id: tmp.second, args: to_name(id: tmp.second), args&: tmp.first, args: true); |
15293 | |
15294 | // If we have hoisted temporaries in multi-precision contexts, emit that here too ... |
15295 | // We will not be able to analyze hoisted-ness for dependent temporaries that we hallucinate here. |
15296 | auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(x: tmp.second); |
15297 | if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end()) |
15298 | { |
15299 | uint32_t mirror_id = mirrored_precision_itr->second; |
15300 | auto &mirror_flags = get_decoration_bitset(id: mirror_id); |
15301 | statement(ts: flags_to_qualifiers_glsl(type, flags: mirror_flags), |
15302 | ts: variable_decl(type, name: to_name(id: mirror_id)), |
15303 | ts&: initializer, ts: ";" ); |
15304 | // The temporary might be read from before it's assigned, set up the expression now. |
15305 | set<SPIRExpression>(id: mirror_id, args: to_name(id: mirror_id), args&: tmp.first, args: true); |
15306 | hoisted_temporaries.insert(x: mirror_id); |
15307 | } |
15308 | } |
15309 | } |
15310 | |
15311 | void CompilerGLSL::emit_block_chain(SPIRBlock &block) |
15312 | { |
15313 | bool select_branch_to_true_block = false; |
15314 | bool select_branch_to_false_block = false; |
15315 | bool skip_direct_branch = false; |
bool emitted_loop_header_variables = false;
15317 | bool force_complex_continue_block = false; |
15318 | ValueSaver<uint32_t> loop_level_saver(current_loop_level); |
15319 | |
15320 | if (block.merge == SPIRBlock::MergeLoop) |
15321 | add_loop_level(); |
15322 | |
15323 | emit_hoisted_temporaries(temporaries&: block.declare_temporary); |
15324 | |
15325 | SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone; |
15326 | if (block.continue_block) |
15327 | { |
15328 | continue_type = continue_block_type(continue_block: get<SPIRBlock>(id: block.continue_block)); |
15329 | // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles. |
15330 | if (continue_type == SPIRBlock::ComplexLoop) |
15331 | block.complex_continue = true; |
15332 | } |
15333 | |
15334 | // If we have loop variables, stop masking out access to the variable now. |
15335 | for (auto var_id : block.loop_variables) |
15336 | { |
15337 | auto &var = get<SPIRVariable>(id: var_id); |
15338 | var.loop_variable_enable = true; |
15339 | // We're not going to declare the variable directly, so emit a copy here. |
15340 | emit_variable_temporary_copies(var); |
15341 | } |
15342 | |
15343 | // Remember deferred declaration state. We will restore it before returning. |
15344 | SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size()); |
15345 | for (size_t i = 0; i < block.dominated_variables.size(); i++) |
15346 | { |
15347 | uint32_t var_id = block.dominated_variables[i]; |
15348 | auto &var = get<SPIRVariable>(id: var_id); |
15349 | rearm_dominated_variables[i] = var.deferred_declaration; |
15350 | } |
15351 | |
15352 | // This is the method often used by spirv-opt to implement loops. |
15353 | // The loop header goes straight into the continue block. |
15354 | // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block, |
15355 | // it *MUST* be used in the continue block. This loop method will not work. |
15356 | if (!is_legacy_es() && block_is_loop_candidate(block, method: SPIRBlock::MergeToSelectContinueForLoop)) |
15357 | { |
15358 | flush_undeclared_variables(block); |
15359 | if (attempt_emit_loop_header(block, method: SPIRBlock::MergeToSelectContinueForLoop)) |
15360 | { |
15361 | if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block))) |
15362 | select_branch_to_false_block = true; |
15363 | else |
15364 | select_branch_to_true_block = true; |
15365 | |
15366 | emitted_loop_header_variables = true; |
15367 | force_complex_continue_block = true; |
15368 | } |
15369 | } |
15370 | // This is the older loop behavior in glslang which branches to loop body directly from the loop header. |
15371 | else if (block_is_loop_candidate(block, method: SPIRBlock::MergeToSelectForLoop)) |
15372 | { |
15373 | flush_undeclared_variables(block); |
15374 | if (attempt_emit_loop_header(block, method: SPIRBlock::MergeToSelectForLoop)) |
15375 | { |
// The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
15377 | if (execution_is_noop(from: get<SPIRBlock>(id: block.true_block), to: get<SPIRBlock>(id: block.merge_block))) |
15378 | select_branch_to_false_block = true; |
15379 | else |
15380 | select_branch_to_true_block = true; |
15381 | |
15382 | emitted_loop_header_variables = true; |
15383 | } |
15384 | } |
15385 | // This is the newer loop behavior in glslang which branches from Loop header directly to |
15386 | // a new block, which in turn has a OpBranchSelection without a selection merge. |
15387 | else if (block_is_loop_candidate(block, method: SPIRBlock::MergeToDirectForLoop)) |
15388 | { |
15389 | flush_undeclared_variables(block); |
15390 | if (attempt_emit_loop_header(block, method: SPIRBlock::MergeToDirectForLoop)) |
15391 | { |
15392 | skip_direct_branch = true; |
15393 | emitted_loop_header_variables = true; |
15394 | } |
15395 | } |
15396 | else if (continue_type == SPIRBlock::DoWhileLoop) |
15397 | { |
15398 | flush_undeclared_variables(block); |
15399 | emit_while_loop_initializers(block); |
15400 | emitted_loop_header_variables = true; |
15401 | // We have some temporaries where the loop header is the dominator. |
15402 | // We risk a case where we have code like: |
15403 | // for (;;) { create-temporary; break; } consume-temporary; |
15404 | // so force-declare temporaries here. |
15405 | emit_hoisted_temporaries(temporaries&: block.potential_declare_temporary); |
15406 | statement(ts: "do" ); |
15407 | begin_scope(); |
15408 | |
15409 | emit_block_instructions(block); |
15410 | } |
15411 | else if (block.merge == SPIRBlock::MergeLoop) |
15412 | { |
15413 | flush_undeclared_variables(block); |
15414 | emit_while_loop_initializers(block); |
15415 | emitted_loop_header_variables = true; |
15416 | |
15417 | // We have a generic loop without any distinguishable pattern like for, while or do while. |
15418 | get<SPIRBlock>(id: block.continue_block).complex_continue = true; |
15419 | continue_type = SPIRBlock::ComplexLoop; |
15420 | |
15421 | // We have some temporaries where the loop header is the dominator. |
15422 | // We risk a case where we have code like: |
15423 | // for (;;) { create-temporary; break; } consume-temporary; |
15424 | // so force-declare temporaries here. |
15425 | emit_hoisted_temporaries(temporaries&: block.potential_declare_temporary); |
15426 | emit_block_hints(block); |
15427 | statement(ts: "for (;;)" ); |
15428 | begin_scope(); |
15429 | |
15430 | emit_block_instructions(block); |
15431 | } |
15432 | else |
15433 | { |
15434 | emit_block_instructions(block); |
15435 | } |
15436 | |
15437 | // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem |
15438 | // as writes to said loop variables might have been masked out, we need a recompile. |
15439 | if (!emitted_loop_header_variables && !block.loop_variables.empty()) |
15440 | { |
15441 | force_recompile_guarantee_forward_progress(); |
15442 | for (auto var : block.loop_variables) |
15443 | get<SPIRVariable>(id: var).loop_variable = false; |
15444 | block.loop_variables.clear(); |
15445 | } |
15446 | |
15447 | flush_undeclared_variables(block); |
15448 | bool emit_next_block = true; |
15449 | |
15450 | // Handle end of block. |
15451 | switch (block.terminator) |
15452 | { |
15453 | case SPIRBlock::Direct: |
15454 | // True when emitting complex continue block. |
15455 | if (block.loop_dominator == block.next_block) |
15456 | { |
15457 | branch(from: block.self, to: block.next_block); |
15458 | emit_next_block = false; |
15459 | } |
15460 | // True if MergeToDirectForLoop succeeded. |
15461 | else if (skip_direct_branch) |
15462 | emit_next_block = false; |
15463 | else if (is_continue(next: block.next_block) || is_break(next: block.next_block) || is_conditional(next: block.next_block)) |
15464 | { |
15465 | branch(from: block.self, to: block.next_block); |
15466 | emit_next_block = false; |
15467 | } |
15468 | break; |
15469 | |
15470 | case SPIRBlock::Select: |
15471 | // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded. |
15472 | if (select_branch_to_true_block) |
15473 | { |
15474 | if (force_complex_continue_block) |
15475 | { |
15476 | assert(block.true_block == block.continue_block); |
15477 | |
15478 | // We're going to emit a continue block directly here, so make sure it's marked as complex. |
15479 | auto &complex_continue = get<SPIRBlock>(id: block.continue_block).complex_continue; |
15480 | bool old_complex = complex_continue; |
15481 | complex_continue = true; |
15482 | branch(from: block.self, to: block.true_block); |
15483 | complex_continue = old_complex; |
15484 | } |
15485 | else |
15486 | branch(from: block.self, to: block.true_block); |
15487 | } |
15488 | else if (select_branch_to_false_block) |
15489 | { |
15490 | if (force_complex_continue_block) |
15491 | { |
15492 | assert(block.false_block == block.continue_block); |
15493 | |
15494 | // We're going to emit a continue block directly here, so make sure it's marked as complex. |
15495 | auto &complex_continue = get<SPIRBlock>(id: block.continue_block).complex_continue; |
15496 | bool old_complex = complex_continue; |
15497 | complex_continue = true; |
15498 | branch(from: block.self, to: block.false_block); |
15499 | complex_continue = old_complex; |
15500 | } |
15501 | else |
15502 | branch(from: block.self, to: block.false_block); |
15503 | } |
15504 | else |
15505 | branch(from: block.self, cond: block.condition, true_block: block.true_block, false_block: block.false_block); |
15506 | break; |
15507 | |
15508 | case SPIRBlock::MultiSelect: |
15509 | { |
15510 | auto &type = expression_type(id: block.condition); |
15511 | bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || |
15512 | type.basetype == SPIRType::UByte || type.basetype == SPIRType::UInt64; |
15513 | |
15514 | if (block.merge == SPIRBlock::MergeNone) |
15515 | SPIRV_CROSS_THROW("Switch statement is not structured" ); |
15516 | |
15517 | if (!backend.support_64bit_switch && (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)) |
15518 | { |
15519 | // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages. |
15520 | SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors." ); |
15521 | } |
15522 | |
15523 | const char *label_suffix = "" ; |
15524 | if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix) |
15525 | label_suffix = "u" ; |
15526 | else if (type.basetype == SPIRType::Int64 && backend.support_64bit_switch) |
15527 | label_suffix = "l" ; |
15528 | else if (type.basetype == SPIRType::UInt64 && backend.support_64bit_switch) |
15529 | label_suffix = "ul" ; |
15530 | else if (type.basetype == SPIRType::UShort) |
15531 | label_suffix = backend.uint16_t_literal_suffix; |
15532 | else if (type.basetype == SPIRType::Short) |
15533 | label_suffix = backend.int16_t_literal_suffix; |
15534 | |
15535 | SPIRBlock *old_emitting_switch = current_emitting_switch; |
current_emitting_switch = &block;
15537 | |
15538 | if (block.need_ladder_break) |
15539 | statement(ts: "bool _" , ts&: block.self, ts: "_ladder_break = false;" ); |
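// The ladder flag is checked after the switch scope ends so that a break inside a case label can propagate out of the enclosing loop as well.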
15540 | |
15541 | // Find all unique case constructs. |
15542 | unordered_map<uint32_t, SmallVector<uint64_t>> case_constructs; |
15543 | SmallVector<uint32_t> block_declaration_order; |
15544 | SmallVector<uint64_t> literals_to_merge; |
15545 | |
15546 | // If a switch case branches to the default block for some reason, we can just remove that literal from consideration |
15547 | // and let the default: block handle it. |
15548 | // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here. |
15549 | // We only need to consider possible fallthrough if order[i] branches to order[i + 1]. |
15550 | auto &cases = get_case_list(block); |
15551 | for (auto &c : cases) |
15552 | { |
15553 | if (c.block != block.next_block && c.block != block.default_block) |
15554 | { |
15555 | if (!case_constructs.count(x: c.block)) |
15556 | block_declaration_order.push_back(t: c.block); |
15557 | case_constructs[c.block].push_back(t: c.value); |
15558 | } |
15559 | else if (c.block == block.next_block && block.default_block != block.next_block) |
15560 | { |
15561 | // We might have to flush phi inside specific case labels. |
15562 | // If we can piggyback on default:, do so instead. |
15563 | literals_to_merge.push_back(t: c.value); |
15564 | } |
15565 | } |
15566 | |
15567 | // Empty literal array -> default. |
15568 | if (block.default_block != block.next_block) |
15569 | { |
15570 | auto &default_block = get<SPIRBlock>(id: block.default_block); |
15571 | |
15572 | // We need to slide in the default block somewhere in this chain |
15573 | // if there are fall-through scenarios since the default is declared separately in OpSwitch. |
15574 | // Only consider trivial fall-through cases here. |
15575 | size_t num_blocks = block_declaration_order.size(); |
15576 | bool injected_block = false; |
15577 | |
15578 | for (size_t i = 0; i < num_blocks; i++) |
15579 | { |
15580 | auto &case_block = get<SPIRBlock>(id: block_declaration_order[i]); |
15581 | if (execution_is_direct_branch(from: case_block, to: default_block)) |
15582 | { |
15583 | // Fallthrough to default block, we must inject the default block here. |
15584 | block_declaration_order.insert(itr: begin(cont&: block_declaration_order) + i + 1, value: block.default_block); |
15585 | injected_block = true; |
15586 | break; |
15587 | } |
15588 | else if (execution_is_direct_branch(from: default_block, to: case_block)) |
15589 | { |
15590 | // Default case is falling through to another case label, we must inject the default block here. |
15591 | block_declaration_order.insert(itr: begin(cont&: block_declaration_order) + i, value: block.default_block); |
15592 | injected_block = true; |
15593 | break; |
15594 | } |
15595 | } |
15596 | |
15597 | // Order does not matter. |
15598 | if (!injected_block) |
15599 | block_declaration_order.push_back(t: block.default_block); |
15600 | else if (is_legacy_es()) |
15601 | SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0." ); |
15602 | |
15603 | case_constructs[block.default_block] = {}; |
15604 | } |
15605 | |
15606 | size_t num_blocks = block_declaration_order.size(); |
15607 | |
15608 | const auto to_case_label = [](uint64_t literal, uint32_t width, bool is_unsigned_case) -> string |
15609 | { |
15610 | if (is_unsigned_case) |
15611 | return convert_to_string(t: literal); |
15612 | |
15613 | // For smaller cases, the literals are compiled as 32 bit wide |
15614 | // literals so we don't need to care for all sizes specifically. |
15615 | if (width <= 32) |
15616 | { |
15617 | return convert_to_string(t: int64_t(int32_t(literal))); |
15618 | } |
15619 | |
15620 | return convert_to_string(t: int64_t(literal)); |
15621 | }; |
15622 | |
15623 | const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint64_t> &labels, |
15624 | const char *suffix) -> string { |
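// ESSL 1.0 has no switch statement, so each case is lowered to an if/else chain comparing the selector against its literals.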
15625 | string ret; |
15626 | size_t count = labels.size(); |
15627 | for (size_t i = 0; i < count; i++) |
15628 | { |
15629 | if (i) |
15630 | ret += " || " ; |
15631 | ret += join(ts: count > 1 ? "(" : "" , ts: to_enclosed_expression(id: condition), ts: " == " , ts: labels[i], ts&: suffix, |
15632 | ts: count > 1 ? ")" : "" ); |
15633 | } |
15634 | return ret; |
15635 | }; |
15636 | |
15637 | // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture, |
15638 | // we need to flush phi nodes outside the switch block in a branch, |
15639 | // and skip any Phi handling inside the case label to make fall-through work as expected. |
15640 | // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this |
15641 | // inside the case label if at all possible. |
15642 | for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++) |
15643 | { |
15644 | if (flush_phi_required(from: block.self, to: block_declaration_order[i]) && |
15645 | flush_phi_required(from: block_declaration_order[i - 1], to: block_declaration_order[i])) |
15646 | { |
15647 | uint32_t target_block = block_declaration_order[i]; |
15648 | |
15649 | // Make sure we flush Phi, it might have been marked to be ignored earlier. |
15650 | get<SPIRBlock>(id: target_block).ignore_phi_from_block = 0; |
15651 | |
15652 | auto &literals = case_constructs[target_block]; |
15653 | |
15654 | if (literals.empty()) |
15655 | { |
15656 | // Oh boy, gotta make a complete negative test instead! o.o |
15657 | // Find all possible literals that would *not* make us enter the default block. |
15658 | // If none of those literals match, we flush Phi ... |
15659 | SmallVector<string> conditions; |
15660 | for (size_t j = 0; j < num_blocks; j++) |
15661 | { |
15662 | auto &negative_literals = case_constructs[block_declaration_order[j]]; |
15663 | for (auto &case_label : negative_literals) |
15664 | conditions.push_back(t: join(ts: to_enclosed_expression(id: block.condition), |
15665 | ts: " != " , ts: to_case_label(case_label, type.width, unsigned_case))); |
15666 | } |
15667 | |
15668 | statement(ts: "if (" , ts: merge(list: conditions, between: " && " ), ts: ")" ); |
15669 | begin_scope(); |
15670 | flush_phi(from: block.self, to: target_block); |
15671 | end_scope(); |
15672 | } |
15673 | else |
15674 | { |
15675 | SmallVector<string> conditions; |
15676 | conditions.reserve(count: literals.size()); |
15677 | for (auto &case_label : literals) |
15678 | conditions.push_back(t: join(ts: to_enclosed_expression(id: block.condition), |
15679 | ts: " == " , ts: to_case_label(case_label, type.width, unsigned_case))); |
15680 | statement(ts: "if (" , ts: merge(list: conditions, between: " || " ), ts: ")" ); |
15681 | begin_scope(); |
15682 | flush_phi(from: block.self, to: target_block); |
15683 | end_scope(); |
15684 | } |
15685 | |
15686 | // Mark the block so that we don't flush Phi from header to case label. |
15687 | get<SPIRBlock>(id: target_block).ignore_phi_from_block = block.self; |
15688 | } |
15689 | } |
15690 | |
15691 | // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate |
15692 | // non-structured exits with the help of a switch block. |
15693 | // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic. |
15694 | bool degenerate_switch = block.default_block != block.merge_block && cases.empty(); |
15695 | |
15696 | if (degenerate_switch || is_legacy_es()) |
15697 | { |
15698 | // ESSL 1.0 is not guaranteed to support do/while. |
15699 | if (is_legacy_es()) |
15700 | { |
15701 | uint32_t counter = statement_count; |
15702 | statement(ts: "for (int spvDummy" , ts&: counter, ts: " = 0; spvDummy" , ts&: counter, |
15703 | ts: " < 1; spvDummy" , ts&: counter, ts: "++)" ); |
15704 | } |
15705 | else |
15706 | statement(ts: "do" ); |
15707 | } |
15708 | else |
15709 | { |
15710 | emit_block_hints(block); |
15711 | statement(ts: "switch (" , ts: to_unpacked_expression(id: block.condition), ts: ")" ); |
15712 | } |
15713 | begin_scope(); |
15714 | |
15715 | for (size_t i = 0; i < num_blocks; i++) |
15716 | { |
15717 | uint32_t target_block = block_declaration_order[i]; |
15718 | auto &literals = case_constructs[target_block]; |
15719 | |
15720 | if (literals.empty()) |
15721 | { |
15722 | // Default case. |
15723 | if (!degenerate_switch) |
15724 | { |
15725 | if (is_legacy_es()) |
15726 | statement(ts: "else" ); |
15727 | else |
15728 | statement(ts: "default:" ); |
15729 | } |
15730 | } |
15731 | else |
15732 | { |
15733 | if (is_legacy_es()) |
15734 | { |
15735 | statement(ts: (i ? "else " : "" ), ts: "if (" , ts: to_legacy_case_label(block.condition, literals, label_suffix), |
15736 | ts: ")" ); |
15737 | } |
15738 | else |
15739 | { |
15740 | for (auto &case_literal : literals) |
15741 | { |
15742 | // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here. |
15743 | statement(ts: "case " , ts: to_case_label(case_literal, type.width, unsigned_case), ts&: label_suffix, ts: ":" ); |
15744 | } |
15745 | } |
15746 | } |
15747 | |
15748 | auto &case_block = get<SPIRBlock>(id: target_block); |
15749 | if (backend.support_case_fallthrough && i + 1 < num_blocks && |
15750 | execution_is_direct_branch(from: case_block, to: get<SPIRBlock>(id: block_declaration_order[i + 1]))) |
15751 | { |
15752 | // We will fall through here, so just terminate the block chain early. |
15753 | // We still need to deal with Phi potentially. |
15754 | // No need for a stack-like thing here since we only do fall-through when there is a |
// single trivial branch to the fall-through target.
15756 | current_emitting_switch_fallthrough = true; |
15757 | } |
15758 | else |
15759 | current_emitting_switch_fallthrough = false; |
15760 | |
15761 | if (!degenerate_switch) |
15762 | begin_scope(); |
15763 | branch(from: block.self, to: target_block); |
15764 | if (!degenerate_switch) |
15765 | end_scope(); |
15766 | |
15767 | current_emitting_switch_fallthrough = false; |
15768 | } |
15769 | |
15770 | // Might still have to flush phi variables if we branch from loop header directly to merge target. |
15771 | // This is supposed to emit all cases where we branch from header to merge block directly. |
// There are two main scenarios where we cannot rely on default fallthrough.
15773 | // - There is an explicit default: label already. |
15774 | // In this case, literals_to_merge need to form their own "default" case, so that we avoid executing that block. |
15775 | // - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there. |
bool header_merge_requires_phi = flush_phi_required(from: block.self, to: block.next_block);
15777 | bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty(); |
15778 | if ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty()) |
15779 | { |
15780 | for (auto &case_literal : literals_to_merge) |
15781 | statement(ts: "case " , ts: to_case_label(case_literal, type.width, unsigned_case), ts&: label_suffix, ts: ":" ); |
15782 | |
15783 | if (block.default_block == block.next_block) |
15784 | { |
15785 | if (is_legacy_es()) |
15786 | statement(ts: "else" ); |
15787 | else |
15788 | statement(ts: "default:" ); |
15789 | } |
15790 | |
15791 | begin_scope(); |
15792 | flush_phi(from: block.self, to: block.next_block); |
15793 | statement(ts: "break;" ); |
15794 | end_scope(); |
15795 | } |
15796 | |
15797 | if (degenerate_switch && !is_legacy_es()) |
15798 | end_scope_decl(decl: "while(false)" ); |
15799 | else |
15800 | end_scope(); |
15801 | |
15802 | if (block.need_ladder_break) |
15803 | { |
15804 | statement(ts: "if (_" , ts&: block.self, ts: "_ladder_break)" ); |
15805 | begin_scope(); |
15806 | statement(ts: "break;" ); |
15807 | end_scope(); |
15808 | } |
15809 | |
15810 | current_emitting_switch = old_emitting_switch; |
15811 | break; |
15812 | } |
15813 | |
15814 | case SPIRBlock::Return: |
15815 | { |
15816 | for (auto &line : current_function->fixup_hooks_out) |
15817 | line(); |
15818 | |
15819 | if (processing_entry_point) |
15820 | emit_fixup(); |
15821 | |
15822 | auto &cfg = get_cfg_for_current_function(); |
15823 | |
15824 | if (block.return_value) |
15825 | { |
15826 | auto &type = expression_type(id: block.return_value); |
15827 | if (!type.array.empty() && !backend.can_return_array) |
15828 | { |
15829 | // If we cannot return arrays, we will have a special out argument we can write to instead. |
// The backend is responsible for setting this up, and redirecting the return values as appropriate.
15831 | if (ir.ids[block.return_value].get_type() != TypeUndef) |
15832 | { |
15833 | emit_array_copy(lhs: "spvReturnValue" , lhs_id: 0, rhs_id: block.return_value, lhs_storage: StorageClassFunction, |
15834 | rhs_storage: get_expression_effective_storage_class(ptr: block.return_value)); |
15835 | } |
15836 | |
15837 | if (!cfg.node_terminates_control_flow_in_sub_graph(from: current_function->entry_block, to: block.self) || |
15838 | block.loop_dominator != BlockID(SPIRBlock::NoDominator)) |
15839 | { |
15840 | statement(ts: "return;" ); |
15841 | } |
15842 | } |
15843 | else |
15844 | { |
15845 | // OpReturnValue can return Undef, so don't emit anything for this case. |
15846 | if (ir.ids[block.return_value].get_type() != TypeUndef) |
15847 | statement(ts: "return " , ts: to_unpacked_expression(id: block.return_value), ts: ";" ); |
15848 | } |
15849 | } |
15850 | else if (!cfg.node_terminates_control_flow_in_sub_graph(from: current_function->entry_block, to: block.self) || |
15851 | block.loop_dominator != BlockID(SPIRBlock::NoDominator)) |
15852 | { |
15853 | // If this block is the very final block and not called from control flow, |
15854 | // we do not need an explicit return which looks out of place. Just end the function here. |
15855 | // In the very weird case of for(;;) { return; } executing return is unconditional, |
15856 | // but we actually need a return here ... |
15857 | statement(ts: "return;" ); |
15858 | } |
15859 | break; |
15860 | } |
15861 | |
15862 | // If the Kill is terminating a block with a (probably synthetic) return value, emit a return value statement. |
15863 | case SPIRBlock::Kill: |
15864 | statement(ts&: backend.discard_literal, ts: ";" ); |
15865 | if (block.return_value) |
15866 | statement(ts: "return " , ts: to_unpacked_expression(id: block.return_value), ts: ";" ); |
15867 | break; |
15868 | |
15869 | case SPIRBlock::Unreachable: |
15870 | { |
15871 | // Avoid emitting false fallthrough, which can happen for |
15872 | // if (cond) break; else discard; inside a case label. |
15873 | // Discard is not always implementable as a terminator. |
15874 | |
15875 | auto &cfg = get_cfg_for_current_function(); |
15876 | bool inner_dominator_is_switch = false; |
15877 | ID id = block.self; |
15878 | |
15879 | while (id) |
15880 | { |
15881 | auto &iter_block = get<SPIRBlock>(id); |
15882 | if (iter_block.terminator == SPIRBlock::MultiSelect || |
15883 | iter_block.merge == SPIRBlock::MergeLoop) |
15884 | { |
15885 | ID next_block = iter_block.merge == SPIRBlock::MergeLoop ? |
15886 | iter_block.merge_block : iter_block.next_block; |
15887 | bool outside_construct = next_block && cfg.find_common_dominator(a: next_block, b: block.self) == next_block; |
15888 | if (!outside_construct) |
15889 | { |
15890 | inner_dominator_is_switch = iter_block.terminator == SPIRBlock::MultiSelect; |
15891 | break; |
15892 | } |
15893 | } |
15894 | |
15895 | if (cfg.get_preceding_edges(block: id).empty()) |
15896 | break; |
15897 | |
15898 | id = cfg.get_immediate_dominator(block: id); |
15899 | } |
15900 | |
15901 | if (inner_dominator_is_switch) |
15902 | statement(ts: "break; // unreachable workaround" ); |
15903 | |
15904 | emit_next_block = false; |
15905 | break; |
15906 | } |
15907 | |
15908 | case SPIRBlock::IgnoreIntersection: |
15909 | statement(ts: "ignoreIntersectionEXT;" ); |
15910 | break; |
15911 | |
15912 | case SPIRBlock::TerminateRay: |
15913 | statement(ts: "terminateRayEXT;" ); |
15914 | break; |
15915 | |
15916 | default: |
15917 | SPIRV_CROSS_THROW("Unimplemented block terminator." ); |
15918 | } |
15919 | |
15920 | if (block.next_block && emit_next_block) |
15921 | { |
15922 | // If we hit this case, we're dealing with an unconditional branch, which means we will output |
15923 | // that block after this. If we had selection merge, we already flushed phi variables. |
15924 | if (block.merge != SPIRBlock::MergeSelection) |
15925 | { |
15926 | flush_phi(from: block.self, to: block.next_block); |
15927 | // For a direct branch, need to remember to invalidate expressions in the next linear block instead. |
15928 | get<SPIRBlock>(id: block.next_block).invalidate_expressions = block.invalidate_expressions; |
15929 | } |
15930 | |
15931 | // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi. |
15932 | if (!current_emitting_switch_fallthrough) |
15933 | { |
15934 | // For merge selects we might have ignored the fact that a merge target |
15935 | // could have been a break; or continue; |
15936 | // We will need to deal with it here. |
15937 | if (is_loop_break(next: block.next_block)) |
15938 | { |
15939 | // Cannot check for just break, because switch statements will also use break. |
15940 | assert(block.merge == SPIRBlock::MergeSelection); |
15941 | statement(ts: "break;" ); |
15942 | } |
15943 | else if (is_continue(next: block.next_block)) |
15944 | { |
15945 | assert(block.merge == SPIRBlock::MergeSelection); |
15946 | branch_to_continue(from: block.self, to: block.next_block); |
15947 | } |
15948 | else if (BlockID(block.self) != block.next_block) |
15949 | emit_block_chain(block&: get<SPIRBlock>(id: block.next_block)); |
15950 | } |
15951 | } |
15952 | |
15953 | if (block.merge == SPIRBlock::MergeLoop) |
15954 | { |
15955 | if (continue_type == SPIRBlock::DoWhileLoop) |
15956 | { |
15957 | // Make sure that we run the continue block to get the expressions set, but this |
15958 | // should become an empty string. |
15959 | // We have no fallbacks if we cannot forward everything to temporaries ... |
15960 | const auto &continue_block = get<SPIRBlock>(id: block.continue_block); |
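// The test is "positive" when taking the true branch of the continue block loops straight back to the header; otherwise the emitted while-condition must be negated below.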
15961 | bool positive_test = execution_is_noop(from: get<SPIRBlock>(id: continue_block.true_block), |
15962 | to: get<SPIRBlock>(id: continue_block.loop_dominator)); |
15963 | |
15964 | uint32_t current_count = statement_count; |
15965 | auto statements = emit_continue_block(continue_block: block.continue_block, follow_true_block: positive_test, follow_false_block: !positive_test); |
15966 | if (statement_count != current_count) |
15967 | { |
15968 | // The DoWhile block has side effects, force ComplexLoop pattern next pass. |
15969 | get<SPIRBlock>(id: block.continue_block).complex_continue = true; |
15970 | force_recompile(); |
15971 | } |
15972 | |
15973 | // Might have to invert the do-while test here. |
15974 | auto condition = to_expression(id: continue_block.condition); |
15975 | if (!positive_test) |
15976 | condition = join(ts: "!" , ts: enclose_expression(expr: condition)); |
15977 | |
15978 | end_scope_decl(decl: join(ts: "while (" , ts&: condition, ts: ")" )); |
15979 | } |
15980 | else |
15981 | end_scope(); |
15982 | |
15983 | loop_level_saver.release(); |
15984 | |
15985 | // We cannot break out of two loops at once, so don't check for break; here. |
15986 | // Using block.self as the "from" block isn't quite right, but it has the same scope |
15987 | // and dominance structure, so it's fine. |
15988 | if (is_continue(next: block.merge_block)) |
15989 | branch_to_continue(from: block.self, to: block.merge_block); |
15990 | else |
15991 | emit_block_chain(block&: get<SPIRBlock>(id: block.merge_block)); |
15992 | } |
15993 | |
15994 | // Forget about control dependent expressions now. |
15995 | block.invalidate_expressions.clear(); |
15996 | |
15997 | // After we return, we must be out of scope, so if we somehow have to re-emit this function, |
15998 | // re-declare variables if necessary. |
15999 | assert(rearm_dominated_variables.size() == block.dominated_variables.size()); |
16000 | for (size_t i = 0; i < block.dominated_variables.size(); i++) |
16001 | { |
16002 | uint32_t var = block.dominated_variables[i]; |
16003 | get<SPIRVariable>(id: var).deferred_declaration = rearm_dominated_variables[i]; |
16004 | } |
16005 | |
16006 | // Just like for deferred declaration, we need to forget about loop variable enable |
16007 | // if our block chain is reinstantiated later. |
16008 | for (auto &var_id : block.loop_variables) |
16009 | get<SPIRVariable>(id: var_id).loop_variable_enable = false; |
16010 | } |
16011 | |
16012 | void CompilerGLSL::begin_scope() |
16013 | { |
16014 | statement(ts: "{" ); |
16015 | indent++; |
16016 | } |
16017 | |
16018 | void CompilerGLSL::end_scope() |
16019 | { |
16020 | if (!indent) |
16021 | SPIRV_CROSS_THROW("Popping empty indent stack." ); |
16022 | indent--; |
16023 | statement(ts: "}" ); |
16024 | } |
16025 | |
16026 | void CompilerGLSL::end_scope(const string &trailer) |
16027 | { |
16028 | if (!indent) |
16029 | SPIRV_CROSS_THROW("Popping empty indent stack." ); |
16030 | indent--; |
16031 | statement(ts: "}" , ts: trailer); |
16032 | } |
16033 | |
16034 | void CompilerGLSL::end_scope_decl() |
16035 | { |
16036 | if (!indent) |
16037 | SPIRV_CROSS_THROW("Popping empty indent stack." ); |
16038 | indent--; |
16039 | statement(ts: "};" ); |
16040 | } |
16041 | |
16042 | void CompilerGLSL::end_scope_decl(const string &decl) |
16043 | { |
16044 | if (!indent) |
16045 | SPIRV_CROSS_THROW("Popping empty indent stack." ); |
16046 | indent--; |
16047 | statement(ts: "} " , ts: decl, ts: ";" ); |
16048 | } |
16049 | |
16050 | void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length) |
16051 | { |
16052 | // If our variable is remapped, and we rely on type-remapping information as |
16053 | // well, then we cannot pass the variable as a function parameter. |
16054 | // Fixing this is non-trivial without stamping out variants of the same function, |
16055 | // so for now warn about this and suggest workarounds instead. |
16056 | for (uint32_t i = 0; i < length; i++) |
16057 | { |
16058 | auto *var = maybe_get<SPIRVariable>(id: args[i]); |
16059 | if (!var || !var->remapped_variable) |
16060 | continue; |
16061 | |
16062 | auto &type = get<SPIRType>(id: var->basetype); |
16063 | if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) |
16064 | { |
16065 | SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. " |
16066 | "This will not work correctly because type-remapping information is lost. " |
16067 | "To workaround, please consider not passing the subpass input as a function parameter, " |
16068 | "or use in/out variables instead which do not need type remapping information." ); |
16069 | } |
16070 | } |
16071 | } |
16072 | |
16073 | const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr) |
16074 | { |
16075 | // FIXME: This is kind of hacky. There should be a cleaner way. |
16076 | auto offset = uint32_t(&instr - current_emitting_block->ops.data()); |
16077 | if ((offset + 1) < current_emitting_block->ops.size()) |
16078 | return ¤t_emitting_block->ops[offset + 1]; |
16079 | else |
16080 | return nullptr; |
16081 | } |
16082 | |
16083 | uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) |
16084 | { |
16085 | return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask | |
16086 | MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | |
16087 | MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); |
16088 | } |
16089 | |
16090 | void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t, uint32_t rhs_id, StorageClass, StorageClass) |
16091 | { |
16092 | statement(ts: lhs, ts: " = " , ts: to_expression(id: rhs_id), ts: ";" ); |
16093 | } |
16094 | |
16095 | bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id) |
16096 | { |
16097 | if (!backend.force_gl_in_out_block) |
16098 | return false; |
16099 | // This path is only relevant for GL backends. |
16100 | |
16101 | auto *var = maybe_get<SPIRVariable>(id: target_id); |
16102 | if (!var || var->storage != StorageClassOutput) |
16103 | return false; |
16104 | |
16105 | if (!is_builtin_variable(var: *var) || BuiltIn(get_decoration(id: var->self, decoration: DecorationBuiltIn)) != BuiltInSampleMask) |
16106 | return false; |
16107 | |
16108 | auto &type = expression_type(id: source_id); |
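// gl_SampleMask is declared as an array of int in GLSL, while the SPIR-V source value may be an array of uint, so unroll the store and bitcast each element.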
16109 | string array_expr; |
16110 | if (type.array_size_literal.back()) |
16111 | { |
16112 | array_expr = convert_to_string(t: type.array.back()); |
16113 | if (type.array.back() == 0) |
16114 | SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array." ); |
16115 | } |
16116 | else |
16117 | array_expr = to_expression(id: type.array.back()); |
16118 | |
16119 | SPIRType target_type; |
16120 | target_type.basetype = SPIRType::Int; |
16121 | |
16122 | statement(ts: "for (int i = 0; i < int(" , ts&: array_expr, ts: "); i++)" ); |
16123 | begin_scope(); |
16124 | statement(ts: to_expression(id: target_id), ts: "[i] = " , |
16125 | ts: bitcast_expression(target_type, expr_type: type.basetype, expr: join(ts: to_expression(id: source_id), ts: "[i]" )), |
16126 | ts: ";" ); |
16127 | end_scope(); |
16128 | |
16129 | return true; |
16130 | } |
16131 | |
16132 | void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr) |
16133 | { |
16134 | if (!backend.force_gl_in_out_block) |
16135 | return; |
16136 | // This path is only relevant for GL backends. |
16137 | |
16138 | auto *var = maybe_get<SPIRVariable>(id: source_id); |
16139 | if (!var) |
16140 | return; |
16141 | |
16142 | if (var->storage != StorageClassInput && var->storage != StorageClassOutput) |
16143 | return; |
16144 | |
16145 | auto &type = get_variable_data_type(var: *var); |
16146 | if (type.array.empty()) |
16147 | return; |
16148 | |
16149 | auto builtin = BuiltIn(get_decoration(id: var->self, decoration: DecorationBuiltIn)); |
16150 | bool is_builtin = is_builtin_variable(var: *var) && |
16151 | (builtin == BuiltInPointSize || |
16152 | builtin == BuiltInPosition || |
16153 | builtin == BuiltInSampleMask); |
16154 | bool is_tess = is_tessellation_shader(); |
16155 | bool is_patch = has_decoration(id: var->self, decoration: DecorationPatch); |
16156 | bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask; |
16157 | |
16158 | // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it. |
16159 | // We must unroll the array load. |
16160 | // For builtins, we couldn't catch this case normally, |
16161 | // because this is resolved in the OpAccessChain in most cases. |
16162 | // If we load the entire array, we have no choice but to unroll here. |
16163 | if (!is_patch && (is_builtin || is_tess)) |
16164 | { |
16165 | auto new_expr = join(ts: "_" , ts&: target_id, ts: "_unrolled" ); |
16166 | statement(ts: variable_decl(type, name: new_expr, id: target_id), ts: ";" ); |
16167 | string array_expr; |
16168 | if (type.array_size_literal.back()) |
16169 | { |
16170 | array_expr = convert_to_string(t: type.array.back()); |
16171 | if (type.array.back() == 0) |
16172 | SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array." ); |
16173 | } |
16174 | else |
16175 | array_expr = to_expression(id: type.array.back()); |
16176 | |
16177 | // The array size might be a specialization constant, so use a for-loop instead. |
16178 | statement(ts: "for (int i = 0; i < int(" , ts&: array_expr, ts: "); i++)" ); |
16179 | begin_scope(); |
16180 | if (is_builtin && !is_sample_mask) |
16181 | statement(ts&: new_expr, ts: "[i] = gl_in[i]." , ts&: expr, ts: ";" ); |
16182 | else if (is_sample_mask) |
16183 | { |
16184 | SPIRType target_type; |
16185 | target_type.basetype = SPIRType::Int; |
16186 | statement(ts&: new_expr, ts: "[i] = " , ts: bitcast_expression(target_type, expr_type: type.basetype, expr: join(ts&: expr, ts: "[i]" )), ts: ";" ); |
16187 | } |
16188 | else |
16189 | statement(ts&: new_expr, ts: "[i] = " , ts&: expr, ts: "[i];" ); |
16190 | end_scope(); |
16191 | |
16192 | expr = std::move(new_expr); |
16193 | } |
16194 | } |
16195 | |
16196 | void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) |
16197 | { |
16198 | // We will handle array cases elsewhere. |
16199 | if (!expr_type.array.empty()) |
16200 | return; |
16201 | |
16202 | auto *var = maybe_get_backing_variable(chain: source_id); |
16203 | if (var) |
16204 | source_id = var->self; |
16205 | |
16206 | // Only interested in standalone builtin variables. |
16207 | if (!has_decoration(id: source_id, decoration: DecorationBuiltIn)) |
16208 | return; |
16209 | |
16210 | auto builtin = static_cast<BuiltIn>(get_decoration(id: source_id, decoration: DecorationBuiltIn)); |
16211 | auto expected_type = expr_type.basetype; |
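// GLSL fixes the signedness of these builtins (e.g. gl_InstanceIndex is int, gl_GlobalInvocationID is uvec3), so bitcast the loaded value if the SPIR-V type disagrees.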
16212 | |
16213 | // TODO: Fill in for more builtins. |
16214 | switch (builtin) |
16215 | { |
16216 | case BuiltInLayer: |
16217 | case BuiltInPrimitiveId: |
16218 | case BuiltInViewportIndex: |
16219 | case BuiltInInstanceId: |
16220 | case BuiltInInstanceIndex: |
16221 | case BuiltInVertexId: |
16222 | case BuiltInVertexIndex: |
16223 | case BuiltInSampleId: |
16224 | case BuiltInBaseVertex: |
16225 | case BuiltInBaseInstance: |
16226 | case BuiltInDrawIndex: |
16227 | case BuiltInFragStencilRefEXT: |
16228 | case BuiltInInstanceCustomIndexNV: |
16229 | case BuiltInSampleMask: |
16230 | case BuiltInPrimitiveShadingRateKHR: |
16231 | case BuiltInShadingRateKHR: |
16232 | expected_type = SPIRType::Int; |
16233 | break; |
16234 | |
16235 | case BuiltInGlobalInvocationId: |
16236 | case BuiltInLocalInvocationId: |
16237 | case BuiltInWorkgroupId: |
16238 | case BuiltInLocalInvocationIndex: |
16239 | case BuiltInWorkgroupSize: |
16240 | case BuiltInNumWorkgroups: |
16241 | case BuiltInIncomingRayFlagsNV: |
16242 | case BuiltInLaunchIdNV: |
16243 | case BuiltInLaunchSizeNV: |
16244 | expected_type = SPIRType::UInt; |
16245 | break; |
16246 | |
16247 | default: |
16248 | break; |
16249 | } |
16250 | |
16251 | if (expected_type != expr_type.basetype) |
16252 | expr = bitcast_expression(target_type: expr_type, expr_type: expected_type, expr); |
16253 | } |
16254 | |
16255 | void CompilerGLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) |
16256 | { |
16257 | auto *var = maybe_get_backing_variable(chain: target_id); |
16258 | if (var) |
16259 | target_id = var->self; |
16260 | |
16261 | // Only interested in standalone builtin variables. |
16262 | if (!has_decoration(id: target_id, decoration: DecorationBuiltIn)) |
16263 | return; |
16264 | |
16265 | auto builtin = static_cast<BuiltIn>(get_decoration(id: target_id, decoration: DecorationBuiltIn)); |
16266 | auto expected_type = expr_type.basetype; |
16267 | |
16268 | // TODO: Fill in for more builtins. |
16269 | switch (builtin) |
16270 | { |
16271 | case BuiltInLayer: |
16272 | case BuiltInPrimitiveId: |
16273 | case BuiltInViewportIndex: |
16274 | case BuiltInFragStencilRefEXT: |
16275 | case BuiltInSampleMask: |
16276 | case BuiltInPrimitiveShadingRateKHR: |
16277 | case BuiltInShadingRateKHR: |
16278 | expected_type = SPIRType::Int; |
16279 | break; |
16280 | |
16281 | default: |
16282 | break; |
16283 | } |
16284 | |
16285 | if (expected_type != expr_type.basetype) |
16286 | { |
16287 | auto type = expr_type; |
16288 | type.basetype = expected_type; |
16289 | expr = bitcast_expression(target_type: type, expr_type: expr_type.basetype, expr); |
16290 | } |
16291 | } |
16292 | |
16293 | void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id) |
16294 | { |
16295 | if (*backend.nonuniform_qualifier == '\0') |
16296 | return; |
16297 | |
16298 | auto *var = maybe_get_backing_variable(chain: ptr_id); |
16299 | if (!var) |
16300 | return; |
16301 | |
16302 | if (var->storage != StorageClassUniformConstant && |
16303 | var->storage != StorageClassStorageBuffer && |
16304 | var->storage != StorageClassUniform) |
16305 | return; |
16306 | |
16307 | auto &backing_type = get<SPIRType>(id: var->basetype); |
16308 | if (backing_type.array.empty()) |
16309 | return; |
16310 | |
16311 | // If we get here, we know we're accessing an arrayed resource which |
16312 | // might require nonuniform qualifier. |
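// For the GLSL backend this rewrites e.g. tex[index] into tex[nonuniformEXT(index)] by wrapping the first index expression.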
16313 | |
16314 | auto start_array_index = expr.find_first_of(c: '['); |
16315 | |
16316 | if (start_array_index == string::npos) |
16317 | return; |
16318 | |
16319 | // We've opened a bracket, track expressions until we can close the bracket. |
16320 | // This must be our resource index. |
16321 | size_t end_array_index = string::npos; |
16322 | unsigned bracket_count = 1; |
16323 | for (size_t index = start_array_index + 1; index < expr.size(); index++) |
16324 | { |
16325 | if (expr[index] == ']') |
16326 | { |
16327 | if (--bracket_count == 0) |
16328 | { |
16329 | end_array_index = index; |
16330 | break; |
16331 | } |
16332 | } |
16333 | else if (expr[index] == '[') |
16334 | bracket_count++; |
16335 | } |
16336 | |
16337 | assert(bracket_count == 0); |
16338 | |
16339 | // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's |
16340 | // nothing we can do here to express that. |
16341 | if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index) |
16342 | return; |
16343 | |
16344 | start_array_index++; |
16345 | |
16346 | expr = join(ts: expr.substr(pos: 0, n: start_array_index), ts&: backend.nonuniform_qualifier, ts: "(" , |
16347 | ts: expr.substr(pos: start_array_index, n: end_array_index - start_array_index), ts: ")" , |
16348 | ts: expr.substr(pos: end_array_index, n: string::npos)); |
16349 | } |
16350 | |
16351 | void CompilerGLSL::emit_block_hints(const SPIRBlock &block) |
16352 | { |
16353 | if ((options.es && options.version < 310) || (!options.es && options.version < 140)) |
16354 | return; |
16355 | |
16356 | switch (block.hint) |
16357 | { |
16358 | case SPIRBlock::HintFlatten: |
16359 | require_extension_internal(ext: "GL_EXT_control_flow_attributes" ); |
16360 | statement(ts: "SPIRV_CROSS_FLATTEN" ); |
16361 | break; |
16362 | case SPIRBlock::HintDontFlatten: |
16363 | require_extension_internal(ext: "GL_EXT_control_flow_attributes" ); |
16364 | statement(ts: "SPIRV_CROSS_BRANCH" ); |
16365 | break; |
16366 | case SPIRBlock::HintUnroll: |
16367 | require_extension_internal(ext: "GL_EXT_control_flow_attributes" ); |
16368 | statement(ts: "SPIRV_CROSS_UNROLL" ); |
16369 | break; |
16370 | case SPIRBlock::HintDontUnroll: |
16371 | require_extension_internal(ext: "GL_EXT_control_flow_attributes" ); |
16372 | statement(ts: "SPIRV_CROSS_LOOP" ); |
16373 | break; |
16374 | default: |
16375 | break; |
16376 | } |
16377 | } |
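
// Sketch of what emit_block_hints() above produces, assuming the SPIRV_CROSS_* macros are defined in the
// shader preamble in terms of GL_EXT_control_flow_attributes (e.g. [[flatten]], [[unroll]]):
//
//   SPIRV_CROSS_UNROLL
//   for (int i = 0; i < 4; i++) { ... }
//
// The hint is emitted as a bare statement on the line immediately preceding the branch or loop.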
16378 | |
16379 | void CompilerGLSL::preserve_alias_on_reset(uint32_t id) |
16380 | { |
16381 | preserved_aliases[id] = get_name(id); |
16382 | } |
16383 | |
16384 | void CompilerGLSL::reset_name_caches() |
16385 | { |
16386 | for (auto &preserved : preserved_aliases) |
16387 | set_name(id: preserved.first, name: preserved.second); |
16388 | |
16389 | preserved_aliases.clear(); |
16390 | resource_names.clear(); |
16391 | block_input_names.clear(); |
16392 | block_output_names.clear(); |
16393 | block_ubo_names.clear(); |
16394 | block_ssbo_names.clear(); |
16395 | block_names.clear(); |
16396 | function_overloads.clear(); |
16397 | } |
16398 | |
16399 | void CompilerGLSL::fixup_anonymous_struct_names(std::unordered_set<uint32_t> &visited, const SPIRType &type) |
16400 | { |
16401 | if (visited.count(x: type.self)) |
16402 | return; |
16403 | visited.insert(x: type.self); |
16404 | |
16405 | for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) |
16406 | { |
16407 | auto &mbr_type = get<SPIRType>(id: type.member_types[i]); |
16408 | |
16409 | if (mbr_type.basetype == SPIRType::Struct) |
16410 | { |
16411 | // If there are multiple aliases, the output might be somewhat unpredictable, |
16412 | // but the only real alternative in that case is to do nothing, which isn't any better. |
16413 | // This check should be fine in practice. |
16414 | if (get_name(id: mbr_type.self).empty() && !get_member_name(id: type.self, index: i).empty()) |
16415 | { |
16416 | auto anon_name = join(ts: "anon_" , ts: get_member_name(id: type.self, index: i)); |
16417 | ParsedIR::sanitize_underscores(str&: anon_name); |
16418 | set_name(id: mbr_type.self, name: anon_name); |
16419 | } |
16420 | |
16421 | fixup_anonymous_struct_names(visited, type: mbr_type); |
16422 | } |
16423 | } |
16424 | } |
16425 | |
16426 | void CompilerGLSL::fixup_anonymous_struct_names() |
16427 | { |
16428 | // HLSL codegen can often end up emitting anonymous structs inside blocks, which |
16429 | // breaks GL linking since all names must match ... |
16430 | // To emit sensible code, attempt to find such structs and rename them to anon_$member. |
16431 | |
16432 | // Guards against exponential explosion with weird type trees. |
16433 | std::unordered_set<uint32_t> visited; |
16434 | |
16435 | ir.for_each_typed_id<SPIRType>(op: [&](uint32_t, SPIRType &type) { |
16436 | if (type.basetype == SPIRType::Struct && |
16437 | (has_decoration(id: type.self, decoration: DecorationBlock) || |
16438 | has_decoration(id: type.self, decoration: DecorationBufferBlock))) |
16439 | { |
16440 | fixup_anonymous_struct_names(visited, type); |
16441 | } |
16442 | }); |
16443 | } |
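
// Illustrative example for fixup_anonymous_struct_names() above (hypothetical names): if a Block-decorated
// struct has a member "params" whose own struct type is unnamed, that type is renamed to "anon_params"
// (after sanitize_underscores), so every stage declaring the block emits the same struct name and GL
// program linking can succeed.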
16444 | |
16445 | void CompilerGLSL::fixup_type_alias() |
16446 | { |
16447 | // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists. |
16448 | ir.for_each_typed_id<SPIRType>(op: [&](uint32_t self, SPIRType &type) { |
16449 | if (!type.type_alias) |
16450 | return; |
16451 | |
16452 | if (has_decoration(id: type.self, decoration: DecorationBlock) || has_decoration(id: type.self, decoration: DecorationBufferBlock)) |
16453 | { |
16454 | // Top-level block types should never alias anything else. |
16455 | type.type_alias = 0; |
16456 | } |
16457 | else if (type_is_block_like(type) && type.self == ID(self)) |
16458 | { |
16459 | // A block-like type is any type whose members carry Offset decorations, but which is not a top-level block, |
16460 | // i.e. a struct which is placed inside a buffer. |
16461 | // Promote this type to become the master. |
16462 | ir.for_each_typed_id<SPIRType>(op: [&](uint32_t other_id, SPIRType &other_type) { |
16463 | if (other_id == self) |
16464 | return; |
16465 | |
16466 | if (other_type.type_alias == type.type_alias) |
16467 | other_type.type_alias = self; |
16468 | }); |
16469 | |
16470 | this->get<SPIRType>(id: type.type_alias).type_alias = self; |
16471 | type.type_alias = 0; |
16472 | } |
16473 | }); |
16474 | } |
16475 | |
16476 | void CompilerGLSL::reorder_type_alias() |
16477 | { |
16478 | // Reorder declaration of types so that the master of the type alias is always emitted first. |
16479 | // We need this in case a type B depends on type A (A must come before B in the vector), but A is an alias of a type ABuffer, |
16480 | // which means the declaration of A doesn't happen (yet), and the order would be B, ABuffer rather than ABuffer, B. Fix this up here. |
16481 | auto loop_lock = ir.create_loop_hard_lock(); |
16482 | |
16483 | auto &type_ids = ir.ids_for_type[TypeType]; |
16484 | for (auto alias_itr = begin(cont&: type_ids); alias_itr != end(cont&: type_ids); ++alias_itr) |
16485 | { |
16486 | auto &type = get<SPIRType>(id: *alias_itr); |
16487 | if (type.type_alias != TypeID(0) && |
16488 | !has_extended_decoration(id: type.type_alias, decoration: SPIRVCrossDecorationBufferBlockRepacked)) |
16489 | { |
16490 | // We will skip declaring this type, so make sure the type_alias type comes before. |
16491 | auto master_itr = find(first: begin(cont&: type_ids), last: end(cont&: type_ids), val: ID(type.type_alias)); |
16492 | assert(master_itr != end(type_ids)); |
16493 | |
16494 | if (alias_itr < master_itr) |
16495 | { |
16496 | // Must also swap the type order for the constant-type joined array. |
16497 | auto &joined_types = ir.ids_for_constant_or_type; |
16498 | auto alt_alias_itr = find(first: begin(cont&: joined_types), last: end(cont&: joined_types), val: *alias_itr); |
16499 | auto alt_master_itr = find(first: begin(cont&: joined_types), last: end(cont&: joined_types), val: *master_itr); |
16500 | assert(alt_alias_itr != end(joined_types)); |
16501 | assert(alt_master_itr != end(joined_types)); |
16502 | |
16503 | swap(a&: *alias_itr, b&: *master_itr); |
16504 | swap(a&: *alt_alias_itr, b&: *alt_master_itr); |
16505 | } |
16506 | } |
16507 | } |
16508 | } |
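
// Worked example for reorder_type_alias() above (hypothetical types): suppose ids_for_type holds
// { A, B, ABuffer }, where B depends on A and A is an alias of ABuffer, so A itself is never declared.
// Emission would then effectively produce B before ABuffer. Because the alias A precedes its master
// ABuffer, the two are swapped, giving { ABuffer, B, A } and an effective declaration order of ABuffer, B.
// The same swap is mirrored in ids_for_constant_or_type so both arrays stay consistent.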
16509 | |
16510 | void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal) |
16511 | { |
16512 | // If we are redirecting statements, ignore the line directive. |
16513 | // Common case here is continue blocks. |
16514 | if (redirect_statement) |
16515 | return; |
16516 | |
16517 | if (options.emit_line_directives) |
16518 | { |
16519 | require_extension_internal(ext: "GL_GOOGLE_cpp_style_line_directive" ); |
16520 | statement_no_indent(ts: "#line " , ts&: line_literal, ts: " \"" , ts&: get<SPIRString>(id: file_id).str, ts: "\"" ); |
16521 | } |
16522 | } |
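
// Sketch of the directive emitted above, assuming a hypothetical OpSource file name and line:
//
//   #line 42 "shader.frag"
//
// GL_GOOGLE_cpp_style_line_directive is required because core GLSL's #line takes an integer
// source-string number rather than a quoted file name.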
16523 | |
16524 | void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id, |
16525 | SmallVector<uint32_t> chain) |
16526 | { |
16527 | // Fully unroll all member/array indices one by one. |
16528 | |
16529 | auto &lhs_type = get<SPIRType>(id: lhs_type_id); |
16530 | auto &rhs_type = get<SPIRType>(id: rhs_type_id); |
16531 | |
16532 | if (!lhs_type.array.empty()) |
16533 | { |
16534 | // Could use a loop here to support specialization constants, but it gets rather complicated with nested array types, |
16535 | // and this is a rather obscure opcode anyway, so keep it simple unless we are forced to. |
16536 | uint32_t array_size = to_array_size_literal(type: lhs_type); |
16537 | chain.push_back(t: 0); |
16538 | |
16539 | for (uint32_t i = 0; i < array_size; i++) |
16540 | { |
16541 | chain.back() = i; |
16542 | emit_copy_logical_type(lhs_id, lhs_type_id: lhs_type.parent_type, rhs_id, rhs_type_id: rhs_type.parent_type, chain); |
16543 | } |
16544 | } |
16545 | else if (lhs_type.basetype == SPIRType::Struct) |
16546 | { |
16547 | chain.push_back(t: 0); |
16548 | uint32_t member_count = uint32_t(lhs_type.member_types.size()); |
16549 | for (uint32_t i = 0; i < member_count; i++) |
16550 | { |
16551 | chain.back() = i; |
16552 | emit_copy_logical_type(lhs_id, lhs_type_id: lhs_type.member_types[i], rhs_id, rhs_type_id: rhs_type.member_types[i], chain); |
16553 | } |
16554 | } |
16555 | else |
16556 | { |
16557 | // Need to handle unpack/packing fixups since this can differ wildly between the logical types, |
16558 | // particularly in MSL. |
16559 | // To deal with this, we emit access chains and go through emit_store_statement |
16560 | // to deal with all the special cases we can encounter. |
16561 | |
16562 | AccessChainMeta lhs_meta, rhs_meta; |
16563 | auto lhs = access_chain_internal(base: lhs_id, indices: chain.data(), count: uint32_t(chain.size()), |
16564 | flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &lhs_meta); |
16565 | auto rhs = access_chain_internal(base: rhs_id, indices: chain.data(), count: uint32_t(chain.size()), |
16566 | flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &rhs_meta); |
16567 | |
16568 | uint32_t id = ir.increase_bound_by(count: 2); |
16569 | lhs_id = id; |
16570 | rhs_id = id + 1; |
16571 | |
16572 | { |
16573 | auto &lhs_expr = set<SPIRExpression>(id: lhs_id, args: std::move(lhs), args&: lhs_type_id, args: true); |
16574 | lhs_expr.need_transpose = lhs_meta.need_transpose; |
16575 | |
16576 | if (lhs_meta.storage_is_packed) |
16577 | set_extended_decoration(id: lhs_id, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
16578 | if (lhs_meta.storage_physical_type != 0) |
16579 | set_extended_decoration(id: lhs_id, decoration: SPIRVCrossDecorationPhysicalTypeID, value: lhs_meta.storage_physical_type); |
16580 | |
16581 | forwarded_temporaries.insert(x: lhs_id); |
16582 | suppressed_usage_tracking.insert(x: lhs_id); |
16583 | } |
16584 | |
16585 | { |
16586 | auto &rhs_expr = set<SPIRExpression>(id: rhs_id, args: std::move(rhs), args&: rhs_type_id, args: true); |
16587 | rhs_expr.need_transpose = rhs_meta.need_transpose; |
16588 | |
16589 | if (rhs_meta.storage_is_packed) |
16590 | set_extended_decoration(id: rhs_id, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
16591 | if (rhs_meta.storage_physical_type != 0) |
16592 | set_extended_decoration(id: rhs_id, decoration: SPIRVCrossDecorationPhysicalTypeID, value: rhs_meta.storage_physical_type); |
16593 | |
16594 | forwarded_temporaries.insert(x: rhs_id); |
16595 | suppressed_usage_tracking.insert(x: rhs_id); |
16596 | } |
16597 | |
16598 | emit_store_statement(lhs_expression: lhs_id, rhs_expression: rhs_id); |
16599 | } |
16600 | } |
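
// Illustrative expansion for emit_copy_logical_type() above (hypothetical names): copying a value of type
// struct { vec4 a; float b[2]; } is unrolled into one store per leaf member, roughly
//
//   dst.a = src.a;
//   dst.b[0] = src.b[0];
//   dst.b[1] = src.b[1];
//
// Each leaf store goes through emit_store_statement(), so packed/row-major fixups still apply
// (mostly relevant for the MSL backend).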
16601 | |
16602 | bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const |
16603 | { |
16604 | if (!has_decoration(id, decoration: DecorationInputAttachmentIndex)) |
16605 | return false; |
16606 | |
16607 | uint32_t input_attachment_index = get_decoration(id, decoration: DecorationInputAttachmentIndex); |
16608 | for (auto &remap : subpass_to_framebuffer_fetch_attachment) |
16609 | if (remap.first == input_attachment_index) |
16610 | return true; |
16611 | |
16612 | return false; |
16613 | } |
16614 | |
16615 | const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const |
16616 | { |
16617 | const SPIRVariable *ret = nullptr; |
16618 | ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) { |
16619 | if (has_decoration(id: var.self, decoration: DecorationInputAttachmentIndex) && |
16620 | get_decoration(id: var.self, decoration: DecorationInputAttachmentIndex) == index) |
16621 | { |
16622 | ret = &var; |
16623 | } |
16624 | }); |
16625 | return ret; |
16626 | } |
16627 | |
16628 | const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const |
16629 | { |
16630 | const SPIRVariable *ret = nullptr; |
16631 | ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) { |
16632 | if (var.storage == StorageClassOutput && get_decoration(id: var.self, decoration: DecorationLocation) == location) |
16633 | ret = &var; |
16634 | }); |
16635 | return ret; |
16636 | } |
16637 | |
16638 | void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs() |
16639 | { |
16640 | for (auto &remap : subpass_to_framebuffer_fetch_attachment) |
16641 | { |
16642 | auto *subpass_var = find_subpass_input_by_attachment_index(index: remap.first); |
16643 | auto *output_var = find_color_output_by_location(location: remap.second); |
16644 | if (!subpass_var) |
16645 | continue; |
16646 | if (!output_var) |
16647 | SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able " |
16648 | "to read from it." ); |
16649 | if (is_array(type: get<SPIRType>(id: output_var->basetype))) |
16650 | SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs." ); |
16651 | |
16652 | auto &func = get<SPIRFunction>(id: get_entry_point().self); |
16653 | func.fixup_hooks_in.push_back(t: [=]() { |
16654 | if (is_legacy()) |
16655 | { |
16656 | statement(ts: to_expression(id: subpass_var->self), ts: " = " , ts: "gl_LastFragData[" , |
16657 | ts: get_decoration(id: output_var->self, decoration: DecorationLocation), ts: "];" ); |
16658 | } |
16659 | else |
16660 | { |
16661 | uint32_t num_rt_components = this->get<SPIRType>(id: output_var->basetype).vecsize; |
16662 | statement(ts: to_expression(id: subpass_var->self), ts: vector_swizzle(vecsize: num_rt_components, index: 0), ts: " = " , |
16663 | ts: to_expression(id: output_var->self), ts: ";" ); |
16664 | } |
16665 | }); |
16666 | } |
16667 | } |
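
// Sketch of the fixup hooks emitted above at the top of the entry point (hypothetical variable names).
// On legacy targets the previous framebuffer contents are read through gl_LastFragData:
//
//   spvSubpassInput = gl_LastFragData[0];
//
// Otherwise the declared color output itself is read back (GL_EXT_shader_framebuffer_fetch), with a
// swizzle matching the output's component count:
//
//   spvSubpassInput.xyz = FragColor;
//
// Only one of the two forms is emitted per attachment, depending on is_legacy().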
16668 | |
16669 | bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const |
16670 | { |
16671 | return is_depth_image(type: get<SPIRType>(id: get<SPIRVariable>(id).basetype), id); |
16672 | } |
16673 | |
16674 | const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c) |
16675 | { |
16676 | static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot" , |
16677 | "GL_KHR_shader_subgroup_basic" , |
16678 | "GL_KHR_shader_subgroup_vote" , |
16679 | "GL_NV_gpu_shader_5" , |
16680 | "GL_NV_shader_thread_group" , |
16681 | "GL_NV_shader_thread_shuffle" , |
16682 | "GL_ARB_shader_ballot" , |
16683 | "GL_ARB_shader_group_vote" , |
16684 | "GL_AMD_gcn_shader" }; |
16685 | return retval[c]; |
16686 | } |
16687 | |
16688 | SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c) |
16689 | { |
16690 | switch (c) |
16691 | { |
16692 | case ARB_shader_ballot: |
16693 | return { "GL_ARB_shader_int64" }; |
16694 | case AMD_gcn_shader: |
16695 | return { "GL_AMD_gpu_shader_int64" , "GL_NV_gpu_shader5" }; |
16696 | default: |
16697 | return {}; |
16698 | } |
16699 | } |
16700 | |
16701 | const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c) |
16702 | { |
16703 | switch (c) |
16704 | { |
16705 | case ARB_shader_ballot: |
16706 | return "defined(GL_ARB_shader_int64)" ; |
16707 | case AMD_gcn_shader: |
16708 | return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))" ; |
16709 | default: |
16710 | return "" ; |
16711 | } |
16712 | } |
16713 | |
16714 | CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper:: |
16715 | get_feature_dependencies(Feature feature) |
16716 | { |
16717 | switch (feature) |
16718 | { |
16719 | case SubgroupAllEqualT: |
16720 | return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool }; |
16721 | case SubgroupElect: |
16722 | return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID }; |
16723 | case SubgroupInverseBallot_InclBitCount_ExclBitCout: |
16724 | return { SubgroupMask }; |
16725 | case SubgroupBallotBitCount: |
16726 | return { SubgroupBallot }; |
16727 | default: |
16728 | return {}; |
16729 | } |
16730 | } |
16731 | |
16732 | CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper:: |
16733 | get_feature_dependency_mask(Feature feature) |
16734 | { |
16735 | return build_mask(features: get_feature_dependencies(feature)); |
16736 | } |
16737 | |
16738 | bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature) |
16739 | { |
16740 | static const bool retval[FeatureCount] = { false, false, false, false, false, false, |
16741 | true, // SubgroupBallotFindLSB_MSB |
16742 | false, false, false, false, |
16743 | true, // SubgroupMemBarrier - replaced with workgroup memory barriers |
16744 | false, false, true, false }; |
16745 | |
16746 | return retval[feature]; |
16747 | } |
16748 | |
16749 | CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper:: |
16750 | get_KHR_extension_for_feature(Feature feature) |
16751 | { |
16752 | static const Candidate extensions[FeatureCount] = { |
16753 | KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, |
16754 | KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote, |
16755 | KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, |
16756 | KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot |
16757 | }; |
16758 | |
16759 | return extensions[feature]; |
16760 | } |
16761 | |
16762 | void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature) |
16763 | { |
16764 | feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature); |
16765 | } |
16766 | |
16767 | bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const |
16768 | { |
16769 | return (feature_mask & (1u << feature)) != 0; |
16770 | } |
16771 | |
16772 | CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const |
16773 | { |
16774 | Result res; |
16775 | |
16776 | for (uint32_t i = 0u; i < FeatureCount; ++i) |
16777 | { |
16778 | if (feature_mask & (1u << i)) |
16779 | { |
16780 | auto feature = static_cast<Feature>(i); |
16781 | std::unordered_set<uint32_t> unique_candidates; |
16782 | |
16783 | auto candidates = get_candidates_for_feature(ft: feature); |
16784 | unique_candidates.insert(first: candidates.begin(), last: candidates.end()); |
16785 | |
16786 | auto deps = get_feature_dependencies(feature); |
16787 | for (Feature d : deps) |
16788 | { |
16789 | candidates = get_candidates_for_feature(ft: d); |
16790 | if (!candidates.empty()) |
16791 | unique_candidates.insert(first: candidates.begin(), last: candidates.end()); |
16792 | } |
16793 | |
16794 | for (uint32_t c : unique_candidates) |
16795 | ++res.weights[static_cast<Candidate>(c)]; |
16796 | } |
16797 | } |
16798 | |
16799 | return res; |
16800 | } |
16801 | |
16802 | CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper:: |
16803 | get_candidates_for_feature(Feature ft, const Result &r) |
16804 | { |
16805 | auto c = get_candidates_for_feature(ft); |
16806 | auto cmp = [&r](Candidate a, Candidate b) { |
16807 | if (r.weights[a] == r.weights[b]) |
16808 | return a < b; // Prefer candidates with lower enum value |
16809 | return r.weights[a] > r.weights[b]; |
16810 | }; |
16811 | std::sort(first: c.begin(), last: c.end(), comp: cmp); |
16812 | return c; |
16813 | } |
16814 | |
16815 | CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper:: |
16816 | get_candidates_for_feature(Feature feature) |
16817 | { |
16818 | switch (feature) |
16819 | { |
16820 | case SubgroupMask: |
16821 | return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; |
16822 | case SubgroupSize: |
16823 | return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot }; |
16824 | case SubgroupInvocationID: |
16825 | return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot }; |
16826 | case SubgroupID: |
16827 | return { KHR_shader_subgroup_basic, NV_shader_thread_group }; |
16828 | case NumSubgroups: |
16829 | return { KHR_shader_subgroup_basic, NV_shader_thread_group }; |
16830 | case SubgroupBroadcast_First: |
16831 | return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot }; |
16832 | case SubgroupBallotFindLSB_MSB: |
16833 | return { KHR_shader_subgroup_ballot, NV_shader_thread_group }; |
16834 | case SubgroupAll_Any_AllEqualBool: |
16835 | return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader }; |
16836 | case SubgroupAllEqualT: |
16837 | return {}; // depends on other features only |
16838 | case SubgroupElect: |
16839 | return {}; // depends on other features only |
16840 | case SubgroupBallot: |
16841 | return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; |
16842 | case SubgroupBarrier: |
16843 | return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader }; |
16844 | case SubgroupMemBarrier: |
16845 | return { KHR_shader_subgroup_basic }; |
16846 | case SubgroupInverseBallot_InclBitCount_ExclBitCout: |
16847 | return {}; |
16848 | case SubgroupBallotBitExtract: |
16849 | return { NV_shader_thread_group }; |
16850 | case SubgroupBallotBitCount: |
16851 | return {}; |
16852 | default: |
16853 | return {}; |
16854 | } |
16855 | } |
16856 | |
16857 | CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask( |
16858 | const SmallVector<Feature> &features) |
16859 | { |
16860 | FeatureMask mask = 0; |
16861 | for (Feature f : features) |
16862 | mask |= FeatureMask(1) << f; |
16863 | return mask; |
16864 | } |
16865 | |
16866 | CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result() |
16867 | { |
16868 | for (auto &weight : weights) |
16869 | weight = 0; |
16870 | |
16871 | // Make sure KHR_shader_subgroup extensions are always preferred. |
16872 | const uint32_t big_num = FeatureCount; |
16873 | weights[KHR_shader_subgroup_ballot] = big_num; |
16874 | weights[KHR_shader_subgroup_basic] = big_num; |
16875 | weights[KHR_shader_subgroup_vote] = big_num; |
16876 | } |
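
// Effect of the seeding above, as a worked example: resolve() adds one weight point to every candidate
// that can implement a requested feature, so with the KHR candidates starting FeatureCount points ahead
// they can never be out-voted by vendor extensions; get_candidates_for_feature(ft, r) then sorts
// candidates by descending weight, breaking ties in favour of the lower enum value (i.e. the KHR ones).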
16877 | |
16878 | void CompilerGLSL::request_workaround_wrapper_overload(TypeID id) |
16879 | { |
16880 | // Must be ordered to maintain deterministic output, so vector is appropriate. |
16881 | if (find(first: begin(cont&: workaround_ubo_load_overload_types), last: end(cont&: workaround_ubo_load_overload_types), val: id) == |
16882 | end(cont&: workaround_ubo_load_overload_types)) |
16883 | { |
16884 | force_recompile(); |
16885 | workaround_ubo_load_overload_types.push_back(t: id); |
16886 | } |
16887 | } |
16888 | |
16889 | void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr) |
16890 | { |
16891 | // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic. |
16892 | // To load these types correctly, we must first wrap them in a dummy function whose only purpose is to |
16893 | // ensure row_major decoration is actually respected. |
16894 | auto *var = maybe_get_backing_variable(chain: ptr); |
16895 | if (!var) |
16896 | return; |
16897 | |
16898 | auto &backing_type = get<SPIRType>(id: var->basetype); |
16899 | bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform && |
16900 | has_decoration(id: backing_type.self, decoration: DecorationBlock); |
16901 | if (!is_ubo) |
16902 | return; |
16903 | |
16904 | auto *type = &get<SPIRType>(id: loaded_type); |
16905 | bool rewrite = false; |
16906 | |
16907 | if (is_matrix(type: *type)) |
16908 | { |
16909 | // To avoid adding a lot of unnecessary meta tracking to forward the row_major state, |
16910 | // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state. |
16911 | // If there is any row-major action going on, we apply the workaround. |
16912 | // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution. |
16913 | // If an access chain occurred, the workaround is not required, so loads of vectors or scalars do not need it. |
16914 | type = &backing_type; |
16915 | } |
16916 | |
16917 | if (type->basetype == SPIRType::Struct) |
16918 | { |
16919 | // If we're loading a struct where any member is a row-major matrix, apply the workaround. |
16920 | for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++) |
16921 | { |
16922 | if (combined_decoration_for_member(type: *type, index: i).get(bit: DecorationRowMajor)) |
16923 | { |
16924 | rewrite = true; |
16925 | break; |
16926 | } |
16927 | } |
16928 | } |
16929 | |
16930 | if (rewrite) |
16931 | { |
16932 | request_workaround_wrapper_overload(id: loaded_type); |
16933 | expr = join(ts: "spvWorkaroundRowMajor(" , ts&: expr, ts: ")" ); |
16934 | } |
16935 | } |
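
// Illustrative result for rewrite_load_for_wrapped_row_major() above (hypothetical names): a load of a
// row-major matrix member from a UBO is rewritten from
//
//   _ubo.worldMatrix
//
// to
//
//   spvWorkaroundRowMajor(_ubo.worldMatrix)
//
// and request_workaround_wrapper_overload() forces a recompile so that an spvWorkaroundRowMajor()
// overload for the loaded type gets emitted on the next pass.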
16936 | |
16937 | void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component) |
16938 | { |
16939 | masked_output_locations.insert(x: { .location: location, .component: component }); |
16940 | } |
16941 | |
16942 | void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin) |
16943 | { |
16944 | masked_output_builtins.insert(x: builtin); |
16945 | } |
16946 | |
16947 | bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const |
16948 | { |
16949 | auto &type = get<SPIRType>(id: var.basetype); |
16950 | bool is_block = has_decoration(id: type.self, decoration: DecorationBlock); |
16951 | // Blocks by themselves are never masked. Must be masked per-member. |
16952 | if (is_block) |
16953 | return false; |
16954 | |
16955 | bool is_builtin = has_decoration(id: var.self, decoration: DecorationBuiltIn); |
16956 | |
16957 | if (is_builtin) |
16958 | { |
16959 | return is_stage_output_builtin_masked(builtin: BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn))); |
16960 | } |
16961 | else |
16962 | { |
16963 | if (!has_decoration(id: var.self, decoration: DecorationLocation)) |
16964 | return false; |
16965 | |
16966 | return is_stage_output_location_masked( |
16967 | location: get_decoration(id: var.self, decoration: DecorationLocation), |
16968 | component: get_decoration(id: var.self, decoration: DecorationComponent)); |
16969 | } |
16970 | } |
16971 | |
16972 | bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const |
16973 | { |
16974 | auto &type = get<SPIRType>(id: var.basetype); |
16975 | bool is_block = has_decoration(id: type.self, decoration: DecorationBlock); |
16976 | if (!is_block) |
16977 | return false; |
16978 | |
16979 | BuiltIn builtin = BuiltInMax; |
16980 | if (is_member_builtin(type, index, builtin: &builtin)) |
16981 | { |
16982 | return is_stage_output_builtin_masked(builtin); |
16983 | } |
16984 | else |
16985 | { |
16986 | uint32_t location = get_declared_member_location(var, mbr_idx: index, strip_array); |
16987 | uint32_t component = get_member_decoration(id: type.self, index, decoration: DecorationComponent); |
16988 | return is_stage_output_location_masked(location, component); |
16989 | } |
16990 | } |
16991 | |
16992 | bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const |
16993 | { |
16994 | return masked_output_locations.count(x: { .location: location, .component: component }) != 0; |
16995 | } |
16996 | |
16997 | bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const |
16998 | { |
16999 | return masked_output_builtins.count(x: builtin) != 0; |
17000 | } |
17001 | |
17002 | uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const |
17003 | { |
17004 | auto &block_type = get<SPIRType>(id: var.basetype); |
17005 | if (has_member_decoration(id: block_type.self, index: mbr_idx, decoration: DecorationLocation)) |
17006 | return get_member_decoration(id: block_type.self, index: mbr_idx, decoration: DecorationLocation); |
17007 | else |
17008 | return get_accumulated_member_location(var, mbr_idx, strip_array); |
17009 | } |
17010 | |
17011 | uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const |
17012 | { |
17013 | auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); |
17014 | uint32_t location = get_decoration(id: var.self, decoration: DecorationLocation); |
17015 | |
17016 | for (uint32_t i = 0; i < mbr_idx; i++) |
17017 | { |
17018 | auto &mbr_type = get<SPIRType>(id: type.member_types[i]); |
17019 | |
17020 | // Start counting from any place we have a new location decoration. |
17021 | if (has_member_decoration(id: type.self, index: i, decoration: DecorationLocation)) |
17022 | location = get_member_decoration(id: type.self, index: i, decoration: DecorationLocation); |
17023 | |
17024 | uint32_t location_count = type_to_location_count(type: mbr_type); |
17025 | location += location_count; |
17026 | } |
17027 | |
17028 | return location; |
17029 | } |
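
// Worked example for get_accumulated_member_location() above (hypothetical block): for a block decorated
// with Location 2 and members { vec4 a; mat4 b; float c; }, member index 2 ("c") accumulates
// 2 + 1 (vec4) + 4 (mat4) = 7, unless an explicit Location decoration on an earlier member restarts the count.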
17030 | |
17031 | StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr) |
17032 | { |
17033 | auto *var = maybe_get_backing_variable(chain: ptr); |
17034 | |
17035 | // If the expression has been lowered to a temporary, we need to use the Generic storage class. |
17036 | // We're looking for the effective storage class of a given expression. |
17037 | // An access chain or forwarded OpLoads from such access chains |
17038 | // will generally have the storage class of the underlying variable, but if the load was not forwarded |
17039 | // we have lost any address space qualifiers. |
17040 | bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(id: ptr).access_chain && |
17041 | (forced_temporaries.count(x: ptr) != 0 || forwarded_temporaries.count(x: ptr) == 0); |
17042 | |
17043 | if (var && !forced_temporary) |
17044 | { |
17045 | if (variable_decl_is_remapped_storage(var: *var, storage: StorageClassWorkgroup)) |
17046 | return StorageClassWorkgroup; |
17047 | if (variable_decl_is_remapped_storage(var: *var, storage: StorageClassStorageBuffer)) |
17048 | return StorageClassStorageBuffer; |
17049 | |
17050 | // Normalize SSBOs to StorageBuffer here. |
17051 | if (var->storage == StorageClassUniform && |
17052 | has_decoration(id: get<SPIRType>(id: var->basetype).self, decoration: DecorationBufferBlock)) |
17053 | return StorageClassStorageBuffer; |
17054 | else |
17055 | return var->storage; |
17056 | } |
17057 | else |
17058 | return expression_type(id: ptr).storage; |
17059 | } |
17060 | |
17061 | uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const |
17062 | { |
17063 | uint32_t count; |
17064 | if (type.basetype == SPIRType::Struct) |
17065 | { |
17066 | uint32_t mbr_count = uint32_t(type.member_types.size()); |
17067 | count = 0; |
17068 | for (uint32_t i = 0; i < mbr_count; i++) |
17069 | count += type_to_location_count(type: get<SPIRType>(id: type.member_types[i])); |
17070 | } |
17071 | else |
17072 | { |
17073 | count = type.columns > 1 ? type.columns : 1; |
17074 | } |
17075 | |
17076 | uint32_t dim_count = uint32_t(type.array.size()); |
17077 | for (uint32_t i = 0; i < dim_count; i++) |
17078 | count *= to_array_size_literal(type, index: i); |
17079 | |
17080 | return count; |
17081 | } |
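
// Worked examples of the counting rules above: a scalar or vector consumes 1 location, a matrix consumes
// one location per column (mat4 -> 4), array dimensions multiply the total (vec4[3] -> 3, mat3[2] -> 6),
// and a struct consumes the sum over its members.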
17082 | |