/*
 * Copyright 2015-2021 Arm Limited
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <limits>
#include <locale.h>
#include <utility>
#include <array>

#ifndef _WIN32
#ifndef __ghs__
#include <langinfo.h>
#endif
#endif

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

enum ExtraSubExpressionType
{
	// Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map.
	EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000,
	EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000
};
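
// For illustration (a sketch, not part of the original source): a key for
// extra_sub_expressions is formed by OR-ing one of the masks above with an
// ordinary SPIR-V ID, e.g.
//   uint32_t key = some_id | EXTRA_SUB_EXPRESSION_TYPE_AUX;
// so entries from different "address spaces" can coexist in one map.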

static bool is_unsigned_opcode(Op op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case OpShiftRightLogical:
	case OpUGreaterThan:
	case OpUGreaterThanEqual:
	case OpULessThan:
	case OpULessThanEqual:
	case OpUConvert:
	case OpUDiv:
	case OpUMod:
	case OpUMulExtended:
	case OpConvertUToF:
	case OpConvertFToU:
		return true;

	default:
		return false;
	}
}

static bool is_unsigned_glsl_opcode(GLSLstd450 op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case GLSLstd450UClamp:
	case GLSLstd450UMin:
	case GLSLstd450UMax:
	case GLSLstd450FindUMsb:
		return true;

	default:
		return false;
	}
}

static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
	case BufferPackingStd140:
	case BufferPackingStd140EnhancedLayout:
		return true;

	default:
		return false;
	}
}

static bool packing_is_hlsl(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
		return true;

	default:
		return false;
	}
}

static bool packing_has_flexible_offset(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140:
	case BufferPackingStd430:
	case BufferPackingScalar:
	case BufferPackingHLSLCbuffer:
		return false;

	default:
		return true;
	}
}

static bool packing_is_scalar(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingScalar:
	case BufferPackingScalarEnhancedLayout:
		return true;

	default:
		return false;
	}
}

static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140EnhancedLayout:
		return BufferPackingStd140;
	case BufferPackingStd430EnhancedLayout:
		return BufferPackingStd430;
	case BufferPackingHLSLCbufferPackOffset:
		return BufferPackingHLSLCbuffer;
	case BufferPackingScalarEnhancedLayout:
		return BufferPackingScalar;
	default:
		return packing;
	}
}
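
// For illustration: the "EnhancedLayout" variants only matter at the top level,
// where explicit offset qualifiers can be emitted. GLSL cannot attach offsets
// to members of a plain struct declaration, so a struct nested inside an
// enhanced-layout block degrades to the plain standard:
//   // packing_to_substruct_packing(BufferPackingStd430EnhancedLayout)
//   //     == BufferPackingStd430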

void CompilerGLSL::init()
{
	if (ir.source.known)
	{
		options.es = ir.source.es;
		options.version = ir.source.version;
	}

	// Query the locale to see what the decimal point is.
	// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
	// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
	// tricky.
#ifdef _WIN32
	// On Windows, localeconv uses thread-local storage, so it should be fine.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#elif (defined(__ANDROID__) && __ANDROID_API__ < 26) || defined(__ghs__) || defined(__QNXNTO__) || defined(__VXWORKS__)
	// nl_langinfo is not supported on this platform, fall back to the worse alternative.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#else
	// localeconv, the portable function, is not MT safe ...
	const char *decimal_point = nl_langinfo(RADIXCHAR);
	if (decimal_point && *decimal_point != '\0')
		current_locale_radix_character = *decimal_point;
#endif
}
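
// For illustration (not in the original source): in a locale such as "de_DE"
// the radix character is ',', so printf-style float conversion yields "1,5".
// The compiler later rewrites that back to "1.5" using
// current_locale_radix_character so the emitted GLSL stays valid.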

static const char *to_pls_layout(PlsFormat format)
{
	switch (format)
	{
	case PlsR11FG11FB10F:
		return "layout(r11f_g11f_b10f) ";
	case PlsR32F:
		return "layout(r32f) ";
	case PlsRG16F:
		return "layout(rg16f) ";
	case PlsRGB10A2:
		return "layout(rgb10_a2) ";
	case PlsRGBA8:
		return "layout(rgba8) ";
	case PlsRG16:
		return "layout(rg16) ";
	case PlsRGBA8I:
		return "layout(rgba8i) ";
	case PlsRG16I:
		return "layout(rg16i) ";
	case PlsRGB10A2UI:
		return "layout(rgb10_a2ui) ";
	case PlsRGBA8UI:
		return "layout(rgba8ui) ";
	case PlsRG16UI:
		return "layout(rg16ui) ";
	case PlsR32UI:
		return "layout(r32ui) ";
	default:
		return "";
	}
}
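
// Roughly, the returned qualifier prefixes a pixel-local-storage member in the
// emitted GLSL, e.g. (names made up for the example):
//   __pixel_localEXT FragDataLocal { layout(rg16f) mediump vec2 normal; } pls;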

static std::pair<spv::Op, SPIRType::BaseType> pls_format_to_basetype(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR11FG11FB10F:
	case PlsR32F:
	case PlsRG16F:
	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRG16:
		return std::make_pair(spv::OpTypeFloat, SPIRType::Float);

	case PlsRGBA8I:
	case PlsRG16I:
		return std::make_pair(spv::OpTypeInt, SPIRType::Int);

	case PlsRGB10A2UI:
	case PlsRGBA8UI:
	case PlsRG16UI:
	case PlsR32UI:
		return std::make_pair(spv::OpTypeInt, SPIRType::UInt);
	}
}

static uint32_t pls_format_to_components(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR32F:
	case PlsR32UI:
		return 1;

	case PlsRG16F:
	case PlsRG16:
	case PlsRG16UI:
	case PlsRG16I:
		return 2;

	case PlsR11FG11FB10F:
		return 3;

	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRGBA8I:
	case PlsRGB10A2UI:
	case PlsRGBA8UI:
		return 4;
	}
}

const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
{
	static const char *const swizzle[4][4] = {
		{ ".x", ".y", ".z", ".w" },
		{ ".xy", ".yz", ".zw", nullptr },
		{ ".xyz", ".yzw", nullptr, nullptr },
#if defined(__GNUC__) && (__GNUC__ == 9)
		// This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
		// This array ends up being compiled as all nullptrs, tripping the assertions below.
		{ "", nullptr, nullptr, "$" },
#else
		{ "", nullptr, nullptr, nullptr },
#endif
	};

	assert(vecsize >= 1 && vecsize <= 4);
	assert(index >= 0 && index < 4);
	assert(swizzle[vecsize - 1][index]);

	return swizzle[vecsize - 1][index];
}
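
// For illustration: vector_swizzle(2, 1) returns ".yz", i.e. a swizzle that
// extracts two contiguous components starting at component 1, so appending it
// to an expression yields "expr.yz". vecsize == 4 with index == 0 returns ""
// because taking the whole vector needs no swizzle at all.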

void CompilerGLSL::reset(uint32_t iteration_count)
{
	// Sanity check the iteration count to be robust against a certain class of bugs where
	// we keep forcing recompilations without making clear forward progress.
	// In buggy situations we will loop forever, or loop for an unbounded number of iterations.
	// Certain types of recompilations are considered to make forward progress,
	// but in almost all situations, we'll never see more than 3 iterations.
	// It is highly context-sensitive when we need to force recompilation,
	// and it is not practical with the current architecture
	// to resolve everything up front.
	if (iteration_count >= options.force_recompile_max_debug_iterations && !is_force_recompile_forward_progress)
		SPIRV_CROSS_THROW("Maximum compilation loops detected and no forward progress was made. Must be a SPIRV-Cross bug!");

	// We do some speculative optimizations which should pretty much always work out,
	// but just in case the SPIR-V is rather weird, recompile until it's happy.
	// This typically only means one extra pass.
	clear_force_recompile();

	// Clear invalid expression tracking.
	invalid_expressions.clear();
	composite_insert_overwritten.clear();
	current_function = nullptr;

	// Clear temporary usage tracking.
	expression_usage_counts.clear();
	forwarded_temporaries.clear();
	suppressed_usage_tracking.clear();

	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred.
	flushed_phi_variables.clear();

	current_emitting_switch_stack.clear();

	reset_name_caches();

	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
		func.active = false;
		func.flush_undeclared = true;
	});

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });

	ir.reset_all_of_type<SPIRExpression>();
	ir.reset_all_of_type<SPIRAccessChain>();

	statement_count = 0;
	indent = 0;
	current_loop_level = 0;
}

void CompilerGLSL::remap_pls_variables()
{
	for (auto &input : pls_inputs)
	{
		auto &var = get<SPIRVariable>(input.id);

		bool input_is_target = false;
		if (var.storage == StorageClassUniformConstant)
		{
			auto &type = get<SPIRType>(var.basetype);
			input_is_target = type.image.dim == DimSubpassData;
		}

		if (var.storage != StorageClassInput && !input_is_target)
			SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
		var.remapped_variable = true;
	}

	for (auto &output : pls_outputs)
	{
		auto &var = get<SPIRVariable>(output.id);
		if (var.storage != StorageClassOutput)
			SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
		var.remapped_variable = true;
	}
}

void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent)
{
	subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
	inout_color_attachments.push_back({ color_location, coherent });
}
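
// Roughly, after remap_ext_framebuffer_fetch(0, 0, true), loads from subpass
// input 0 are redirected to read the color attachment at location 0, and the
// output declaration becomes something like (illustrative GLSL only):
//   layout(location = 0) inout highp vec4 FragColor;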

bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const
{
	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
	                    [&](const std::pair<uint32_t, bool> &elem) {
		                    return elem.first == location;
	                    }) != end(inout_color_attachments);
}

bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const
{
	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
	                    [&](const std::pair<uint32_t, bool> &elem) {
		                    return elem.first == location && !elem.second;
	                    }) != end(inout_color_attachments);
}

void CompilerGLSL::find_static_extensions()
{
	ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
		if (type.basetype == SPIRType::Double)
		{
			if (options.es)
				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
			if (!options.es && options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader_fp64");
		}
		else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
		{
			if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310.
				SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310.");
			require_extension_internal("GL_ARB_gpu_shader_int64");
		}
		else if (type.basetype == SPIRType::Half)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_16bit_storage");
		}
		else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_8bit_storage");
		}
		else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_16bit_storage");
		}
	});

	auto &execution = get_entry_point();
	switch (execution.model)
	{
	case ExecutionModelGLCompute:
		if (!options.es && options.version < 430)
			require_extension_internal("GL_ARB_compute_shader");
		if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
		break;

	case ExecutionModelGeometry:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_geometry_shader");
		if (!options.es && options.version < 150)
			require_extension_internal("GL_ARB_geometry_shader4");

		if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
		{
			// Instanced GS is part of 400 core or this extension.
			if (!options.es && options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader5");
		}
		break;

	case ExecutionModelTessellationEvaluation:
	case ExecutionModelTessellationControl:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_tessellation_shader");
		if (!options.es && options.version < 400)
			require_extension_internal("GL_ARB_tessellation_shader");
		break;

	case ExecutionModelRayGenerationKHR:
	case ExecutionModelIntersectionKHR:
	case ExecutionModelAnyHitKHR:
	case ExecutionModelClosestHitKHR:
	case ExecutionModelMissKHR:
	case ExecutionModelCallableKHR:
		// NV enums are aliases.
		if (options.es || options.version < 460)
			SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");

		// Need to figure out if we should target KHR or NV extension based on capabilities.
		for (auto &cap : ir.declared_capabilities)
		{
			if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR ||
			    cap == CapabilityRayTraversalPrimitiveCullingKHR)
			{
				ray_tracing_is_khr = true;
				break;
			}
		}

		if (ray_tracing_is_khr)
		{
			// In KHR ray tracing we pass payloads by pointer instead of location,
			// so make sure we assign locations properly.
			ray_tracing_khr_fixup_locations();
			require_extension_internal("GL_EXT_ray_tracing");
		}
		else
			require_extension_internal("GL_NV_ray_tracing");
		break;

	case ExecutionModelMeshEXT:
	case ExecutionModelTaskEXT:
		if (options.es || options.version < 450)
			SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above.");
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("Mesh shaders require Vulkan semantics.");
		require_extension_internal("GL_EXT_mesh_shader");
		break;

	default:
		break;
	}

	if (!pls_inputs.empty() || !pls_outputs.empty())
	{
		if (execution.model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
		require_extension_internal("GL_EXT_shader_pixel_local_storage");
	}

	if (!inout_color_attachments.empty())
	{
		if (execution.model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
		if (options.vulkan_semantics)
			SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");

		bool has_coherent = false;
		bool has_incoherent = false;

		for (auto &att : inout_color_attachments)
		{
			if (att.second)
				has_coherent = true;
			else
				has_incoherent = true;
		}

		if (has_coherent)
			require_extension_internal("GL_EXT_shader_framebuffer_fetch");
		if (has_incoherent)
			require_extension_internal("GL_EXT_shader_framebuffer_fetch_non_coherent");
	}

	if (options.separate_shader_objects && !options.es && options.version < 410)
		require_extension_internal("GL_ARB_separate_shader_objects");

	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
	{
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
		if (options.es && options.version < 320)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
		else if (!options.es && options.version < 450)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
		require_extension_internal("GL_EXT_buffer_reference2");
	}
	else if (ir.addressing_model != AddressingModelLogical)
	{
		SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
	}

	// Check for nonuniform qualifier and passthrough.
	// Instead of looping over all decorations to find this, just look at capabilities.
	for (auto &cap : ir.declared_capabilities)
	{
		switch (cap)
		{
		case CapabilityShaderNonUniformEXT:
			if (!options.vulkan_semantics)
				require_extension_internal("GL_NV_gpu_shader5");
			else
				require_extension_internal("GL_EXT_nonuniform_qualifier");
			break;
		case CapabilityRuntimeDescriptorArrayEXT:
			if (!options.vulkan_semantics)
				SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
			require_extension_internal("GL_EXT_nonuniform_qualifier");
			break;

		case CapabilityGeometryShaderPassthroughNV:
			if (execution.model == ExecutionModelGeometry)
			{
				require_extension_internal("GL_NV_geometry_shader_passthrough");
				execution.geometry_passthrough = true;
			}
			break;

		case CapabilityVariablePointers:
		case CapabilityVariablePointersStorageBuffer:
			SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");

		case CapabilityMultiView:
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_multiview");
			else
			{
				require_extension_internal("GL_OVR_multiview2");
				if (options.ovr_multiview_view_count == 0)
					SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2.");
				if (get_execution_model() != ExecutionModelVertex)
					SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
			}
			break;

		case CapabilityRayQueryKHR:
			if (options.es || options.version < 460 || !options.vulkan_semantics)
				SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
			require_extension_internal("GL_EXT_ray_query");
			ray_tracing_is_khr = true;
			break;

		case CapabilityRayTraversalPrimitiveCullingKHR:
			if (options.es || options.version < 460 || !options.vulkan_semantics)
				SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
			require_extension_internal("GL_EXT_ray_flags_primitive_culling");
			ray_tracing_is_khr = true;
			break;

		default:
			break;
		}
	}

	if (options.ovr_multiview_view_count)
	{
		if (options.vulkan_semantics)
			SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics.");
		if (get_execution_model() != ExecutionModelVertex)
			SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
		require_extension_internal("GL_OVR_multiview2");
	}

	// KHR one is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR.
	for (auto &ext : ir.declared_extensions)
		if (ext == "SPV_NV_fragment_shader_barycentric")
			barycentric_is_nv = true;
}

void CompilerGLSL::require_polyfill(Polyfill polyfill, bool relaxed)
{
	uint32_t &polyfills = (relaxed && (options.es || options.vulkan_semantics)) ?
	                      required_polyfills_relaxed : required_polyfills;

	if ((polyfills & polyfill) == 0)
	{
		polyfills |= polyfill;
		force_recompile();
	}
}

void CompilerGLSL::ray_tracing_khr_fixup_locations()
{
	uint32_t location = 0;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		// Incoming payload storage can also be used for tracing.
		if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR &&
		    var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR)
			return;
		if (is_hidden_variable(var))
			return;
		set_decoration(var.self, DecorationLocation, location++);
	});
}
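
// Roughly, after this fixup every payload/callable-data variable carries a
// sequential Location decoration, so the emitted GLSL reads, e.g.:
//   layout(location = 0) rayPayloadEXT vec4 payload;   // illustrative
//   layout(location = 1) callableDataEXT MyData data;  // illustrative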

string CompilerGLSL::compile()
{
	ir.fixup_reserved_names();

	if (!options.vulkan_semantics)
	{
		// Only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers.
		backend.nonuniform_qualifier = "";
		backend.needs_row_major_load_workaround = options.enable_row_major_load_workaround;
	}
	backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
	backend.force_gl_in_out_block = true;
	backend.supports_extensions = true;
	backend.use_array_constructor = true;
	backend.workgroup_size_is_hidden = true;
	backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics;
	backend.support_precise_qualifier =
	    (!options.es && options.version >= 400) || (options.es && options.version >= 320);

	if (is_legacy_es())
		backend.support_case_fallthrough = false;

	// Scan the SPIR-V to find trivial uses of extensions.
	fixup_anonymous_struct_names();
	fixup_type_alias();
	reorder_type_alias();
	build_function_control_flow_graphs_and_analyze();
	find_static_extensions();
	fixup_image_load_store_access();
	update_active_builtins();
	analyze_image_and_sampler_usage();
	analyze_interlocked_resource_usage();
	if (!inout_color_attachments.empty())
		emit_inout_fragment_outputs_copy_to_subpass_inputs();

	// Shaders might cast unrelated data to pointers of non-block types.
	// Find all such instances and make sure we can cast the pointers to a synthesized block type.
	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
		analyze_non_block_pointer_types();

	uint32_t pass_count = 0;
	do
	{
		reset(pass_count);

		buffer.reset();

		emit_header();
		emit_resources();
		emit_extension_workarounds(get_execution_model());

		if (required_polyfills != 0)
			emit_polyfills(required_polyfills, false);
		if ((options.es || options.vulkan_semantics) && required_polyfills_relaxed != 0)
			emit_polyfills(required_polyfills_relaxed, true);

		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());

		pass_count++;
	} while (is_forcing_recompilation());

	// Implement the interlocked wrapper function at the end.
	// The body was implemented in lieu of main().
	if (interlocked_is_complex)
	{
		statement("void main()");
		begin_scope();
		statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
		statement("SPIRV_Cross_beginInvocationInterlock();");
		statement("spvMainInterlockedBody();");
		statement("SPIRV_Cross_endInvocationInterlock();");
		end_scope();
	}

	// Entry point in GLSL is always main().
	get_entry_point().name = "main";

	return buffer.str();
}
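
// A minimal usage sketch (illustrative only; assumes a SPIR-V binary has
// already been loaded into `spirv_words`):
//
//   spirv_cross::CompilerGLSL glsl(std::move(spirv_words));
//   auto opts = glsl.get_common_options();
//   opts.version = 310;
//   opts.es = true;
//   glsl.set_common_options(opts);
//   std::string source = glsl.compile(); // runs the passes above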

std::string CompilerGLSL::get_partial_source()
{
	return buffer.str();
}

void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
                                        const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
{
	auto &execution = get_entry_point();
	bool builtin_workgroup = execution.workgroup_size.constant != 0;
	bool use_local_size_id = !builtin_workgroup && execution.flags.get(ExecutionModeLocalSizeId);

	if (wg_x.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
		else
			arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
	}
	else if (use_local_size_id && execution.workgroup_size.id_x)
		arguments.push_back(join("local_size_x = ", get<SPIRConstant>(execution.workgroup_size.id_x).scalar()));
	else
		arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));

	if (wg_y.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
		else
			arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
	}
	else if (use_local_size_id && execution.workgroup_size.id_y)
		arguments.push_back(join("local_size_y = ", get<SPIRConstant>(execution.workgroup_size.id_y).scalar()));
	else
		arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));

	if (wg_z.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
		else
			arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
	}
	else if (use_local_size_id && execution.workgroup_size.id_z)
		arguments.push_back(join("local_size_z = ", get<SPIRConstant>(execution.workgroup_size.id_z).scalar()));
	else
		arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
}
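
// For illustration: with a fixed size of (64, 1, 1) the collected arguments
// merge into
//   layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
// while a Vulkan spec constant on X instead yields something like
//   layout(local_size_x_id = 3, local_size_y = 1, local_size_z = 1) in;
// (the constant ID 3 is made up for the example).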

void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
{
	if (options.vulkan_semantics)
	{
		auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
		require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
	}
	else
	{
		if (!shader_subgroup_supporter.is_feature_requested(feature))
			force_recompile();
		shader_subgroup_supporter.request_feature(feature);
	}
}

void CompilerGLSL::emit_header()
{
	auto &execution = get_entry_point();
	statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");

	if (!options.es && options.version < 420)
	{
		// Needed for binding = # on UBOs, etc.
		if (options.enable_420pack_extension)
		{
			statement("#ifdef GL_ARB_shading_language_420pack");
			statement("#extension GL_ARB_shading_language_420pack : require");
			statement("#endif");
		}
		// Needed for: layout(early_fragment_tests) in;
		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
			require_extension_internal("GL_ARB_shader_image_load_store");
	}

	// Needed for: layout(post_depth_coverage) in;
	if (execution.flags.get(ExecutionModePostDepthCoverage))
		require_extension_internal("GL_ARB_post_depth_coverage");

	// Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
	bool interlock_used = execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
	                      execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
	                      execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
	                      execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT);

	if (interlock_used)
	{
		if (options.es)
		{
			if (options.version < 310)
				SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
			require_extension_internal("GL_NV_fragment_shader_interlock");
		}
		else
		{
			if (options.version < 420)
				require_extension_internal("GL_ARB_shader_image_load_store");
			require_extension_internal("GL_ARB_fragment_shader_interlock");
		}
	}

	for (auto &ext : forced_extensions)
	{
		if (ext == "GL_ARB_gpu_shader_int64")
		{
			statement("#if defined(GL_ARB_gpu_shader_int64)");
			statement("#extension GL_ARB_gpu_shader_int64 : require");
			if (!options.vulkan_semantics || options.es)
			{
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
			}
			statement("#else");
			statement("#error No extension available for 64-bit integers.");
			statement("#endif");
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
		{
			// Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
			// GL_AMD_gpu_shader_half_float is a superset, so try that first.
			statement("#if defined(GL_AMD_gpu_shader_half_float)");
			statement("#extension GL_AMD_gpu_shader_half_float : require");
			if (!options.vulkan_semantics)
			{
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
			}
			else
			{
				statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
			}
			statement("#else");
			statement("#error No extension available for FP16.");
			statement("#endif");
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int8")
		{
			if (options.vulkan_semantics)
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require");
			else
			{
				statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int8)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require");
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
				statement("#else");
				statement("#error No extension available for Int8.");
				statement("#endif");
			}
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
		{
			if (options.vulkan_semantics)
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
			else
			{
				statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int16)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
				statement("#elif defined(GL_AMD_gpu_shader_int16)");
				statement("#extension GL_AMD_gpu_shader_int16 : require");
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
				statement("#else");
				statement("#error No extension available for Int16.");
				statement("#endif");
			}
		}
		else if (ext == "GL_ARB_post_depth_coverage")
		{
			if (options.es)
				statement("#extension GL_EXT_post_depth_coverage : require");
			else
			{
				statement("#if defined(GL_ARB_post_depth_coverage)");
				statement("#extension GL_ARB_post_depth_coverage : require");
				statement("#else");
				statement("#extension GL_EXT_post_depth_coverage : require");
				statement("#endif");
			}
		}
		else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
		{
			// Soft-enable this extension on plain GLSL.
			statement("#ifdef ", ext);
			statement("#extension ", ext, " : enable");
			statement("#endif");
		}
		else if (ext == "GL_EXT_control_flow_attributes")
		{
			// These are just hints so we can conditionally enable and fallback in the shader.
			statement("#if defined(GL_EXT_control_flow_attributes)");
			statement("#extension GL_EXT_control_flow_attributes : require");
			statement("#define SPIRV_CROSS_FLATTEN [[flatten]]");
			statement("#define SPIRV_CROSS_BRANCH [[dont_flatten]]");
			statement("#define SPIRV_CROSS_UNROLL [[unroll]]");
			statement("#define SPIRV_CROSS_LOOP [[dont_unroll]]");
			statement("#else");
			statement("#define SPIRV_CROSS_FLATTEN");
			statement("#define SPIRV_CROSS_BRANCH");
			statement("#define SPIRV_CROSS_UNROLL");
			statement("#define SPIRV_CROSS_LOOP");
			statement("#endif");
		}
		else if (ext == "GL_NV_fragment_shader_interlock")
		{
			statement("#extension GL_NV_fragment_shader_interlock : require");
			statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockNV()");
			statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockNV()");
		}
		else if (ext == "GL_ARB_fragment_shader_interlock")
		{
			statement("#ifdef GL_ARB_fragment_shader_interlock");
			statement("#extension GL_ARB_fragment_shader_interlock : enable");
			statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB()");
			statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB()");
			statement("#elif defined(GL_INTEL_fragment_shader_ordering)");
			statement("#extension GL_INTEL_fragment_shader_ordering : enable");
			statement("#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL()");
			statement("#define SPIRV_Cross_endInvocationInterlock()");
			statement("#endif");
		}
		else
			statement("#extension ", ext, " : require");
	}

	if (!options.vulkan_semantics)
	{
		using Supp = ShaderSubgroupSupportHelper;
		auto result = shader_subgroup_supporter.resolve();

		for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
		{
			auto feature = static_cast<Supp::Feature>(feature_index);
			if (!shader_subgroup_supporter.is_feature_requested(feature))
				continue;

			auto exts = Supp::get_candidates_for_feature(feature, result);
			if (exts.empty())
				continue;

			statement("");

			for (auto &ext : exts)
			{
				const char *name = Supp::get_extension_name(ext);
				const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
				auto extra_names = Supp::get_extra_required_extension_names(ext);
				statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
				          (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
				for (const auto &e : extra_names)
					statement("#extension ", e, " : enable");
				statement("#extension ", name, " : require");
			}

			if (!Supp::can_feature_be_implemented_without_extensions(feature))
			{
				statement("#else");
				statement("#error No extensions available to emulate requested subgroup feature.");
			}

			statement("#endif");
		}
	}

	for (auto &header : header_lines)
		statement(header);

	SmallVector<string> inputs;
	SmallVector<string> outputs;

	switch (execution.model)
	{
	case ExecutionModelVertex:
		if (options.ovr_multiview_view_count)
			inputs.push_back(join("num_views = ", options.ovr_multiview_view_count));
		break;
	case ExecutionModelGeometry:
		if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
			inputs.push_back(join("invocations = ", execution.invocations));
		if (execution.flags.get(ExecutionModeInputPoints))
			inputs.push_back("points");
		if (execution.flags.get(ExecutionModeInputLines))
			inputs.push_back("lines");
		if (execution.flags.get(ExecutionModeInputLinesAdjacency))
			inputs.push_back("lines_adjacency");
		if (execution.flags.get(ExecutionModeTriangles))
			inputs.push_back("triangles");
		if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
			inputs.push_back("triangles_adjacency");

		if (!execution.geometry_passthrough)
		{
			// For passthrough, these are implied and cannot be declared in the shader.
			outputs.push_back(join("max_vertices = ", execution.output_vertices));
			if (execution.flags.get(ExecutionModeOutputTriangleStrip))
				outputs.push_back("triangle_strip");
			if (execution.flags.get(ExecutionModeOutputPoints))
				outputs.push_back("points");
			if (execution.flags.get(ExecutionModeOutputLineStrip))
				outputs.push_back("line_strip");
		}
		break;

	case ExecutionModelTessellationControl:
		if (execution.flags.get(ExecutionModeOutputVertices))
			outputs.push_back(join("vertices = ", execution.output_vertices));
		break;

	case ExecutionModelTessellationEvaluation:
		if (execution.flags.get(ExecutionModeQuads))
			inputs.push_back("quads");
		if (execution.flags.get(ExecutionModeTriangles))
			inputs.push_back("triangles");
		if (execution.flags.get(ExecutionModeIsolines))
			inputs.push_back("isolines");
		if (execution.flags.get(ExecutionModePointMode))
			inputs.push_back("point_mode");

		if (!execution.flags.get(ExecutionModeIsolines))
		{
			if (execution.flags.get(ExecutionModeVertexOrderCw))
				inputs.push_back("cw");
			if (execution.flags.get(ExecutionModeVertexOrderCcw))
				inputs.push_back("ccw");
		}

		if (execution.flags.get(ExecutionModeSpacingFractionalEven))
			inputs.push_back("fractional_even_spacing");
		if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
			inputs.push_back("fractional_odd_spacing");
		if (execution.flags.get(ExecutionModeSpacingEqual))
			inputs.push_back("equal_spacing");
		break;

	case ExecutionModelGLCompute:
	case ExecutionModelTaskEXT:
	case ExecutionModelMeshEXT:
	{
		if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId))
		{
			SpecializationConstant wg_x, wg_y, wg_z;
			get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

			// If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro
			// declarations before we can emit the work group size.
			if (options.vulkan_semantics ||
			    ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
				build_workgroup_size(inputs, wg_x, wg_y, wg_z);
		}
		else
		{
			inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
			inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
			inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
		}

		if (execution.model == ExecutionModelMeshEXT)
		{
			outputs.push_back(join("max_vertices = ", execution.output_vertices));
			outputs.push_back(join("max_primitives = ", execution.output_primitives));
			if (execution.flags.get(ExecutionModeOutputTrianglesEXT))
				outputs.push_back("triangles");
			else if (execution.flags.get(ExecutionModeOutputLinesEXT))
				outputs.push_back("lines");
			else if (execution.flags.get(ExecutionModeOutputPoints))
				outputs.push_back("points");
		}
		break;
	}

	case ExecutionModelFragment:
		if (options.es)
		{
			switch (options.fragment.default_float_precision)
			{
			case Options::Lowp:
				statement("precision lowp float;");
				break;

			case Options::Mediump:
				statement("precision mediump float;");
				break;

			case Options::Highp:
				statement("precision highp float;");
				break;

			default:
				break;
			}

			switch (options.fragment.default_int_precision)
			{
			case Options::Lowp:
				statement("precision lowp int;");
				break;

			case Options::Mediump:
				statement("precision mediump int;");
				break;

			case Options::Highp:
				statement("precision highp int;");
				break;

			default:
				break;
			}
		}

		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
			inputs.push_back("early_fragment_tests");
		if (execution.flags.get(ExecutionModePostDepthCoverage))
			inputs.push_back("post_depth_coverage");

		if (interlock_used)
			statement("#if defined(GL_ARB_fragment_shader_interlock)");

		if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
			statement("layout(pixel_interlock_ordered) in;");
		else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
			statement("layout(pixel_interlock_unordered) in;");
		else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
			statement("layout(sample_interlock_ordered) in;");
		else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
			statement("layout(sample_interlock_unordered) in;");

		if (interlock_used)
		{
			statement("#elif !defined(GL_INTEL_fragment_shader_ordering)");
			statement("#error Fragment Shader Interlock/Ordering extension missing!");
			statement("#endif");
		}

		if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
			statement("layout(depth_greater) out float gl_FragDepth;");
		else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
			statement("layout(depth_less) out float gl_FragDepth;");

		break;

	default:
		break;
	}

	for (auto &cap : ir.declared_capabilities)
		if (cap == CapabilityRayTraversalPrimitiveCullingKHR)
			statement("layout(primitive_culling);");

	if (!inputs.empty())
		statement("layout(", merge(inputs), ") in;");
	if (!outputs.empty())
		statement("layout(", merge(outputs), ") out;");

	statement("");
}
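
// For illustration, a representative header this function might emit for a
// small ES compute shader (the exact output varies with options and the
// SPIR-V features in use):
//
//   #version 310 es
//   layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;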

bool CompilerGLSL::type_is_empty(const SPIRType &type)
{
	return type.basetype == SPIRType::Struct && type.member_types.empty();
}

void CompilerGLSL::emit_struct(SPIRType &type)
{
	// Struct types can be stamped out multiple times
	// with just different offsets, matrix layouts, etc ...
	// Type-punning with these types is legal, which complicates things
	// when we are storing struct and array types in an SSBO for example.
	// If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
	if (type.type_alias != TypeID(0) &&
	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
		return;

	add_resource_name(type.self);
	auto name = type_to_glsl(type);

	statement(!backend.explicit_struct_type ? "struct " : "", name);
	begin_scope();

	type.member_name_cache.clear();

	uint32_t i = 0;
	bool emitted = false;
	for (auto &member : type.member_types)
	{
		add_member_name(type, i);
		emit_struct_member(type, member, i);
		i++;
		emitted = true;
	}

	// Don't declare empty structs in GLSL, this is not allowed.
	if (type_is_empty(type) && !backend.supports_empty_struct)
	{
		statement("int empty_struct_member;");
		emitted = true;
	}

	if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
		emit_struct_padding_target(type);

	end_scope_decl();

	if (emitted)
		statement("");
}
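
// For illustration, the emitted declaration is plain GLSL, e.g.
// (member names and types made up for the example):
//
//   struct Light
//   {
//       vec3 position;
//       float radius;
//   };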

string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
{
	string res;
	//if (flags & (1ull << DecorationSmooth))
	//	res += "smooth ";
	if (flags.get(DecorationFlat))
		res += "flat ";
	if (flags.get(DecorationNoPerspective))
	{
		if (options.es)
		{
			if (options.version < 300)
				SPIRV_CROSS_THROW("noperspective requires ESSL 300.");
			require_extension_internal("GL_NV_shader_noperspective_interpolation");
		}
		else if (is_legacy_desktop())
			require_extension_internal("GL_EXT_gpu_shader4");
		res += "noperspective ";
	}
	if (flags.get(DecorationCentroid))
		res += "centroid ";
	if (flags.get(DecorationPatch))
		res += "patch ";
	if (flags.get(DecorationSample))
	{
		if (options.es)
		{
			if (options.version < 300)
				SPIRV_CROSS_THROW("sample requires ESSL 300.");
			else if (options.version < 320)
				require_extension_internal("GL_OES_shader_multisample_interpolation");
		}
		res += "sample ";
	}
	if (flags.get(DecorationInvariant) && (options.es || options.version >= 120))
		res += "invariant ";
	if (flags.get(DecorationPerPrimitiveEXT))
	{
		res += "perprimitiveEXT ";
		require_extension_internal("GL_EXT_mesh_shader");
	}

	if (flags.get(DecorationExplicitInterpAMD))
	{
		require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
		res += "__explicitInterpAMD ";
	}

	if (flags.get(DecorationPerVertexKHR))
	{
		if (options.es && options.version < 320)
			SPIRV_CROSS_THROW("pervertexEXT requires ESSL 320.");
		else if (!options.es && options.version < 450)
			SPIRV_CROSS_THROW("pervertexEXT requires GLSL 450.");

		if (barycentric_is_nv)
		{
			require_extension_internal("GL_NV_fragment_shader_barycentric");
			res += "pervertexNV ";
		}
		else
		{
			require_extension_internal("GL_EXT_fragment_shader_barycentric");
			res += "pervertexEXT ";
		}
	}

	return res;
}
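
// Roughly, a Bitset with DecorationNoPerspective and DecorationCentroid set
// maps to the string "noperspective centroid ", which then prefixes the
// variable declaration:
//   noperspective centroid in vec2 vCoord;  // illustrative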

string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
	if (is_legacy())
		return "";

	bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
	if (!is_block)
		return "";

	auto &memb = ir.meta[type.self].members;
	if (index >= memb.size())
		return "";
	auto &dec = memb[index];

	SmallVector<string> attr;

	if (has_member_decoration(type.self, index, DecorationPassthroughNV))
		attr.push_back("passthrough");

	// We can only apply layouts on members in block interfaces.
	// This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
	// This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
	// has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
	//
	// We would like to go from (SPIR-V style):
	//
	// struct Foo { layout(row_major) mat4 matrix; };
	// buffer UBO { Foo foo; };
	//
	// to
	//
	// struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
	// buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
	auto flags = combined_decoration_for_member(type, index);

	if (flags.get(DecorationRowMajor))
		attr.push_back("row_major");
	// We don't emit any global layouts, so column_major is default.
	//if (flags & (1ull << DecorationColMajor))
	//	attr.push_back("column_major");

	if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
		attr.push_back(join("location = ", dec.location));

	// Can only declare component if we can declare location.
	if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
	{
		if (!options.es)
		{
			if (options.version < 440 && options.version >= 140)
				require_extension_internal("GL_ARB_enhanced_layouts");
			else if (options.version < 140)
				SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
			attr.push_back(join("component = ", dec.component));
		}
		else
			SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
	}

	// SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
	// This is only done selectively in GLSL as needed.
	if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
	    dec.decoration_flags.get(DecorationOffset))
		attr.push_back(join("offset = ", dec.offset));
	else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset))
		attr.push_back(join("xfb_offset = ", dec.offset));

	if (attr.empty())
		return "";

	string res = "layout(";
	res += merge(attr);
	res += ") ";
	return res;
}
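
// For illustration: a row-major matrix member at explicit offset 64 yields the
// prefix "layout(row_major, offset = 64) ", producing a declaration such as:
//   layout(row_major, offset = 64) mat4 mvp;  // illustrative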

const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
{
	if (options.es && is_desktop_only_format(format))
		SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");

	switch (format)
	{
	case ImageFormatRgba32f:
		return "rgba32f";
	case ImageFormatRgba16f:
		return "rgba16f";
	case ImageFormatR32f:
		return "r32f";
	case ImageFormatRgba8:
		return "rgba8";
	case ImageFormatRgba8Snorm:
		return "rgba8_snorm";
	case ImageFormatRg32f:
		return "rg32f";
	case ImageFormatRg16f:
		return "rg16f";
	case ImageFormatRgba32i:
		return "rgba32i";
	case ImageFormatRgba16i:
		return "rgba16i";
	case ImageFormatR32i:
		return "r32i";
	case ImageFormatRgba8i:
		return "rgba8i";
	case ImageFormatRg32i:
		return "rg32i";
	case ImageFormatRg16i:
		return "rg16i";
	case ImageFormatRgba32ui:
		return "rgba32ui";
	case ImageFormatRgba16ui:
		return "rgba16ui";
	case ImageFormatR32ui:
		return "r32ui";
	case ImageFormatRgba8ui:
		return "rgba8ui";
	case ImageFormatRg32ui:
		return "rg32ui";
	case ImageFormatRg16ui:
		return "rg16ui";
	case ImageFormatR11fG11fB10f:
		return "r11f_g11f_b10f";
	case ImageFormatR16f:
		return "r16f";
	case ImageFormatRgb10A2:
		return "rgb10_a2";
	case ImageFormatR8:
		return "r8";
	case ImageFormatRg8:
		return "rg8";
	case ImageFormatR16:
		return "r16";
	case ImageFormatRg16:
		return "rg16";
	case ImageFormatRgba16:
		return "rgba16";
	case ImageFormatR16Snorm:
		return "r16_snorm";
	case ImageFormatRg16Snorm:
		return "rg16_snorm";
	case ImageFormatRgba16Snorm:
		return "rgba16_snorm";
	case ImageFormatR8Snorm:
		return "r8_snorm";
	case ImageFormatRg8Snorm:
		return "rg8_snorm";
	case ImageFormatR8ui:
		return "r8ui";
	case ImageFormatRg8ui:
		return "rg8ui";
	case ImageFormatR16ui:
		return "r16ui";
	case ImageFormatRgb10a2ui:
		return "rgb10_a2ui";
	case ImageFormatR8i:
		return "r8i";
	case ImageFormatRg8i:
		return "rg8i";
	case ImageFormatR16i:
		return "r16i";
	case ImageFormatR64i:
		return "r64i";
	case ImageFormatR64ui:
		return "r64ui";
	default:
	case ImageFormatUnknown:
		return nullptr;
	}
}
1504 | |
1505 | uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard) |
1506 | { |
1507 | switch (type.basetype) |
1508 | { |
1509 | case SPIRType::Double: |
1510 | case SPIRType::Int64: |
1511 | case SPIRType::UInt64: |
1512 | return 8; |
1513 | case SPIRType::Float: |
1514 | case SPIRType::Int: |
1515 | case SPIRType::UInt: |
1516 | return 4; |
1517 | case SPIRType::Half: |
1518 | case SPIRType::Short: |
1519 | case SPIRType::UShort: |
1520 | return 2; |
1521 | case SPIRType::SByte: |
1522 | case SPIRType::UByte: |
1523 | return 1; |
1524 | |
1525 | default: |
1526 | SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size."); |
1527 | } |
1528 | } |
1529 | |
1530 | uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags, |
1531 | BufferPackingStandard packing) |
1532 | { |
1533 | // If using PhysicalStorageBufferEXT storage class, this is a pointer, |
1534 | // and is 64-bit. |
1535 | if (is_physical_pointer(type)) |
1536 | { |
1537 | if (!type.pointer) |
1538 | SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers."); |
1539 | |
1540 | if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) |
1541 | { |
1542 | if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type)) |
1543 | return 16; |
1544 | else |
1545 | return 8; |
1546 | } |
1547 | else |
1548 | SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT."); |
1549 | } |
1550 | else if (is_array(type)) |
1551 | { |
1552 | uint32_t minimum_alignment = 1; |
1553 | if (packing_is_vec4_padded(packing)) |
1554 | minimum_alignment = 16; |
1555 | |
		auto *tmp = &get<SPIRType>(type.parent_type);
		while (!tmp->array.empty())
			tmp = &get<SPIRType>(tmp->parent_type);

		// Get the alignment of the base type, then maybe round up.
		return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
1562 | } |
1563 | |
1564 | if (type.basetype == SPIRType::Struct) |
1565 | { |
1566 | // Rule 9. Structs alignments are maximum alignment of its members. |
1567 | uint32_t alignment = 1; |
1568 | for (uint32_t i = 0; i < type.member_types.size(); i++) |
1569 | { |
1570 | auto member_flags = ir.meta[type.self].members[i].decoration_flags; |
			alignment =
			    max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
1573 | } |
1574 | |
1575 | // In std140, struct alignment is rounded up to 16. |
1576 | if (packing_is_vec4_padded(packing)) |
			alignment = max<uint32_t>(alignment, 16u);
1578 | |
1579 | return alignment; |
1580 | } |
1581 | else |
1582 | { |
1583 | const uint32_t base_alignment = type_to_packed_base_size(type, packing); |
1584 | |
1585 | // Alignment requirement for scalar block layout is always the alignment for the most basic component. |
1586 | if (packing_is_scalar(packing)) |
1587 | return base_alignment; |
1588 | |
1589 | // Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle |
1590 | // a vec4, this is handled outside since that part knows our current offset. |
1591 | if (type.columns == 1 && packing_is_hlsl(packing)) |
1592 | return base_alignment; |
1593 | |
1594 | // From 7.6.2.2 in GL 4.5 core spec. |
1595 | // Rule 1 |
1596 | if (type.vecsize == 1 && type.columns == 1) |
1597 | return base_alignment; |
1598 | |
1599 | // Rule 2 |
1600 | if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1) |
1601 | return type.vecsize * base_alignment; |
1602 | |
1603 | // Rule 3 |
1604 | if (type.vecsize == 3 && type.columns == 1) |
1605 | return 4 * base_alignment; |
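
		// Worked example (illustrative): a vec3 of 32-bit floats has base_alignment = 4,
		// so Rule 3 gives it the same 16-byte alignment as a vec4. Rules 1-3 are identical
		// in std140 and std430; the two standards only diverge for arrays and structs.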
1606 | |
1607 | // Rule 4 implied. Alignment does not change in std430. |
1608 | |
1609 | // Rule 5. Column-major matrices are stored as arrays of |
1610 | // vectors. |
		if (flags.get(DecorationColMajor) && type.columns > 1)
1612 | { |
1613 | if (packing_is_vec4_padded(packing)) |
1614 | return 4 * base_alignment; |
1615 | else if (type.vecsize == 3) |
1616 | return 4 * base_alignment; |
1617 | else |
1618 | return type.vecsize * base_alignment; |
1619 | } |
1620 | |
1621 | // Rule 6 implied. |
1622 | |
1623 | // Rule 7. |
		if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1625 | { |
1626 | if (packing_is_vec4_padded(packing)) |
1627 | return 4 * base_alignment; |
1628 | else if (type.columns == 3) |
1629 | return 4 * base_alignment; |
1630 | else |
1631 | return type.columns * base_alignment; |
1632 | } |
1633 | |
1634 | // Rule 8 implied. |
1635 | } |
1636 | |
1637 | SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?"); |
1638 | } |
1639 | |
1640 | uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags, |
1641 | BufferPackingStandard packing) |
1642 | { |
1643 | // Array stride is equal to aligned size of the underlying type. |
1644 | uint32_t parent = type.parent_type; |
1645 | assert(parent); |
1646 | |
	auto &tmp = get<SPIRType>(parent);

	uint32_t size = type_to_packed_size(tmp, flags, packing);
1650 | uint32_t alignment = type_to_packed_alignment(type, flags, packing); |
1651 | return (size + alignment - 1) & ~(alignment - 1); |
1652 | } |
1653 | |
1654 | uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing) |
1655 | { |
1656 | // If using PhysicalStorageBufferEXT storage class, this is a pointer, |
1657 | // and is 64-bit. |
1658 | if (is_physical_pointer(type)) |
1659 | { |
1660 | if (!type.pointer) |
1661 | SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers."); |
1662 | |
1663 | if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) |
1664 | return 8; |
1665 | else |
1666 | SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT."); |
1667 | } |
1668 | else if (is_array(type)) |
1669 | { |
1670 | uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing); |
1671 | |
1672 | // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size, |
1673 | // so that it is possible to pack other vectors into the last element. |
1674 | if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct) |
1675 | packed_size -= (4 - type.vecsize) * (type.width / 8); |
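		// Illustrative: "float2 arr[3]" in an HLSL cbuffer has stride 16 (48 bytes nominal),
		// but the trailing element only occupies 8 bytes, so we subtract (4 - 2) * (32 / 8) = 8.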
1676 | |
1677 | return packed_size; |
1678 | } |
1679 | |
1680 | uint32_t size = 0; |
1681 | |
1682 | if (type.basetype == SPIRType::Struct) |
1683 | { |
1684 | uint32_t pad_alignment = 1; |
1685 | |
1686 | for (uint32_t i = 0; i < type.member_types.size(); i++) |
1687 | { |
1688 | auto member_flags = ir.meta[type.self].members[i].decoration_flags; |
			auto &member_type = get<SPIRType>(type.member_types[i]);

			uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
			uint32_t alignment = max(packed_alignment, pad_alignment);
1693 | |
1694 | // The next member following a struct member is aligned to the base alignment of the struct that came before. |
1695 | // GL 4.5 spec, 7.6.2.2. |
1696 | if (member_type.basetype == SPIRType::Struct) |
1697 | pad_alignment = packed_alignment; |
1698 | else |
1699 | pad_alignment = 1; |
1700 | |
1701 | size = (size + alignment - 1) & ~(alignment - 1); |
			size += type_to_packed_size(member_type, member_flags, packing);
1703 | } |
1704 | } |
1705 | else |
1706 | { |
1707 | const uint32_t base_alignment = type_to_packed_base_size(type, packing); |
1708 | |
1709 | if (packing_is_scalar(packing)) |
1710 | { |
1711 | size = type.vecsize * type.columns * base_alignment; |
1712 | } |
1713 | else |
1714 | { |
1715 | if (type.columns == 1) |
1716 | size = type.vecsize * base_alignment; |
1717 | |
			if (flags.get(DecorationColMajor) && type.columns > 1)
1719 | { |
1720 | if (packing_is_vec4_padded(packing)) |
1721 | size = type.columns * 4 * base_alignment; |
1722 | else if (type.vecsize == 3) |
1723 | size = type.columns * 4 * base_alignment; |
1724 | else |
1725 | size = type.columns * type.vecsize * base_alignment; |
1726 | } |
1727 | |
			if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1729 | { |
1730 | if (packing_is_vec4_padded(packing)) |
1731 | size = type.vecsize * 4 * base_alignment; |
1732 | else if (type.columns == 3) |
1733 | size = type.vecsize * 4 * base_alignment; |
1734 | else |
1735 | size = type.vecsize * type.columns * base_alignment; |
1736 | } |
1737 | |
1738 | // For matrices in HLSL, the last element has a size which depends on its vector size, |
1739 | // so that it is possible to pack other vectors into the last element. |
1740 | if (packing_is_hlsl(packing) && type.columns > 1) |
1741 | size -= (4 - type.vecsize) * (type.width / 8); |
1742 | } |
1743 | } |
1744 | |
1745 | return size; |
1746 | } |
1747 | |
1748 | bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, |
1749 | uint32_t *failed_validation_index, uint32_t start_offset, |
1750 | uint32_t end_offset) |
1751 | { |
1752 | // This is very tricky and error prone, but try to be exhaustive and correct here. |
1753 | // SPIR-V doesn't directly say if we're using std430 or std140. |
1754 | // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters), |
1755 | // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information. |
1756 | // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing). |
1757 | // |
1758 | // It is almost certain that we're using std430, but it gets tricky with arrays in particular. |
1759 | // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430. |
1760 | // |
1761 | // The only two differences between std140 and std430 are related to padding alignment/array stride |
1762 | // in arrays and structs. In std140 they take minimum vec4 alignment. |
1763 | // std430 only removes the vec4 requirement. |
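	//
	// Illustrative: a block member "float a[4];" with ArrayStride = 4 can only have come
	// from std430 (or scalar), while ArrayStride = 16 is consistent with std140. Offsets
	// and strides like these are exactly what the loop below validates.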
1764 | |
1765 | uint32_t offset = 0; |
1766 | uint32_t pad_alignment = 1; |
1767 | |
	bool is_top_level_block =
	    has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
1770 | |
1771 | for (uint32_t i = 0; i < type.member_types.size(); i++) |
1772 | { |
		auto &memb_type = get<SPIRType>(type.member_types[i]);

		auto *type_meta = ir.find_meta(type.self);
1776 | auto member_flags = type_meta ? type_meta->members[i].decoration_flags : Bitset{}; |
1777 | |
1778 | // Verify alignment rules. |
		uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
1780 | |
1781 | // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g: |
1782 | // layout(constant_id = 0) const int s = 10; |
1783 | // const int S = s + 5; // SpecConstantOp |
1784 | // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here, |
1785 | // we would need full implementation of compile-time constant folding. :( |
1786 | // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant |
1787 | // for our analysis (e.g. unsized arrays). |
1788 | // This lets us simply ignore that there are spec constant op sized arrays in our buffers. |
1789 | // Querying size of this member will fail, so just don't call it unless we have to. |
1790 | // |
		// This is likely the best effort we can support without going into unacceptably complicated workarounds.
1792 | bool member_can_be_unsized = |
1793 | is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty(); |
1794 | |
1795 | uint32_t packed_size = 0; |
1796 | if (!member_can_be_unsized || packing_is_hlsl(packing)) |
			packed_size = type_to_packed_size(memb_type, member_flags, packing);

		// We only need to care about this if we have non-array types which can straddle the vec4 boundary.
		uint32_t actual_offset = type_struct_member_offset(type, i);
1801 | |
1802 | if (packing_is_hlsl(packing)) |
1803 | { |
1804 | // If a member straddles across a vec4 boundary, alignment is actually vec4. |
1805 | uint32_t target_offset; |
1806 | |
1807 | // If we intend to use explicit packing, we must check for improper straddle with that offset. |
1808 | // In implicit packing, we must check with implicit offset, since the explicit offset |
1809 | // might have already accounted for the straddle, and we'd miss the alignment promotion to vec4. |
1810 | // This is important when packing sub-structs that don't support packoffset(). |
1811 | if (packing_has_flexible_offset(packing)) |
1812 | target_offset = actual_offset; |
1813 | else |
1814 | target_offset = offset; |
1815 | |
1816 | uint32_t begin_word = target_offset / 16; |
1817 | uint32_t end_word = (target_offset + packed_size - 1) / 16; |
1818 | |
1819 | if (begin_word != end_word) |
				packed_alignment = max<uint32_t>(packed_alignment, 16u);
1821 | } |
1822 | |
1823 | // Field is not in the specified range anymore and we can ignore any further fields. |
1824 | if (actual_offset >= end_offset) |
1825 | break; |
1826 | |
		uint32_t alignment = max(packed_alignment, pad_alignment);
1828 | offset = (offset + alignment - 1) & ~(alignment - 1); |
1829 | |
1830 | // The next member following a struct member is aligned to the base alignment of the struct that came before. |
1831 | // GL 4.5 spec, 7.6.2.2. |
1832 | if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer) |
1833 | pad_alignment = packed_alignment; |
1834 | else |
1835 | pad_alignment = 1; |
1836 | |
1837 | // Only care about packing if we are in the given range |
1838 | if (actual_offset >= start_offset) |
1839 | { |
1840 | // We only care about offsets in std140, std430, etc ... |
1841 | // For EnhancedLayout variants, we have the flexibility to choose our own offsets. |
1842 | if (!packing_has_flexible_offset(packing)) |
1843 | { |
1844 | if (actual_offset != offset) // This cannot be the packing we're looking for. |
1845 | { |
1846 | if (failed_validation_index) |
1847 | *failed_validation_index = i; |
1848 | return false; |
1849 | } |
1850 | } |
1851 | else if ((actual_offset & (alignment - 1)) != 0) |
1852 | { |
1853 | // We still need to verify that alignment rules are observed, even if we have explicit offset. |
1854 | if (failed_validation_index) |
1855 | *failed_validation_index = i; |
1856 | return false; |
1857 | } |
1858 | |
1859 | // Verify array stride rules. |
			if (is_array(memb_type) &&
			    type_to_packed_array_stride(memb_type, member_flags, packing) !=
			        type_struct_member_array_stride(type, i))
1863 | { |
1864 | if (failed_validation_index) |
1865 | *failed_validation_index = i; |
1866 | return false; |
1867 | } |
1868 | |
1869 | // Verify that sub-structs also follow packing rules. |
1870 | // We cannot use enhanced layouts on substructs, so they better be up to spec. |
1871 | auto substruct_packing = packing_to_substruct_packing(packing); |
1872 | |
			if (!memb_type.pointer && !memb_type.member_types.empty() &&
			    !buffer_is_packing_standard(memb_type, substruct_packing))
1875 | { |
1876 | if (failed_validation_index) |
1877 | *failed_validation_index = i; |
1878 | return false; |
1879 | } |
1880 | } |
1881 | |
1882 | // Bump size. |
1883 | offset = actual_offset + packed_size; |
1884 | } |
1885 | |
1886 | return true; |
1887 | } |
1888 | |
1889 | bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block) |
1890 | { |
	// Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
1892 | // Be very explicit here about how to solve the issue. |
1893 | if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) || |
1894 | (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput)) |
1895 | { |
1896 | uint32_t minimum_desktop_version = block ? 440 : 410; |
1897 | // ARB_enhanced_layouts vs ARB_separate_shader_objects ... |
1898 | |
1899 | if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects) |
1900 | return false; |
1901 | else if (options.es && options.version < 310) |
1902 | return false; |
1903 | } |
1904 | |
1905 | if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) || |
1906 | (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput)) |
1907 | { |
1908 | if (options.es && options.version < 300) |
1909 | return false; |
1910 | else if (!options.es && options.version < 330) |
1911 | return false; |
1912 | } |
1913 | |
1914 | if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant) |
1915 | { |
1916 | if (options.es && options.version < 310) |
1917 | return false; |
1918 | else if (!options.es && options.version < 430) |
1919 | return false; |
1920 | } |
1921 | |
1922 | return true; |
1923 | } |
1924 | |
1925 | string CompilerGLSL::layout_for_variable(const SPIRVariable &var) |
1926 | { |
1927 | // FIXME: Come up with a better solution for when to disable layouts. |
1928 | // Having layouts depend on extensions as well as which types |
1929 | // of layouts are used. For now, the simple solution is to just disable |
1930 | // layouts for legacy versions. |
1931 | if (is_legacy()) |
1932 | return ""; |
1933 | |
	if (subpass_input_is_framebuffer_fetch(var.self))
1935 | return ""; |
1936 | |
1937 | SmallVector<string> attr; |
1938 | |
	auto &type = get<SPIRType>(var.basetype);
	auto &flags = get_decoration_bitset(var.self);
	auto &typeflags = get_decoration_bitset(type.self);
1942 | |
	if (flags.get(DecorationPassthroughNV))
		attr.push_back("passthrough");

	if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
		attr.push_back("push_constant");
	else if (var.storage == StorageClassShaderRecordBufferKHR)
		attr.push_back(ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");

	if (flags.get(DecorationRowMajor))
		attr.push_back("row_major");
	if (flags.get(DecorationColMajor))
		attr.push_back("column_major");
1955 | |
1956 | if (options.vulkan_semantics) |
1957 | { |
		if (flags.get(DecorationInputAttachmentIndex))
			attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex)));
1960 | } |
1961 | |
	bool is_block = has_decoration(type.self, DecorationBlock);
	if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
	{
		Bitset combined_decoration;
		for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
			combined_decoration.merge_or(combined_decoration_for_member(type, i));

		// If our members have location decorations, we don't need to
		// emit location decorations at the top as well (looks weird).
		if (!combined_decoration.get(DecorationLocation))
			attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation)));
	}
1973 | } |
1974 | |
1975 | if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput && |
	    location_is_non_coherent_framebuffer_fetch(get_decoration(var.self, DecorationLocation)))
	{
		attr.push_back("noncoherent");
1979 | } |
1980 | |
1981 | // Transform feedback |
1982 | bool uses_enhanced_layouts = false; |
1983 | if (is_block && var.storage == StorageClassOutput) |
1984 | { |
1985 | // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself, |
1986 | // since all members must match the same xfb_buffer. The only thing we will declare for members of the block |
1987 | // is the xfb_offset. |
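		//
		// Illustrative GLSL shape of what this can emit (names are placeholders):
		//   layout(xfb_buffer = 0, xfb_stride = 16) out VertOut
		//   {
		//       layout(xfb_offset = 0) vec4 v;
		//   } vout;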
1988 | uint32_t member_count = uint32_t(type.member_types.size()); |
1989 | bool have_xfb_buffer_stride = false; |
1990 | bool have_any_xfb_offset = false; |
1991 | bool have_geom_stream = false; |
1992 | uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; |
1993 | |
		if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride))
		{
			have_xfb_buffer_stride = true;
			xfb_buffer = get_decoration(var.self, DecorationXfbBuffer);
			xfb_stride = get_decoration(var.self, DecorationXfbStride);
1999 | } |
2000 | |
		if (flags.get(DecorationStream))
		{
			have_geom_stream = true;
			geom_stream = get_decoration(var.self, DecorationStream);
2005 | } |
2006 | |
2007 | // Verify that none of the members violate our assumption. |
2008 | for (uint32_t i = 0; i < member_count; i++) |
2009 | { |
			if (has_member_decoration(type.self, i, DecorationStream))
			{
				uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream);
				if (have_geom_stream && member_geom_stream != geom_stream)
					SPIRV_CROSS_THROW("IO block member Stream mismatch.");
				have_geom_stream = true;
				geom_stream = member_geom_stream;
			}

			// Only members with an Offset decoration participate in XFB.
			if (!has_member_decoration(type.self, i, DecorationOffset))
				continue;
			have_any_xfb_offset = true;

			if (has_member_decoration(type.self, i, DecorationXfbBuffer))
			{
				uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer);
				if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
					SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
				have_xfb_buffer_stride = true;
				xfb_buffer = buffer_index;
			}

			if (has_member_decoration(type.self, i, DecorationXfbStride))
			{
				uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride);
2036 | if (have_xfb_buffer_stride && stride != xfb_stride) |
2037 | SPIRV_CROSS_THROW("IO block member XfbStride mismatch."); |
2038 | have_xfb_buffer_stride = true; |
2039 | xfb_stride = stride; |
2040 | } |
2041 | } |
2042 | |
2043 | if (have_xfb_buffer_stride && have_any_xfb_offset) |
2044 | { |
2045 | attr.push_back(t: join(ts: "xfb_buffer = ", ts&: xfb_buffer)); |
2046 | attr.push_back(t: join(ts: "xfb_stride = ", ts&: xfb_stride)); |
2047 | uses_enhanced_layouts = true; |
2048 | } |
2049 | |
2050 | if (have_geom_stream) |
2051 | { |
2052 | if (get_execution_model() != ExecutionModelGeometry) |
2053 | SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); |
2054 | if (options.es) |
2055 | SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); |
2056 | if (options.version < 400) |
				require_extension_internal("GL_ARB_transform_feedback3");
			attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
2059 | } |
2060 | } |
2061 | else if (var.storage == StorageClassOutput) |
2062 | { |
		if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset))
		{
			// XFB for standalone variables, we can emit all decorations.
			attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer)));
			attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride)));
			attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset)));
2069 | uses_enhanced_layouts = true; |
2070 | } |
2071 | |
		if (flags.get(DecorationStream))
		{
			if (get_execution_model() != ExecutionModelGeometry)
				SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
			if (options.es)
				SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
			if (options.version < 400)
				require_extension_internal("GL_ARB_transform_feedback3");
			attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
2081 | } |
2082 | } |
2083 | |
2084 | // Can only declare Component if we can declare location. |
	if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block))
	{
		uses_enhanced_layouts = true;
		attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent)));
2089 | } |
2090 | |
	if (uses_enhanced_layouts)
	{
		if (!options.es)
		{
			if (options.version < 440 && options.version >= 140)
				require_extension_internal("GL_ARB_enhanced_layouts");
			else if (options.version < 140)
				SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
		}
		else
			SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
2104 | } |
2105 | |
	if (flags.get(DecorationIndex))
		attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex)));
2108 | |
2109 | // Do not emit set = decoration in regular GLSL output, but |
2110 | // we need to preserve it in Vulkan GLSL mode. |
2111 | if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR) |
2112 | { |
		if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
			attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet)));
2115 | } |
2116 | |
2117 | bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant; |
	bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
	                  (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
	bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
	bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
2122 | |
2123 | // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ... |
2124 | bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140); |
2125 | |
2126 | // pretend no UBOs when options say so |
2127 | if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms) |
2128 | can_use_buffer_blocks = false; |
2129 | |
2130 | bool can_use_binding; |
2131 | if (options.es) |
2132 | can_use_binding = options.version >= 310; |
2133 | else |
2134 | can_use_binding = options.enable_420pack_extension || (options.version >= 420); |
2135 | |
2136 | // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30. |
2137 | if (!can_use_buffer_blocks && var.storage == StorageClassUniform) |
2138 | can_use_binding = false; |
2139 | |
2140 | if (var.storage == StorageClassShaderRecordBufferKHR) |
2141 | can_use_binding = false; |
2142 | |
	if (can_use_binding && flags.get(DecorationBinding))
		attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding)));

	if (var.storage != StorageClassOutput && flags.get(DecorationOffset))
		attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset)));
2148 | |
2149 | // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430. |
2150 | // If SPIR-V does not comply with either layout, we cannot really work around it. |
2151 | if (can_use_buffer_blocks && (ubo_block || emulated_ubo)) |
2152 | { |
		attr.push_back(buffer_to_packing_standard(type, false, true));
	}
	else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
	{
		attr.push_back(buffer_to_packing_standard(type, true, true));
2158 | } |
2159 | |
	// For images, the type itself adds a layout qualifier.
	// Only emit the format for storage images.
	if (type.basetype == SPIRType::Image && type.image.sampled == 2)
	{
		const char *fmt = format_to_glsl(type.image.format);
		if (fmt)
			attr.push_back(fmt);
2167 | } |
2168 | |
2169 | if (attr.empty()) |
2170 | return ""; |
2171 | |
2172 | string res = "layout("; |
	res += merge(attr);
2174 | res += ") "; |
2175 | return res; |
2176 | } |
2177 | |
2178 | string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, |
2179 | bool support_std430_without_scalar_layout, |
2180 | bool support_enhanced_layouts) |
2181 | { |
	if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
		return "std430";
	else if (buffer_is_packing_standard(type, BufferPackingStd140))
		return "std140";
	else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar))
	{
		require_extension_internal("GL_EXT_scalar_block_layout");
		return "scalar";
	}
	else if (support_std430_without_scalar_layout &&
	         support_enhanced_layouts &&
	         buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
	{
		if (options.es && !options.vulkan_semantics)
			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
			                  "not support GL_ARB_enhanced_layouts.");
		if (!options.es && !options.vulkan_semantics && options.version < 440)
			require_extension_internal("GL_ARB_enhanced_layouts");

		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
		return "std430";
	}
	else if (support_enhanced_layouts &&
	         buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
	{
		// Fallback time. We might be able to use ARB_enhanced_layouts to deal with this difference,
		// however, we can only use layout(offset) on the block itself, not any substructs, so the substructs had better use the appropriate layout.
		// Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
		if (options.es && !options.vulkan_semantics)
			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
			                  "not support GL_ARB_enhanced_layouts.");
		if (!options.es && !options.vulkan_semantics && options.version < 440)
			require_extension_internal("GL_ARB_enhanced_layouts");

		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
		return "std140";
	}
	else if (options.vulkan_semantics &&
	         support_enhanced_layouts &&
	         buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
	{
		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
		require_extension_internal("GL_EXT_scalar_block_layout");
		return "scalar";
	}
	else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
	         buffer_is_packing_standard(type, BufferPackingStd430))
	{
		// UBOs can support std430 with GL_EXT_scalar_block_layout.
		require_extension_internal("GL_EXT_scalar_block_layout");
		return "std430";
	}
	else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
	         support_enhanced_layouts &&
	         buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
	{
		// UBOs can support std430 with GL_EXT_scalar_block_layout.
		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
		require_extension_internal("GL_EXT_scalar_block_layout");
		return "std430";
	}
	else
	{
		SPIRV_CROSS_THROW("Buffer block cannot be expressed as std430, std140, or scalar, even with enhanced "
		                  "layouts. You can try flattening this block to support a more flexible layout.");
	}
2248 | } |
2249 | |
2250 | void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var) |
2251 | { |
	if (flattened_buffer_blocks.count(var.self))
		emit_buffer_block_flattened(var);
2254 | else if (options.vulkan_semantics) |
2255 | emit_push_constant_block_vulkan(var); |
2256 | else if (options.emit_push_constant_as_uniform_buffer) |
2257 | emit_buffer_block_native(var); |
2258 | else |
2259 | emit_push_constant_block_glsl(var); |
2260 | } |
2261 | |
2262 | void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var) |
2263 | { |
	emit_buffer_block(var);
2265 | } |
2266 | |
2267 | void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var) |
2268 | { |
2269 | // OpenGL has no concept of push constant blocks, implement it as a uniform struct. |
	auto &type = get<SPIRType>(var.basetype);

	unset_decoration(var.self, DecorationBinding);
	unset_decoration(var.self, DecorationDescriptorSet);
2274 | |
2275 | #if 0 |
2276 | if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet))) |
2277 | SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. " |
2278 | "Remap to location with reflection API first or disable these decorations."); |
2279 | #endif |
2280 | |
2281 | // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. |
2282 | // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. |
	bool block_flag = has_decoration(type.self, DecorationBlock);
	unset_decoration(type.self, DecorationBlock);
2285 | |
2286 | emit_struct(type); |
2287 | |
2288 | if (block_flag) |
		set_decoration(type.self, DecorationBlock);
2290 | |
2291 | emit_uniform(var); |
	statement("");
2293 | } |
2294 | |
2295 | void CompilerGLSL::emit_buffer_block(const SPIRVariable &var) |
2296 | { |
	auto &type = get<SPIRType>(var.basetype);
	bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);

	if (flattened_buffer_blocks.count(var.self))
		emit_buffer_block_flattened(var);
2302 | else if (is_legacy() || (!options.es && options.version == 130) || |
2303 | (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)) |
2304 | emit_buffer_block_legacy(var); |
2305 | else |
2306 | emit_buffer_block_native(var); |
2307 | } |
2308 | |
2309 | void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var) |
2310 | { |
	auto &type = get<SPIRType>(var.basetype);
	bool ssbo = var.storage == StorageClassStorageBuffer ||
	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
	if (ssbo)
		SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");

	// We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
	auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
	bool block_flag = block_flags.get(DecorationBlock);
	block_flags.clear(DecorationBlock);
	emit_struct(type);
	if (block_flag)
		block_flags.set(DecorationBlock);
	emit_uniform(var);
	statement("");
2327 | } |
2328 | |
2329 | void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_declaration) |
2330 | { |
	auto &type = get<SPIRType>(type_id);
2332 | string buffer_name; |
2333 | |
2334 | if (forward_declaration && is_physical_pointer_to_buffer_block(type)) |
2335 | { |
2336 | // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ... |
2337 | // Allow aliased name since we might be declaring the block twice. Once with buffer reference (forward declared) and one proper declaration. |
2338 | // The names must match up. |
		buffer_name = to_name(type.self, false);

		// Shaders never use the block by interface name, so we don't
		// have to track this other than updating name caches.
		// If we have a collision for any reason, just fallback immediately.
		if (ir.meta[type.self].decoration.alias.empty() ||
		    block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
		    resource_names.find(buffer_name) != end(resource_names))
		{
			buffer_name = join("_", type.self);
		}

		// Make sure we get something unique for both global name scope and block name scope.
		// See GLSL 4.5 spec: section 4.3.9 for details.
		add_variable(block_ssbo_names, resource_names, buffer_name);

		// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
		// This cannot conflict with anything else, so we're safe now.
		// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
		if (buffer_name.empty())
			buffer_name = join("_", type.self);

		block_names.insert(buffer_name);
		block_ssbo_names.insert(buffer_name);
2363 | |
2364 | // Ensure we emit the correct name when emitting non-forward pointer type. |
2365 | ir.meta[type.self].decoration.alias = buffer_name; |
2366 | } |
2367 | else |
2368 | { |
2369 | buffer_name = type_to_glsl(type); |
2370 | } |
2371 | |
2372 | if (!forward_declaration) |
2373 | { |
		auto itr = physical_storage_type_to_alignment.find(type_id);
2375 | uint32_t alignment = 0; |
2376 | if (itr != physical_storage_type_to_alignment.end()) |
2377 | alignment = itr->second.alignment; |
2378 | |
2379 | if (is_physical_pointer_to_buffer_block(type)) |
2380 | { |
2381 | SmallVector<std::string> attributes; |
			attributes.push_back("buffer_reference");
			if (alignment)
				attributes.push_back(join("buffer_reference_align = ", alignment));
			attributes.push_back(buffer_to_packing_standard(type, true, true));

			auto flags = ir.get_buffer_block_type_flags(type);
			string decorations;
			if (flags.get(DecorationRestrict))
				decorations += " restrict";
			if (flags.get(DecorationCoherent))
				decorations += " coherent";
			if (flags.get(DecorationNonReadable))
				decorations += " writeonly";
			if (flags.get(DecorationNonWritable))
				decorations += " readonly";

			statement("layout(", merge(attributes), ")", decorations, " buffer ", buffer_name);
2399 | } |
2400 | else |
2401 | { |
2402 | string packing_standard; |
2403 | if (type.basetype == SPIRType::Struct) |
2404 | { |
2405 | // The non-block type is embedded in a block, so we cannot use enhanced layouts :( |
				packing_standard = buffer_to_packing_standard(type, true, false) + ", ";
			}
			else if (is_array(get_pointee_type(type)))
			{
				SPIRType wrap_type{OpTypeStruct};
				wrap_type.self = ir.increase_bound_by(1);
				wrap_type.member_types.push_back(get_pointee_type_id(type_id));
				ir.set_member_decoration(wrap_type.self, 0, DecorationOffset, 0);
				packing_standard = buffer_to_packing_standard(wrap_type, true, false) + ", ";
			}

			if (alignment)
				statement("layout(", packing_standard, "buffer_reference, buffer_reference_align = ", alignment,
				          ") buffer ", buffer_name);
			else
				statement("layout(", packing_standard, "buffer_reference) buffer ", buffer_name);
2421 | } |
2422 | |
2423 | begin_scope(); |
2424 | |
2425 | if (is_physical_pointer_to_buffer_block(type)) |
2426 | { |
2427 | type.member_name_cache.clear(); |
2428 | |
2429 | uint32_t i = 0; |
2430 | for (auto &member : type.member_types) |
2431 | { |
				add_member_name(type, i);
				emit_struct_member(type, member, i);
2434 | i++; |
2435 | } |
2436 | } |
2437 | else |
2438 | { |
2439 | auto &pointee_type = get_pointee_type(type); |
			statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type, 0), ";");
2441 | } |
2442 | |
2443 | end_scope_decl(); |
		statement("");
2445 | } |
2446 | else |
2447 | { |
2448 | statement(ts: "layout(buffer_reference) buffer ", ts&: buffer_name, ts: ";"); |
2449 | } |
2450 | } |
2451 | |
2452 | void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var) |
2453 | { |
	auto &type = get<SPIRType>(var.basetype);

	Bitset flags = ir.get_buffer_block_flags(var);
	bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
	bool is_restrict = ssbo && flags.get(DecorationRestrict);
	bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
	bool is_readonly = ssbo && flags.get(DecorationNonWritable);
	bool is_coherent = ssbo && flags.get(DecorationCoherent);
2463 | |
2464 | // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ... |
	auto buffer_name = to_name(type.self, false);

	auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;

	// Shaders never use the block by interface name, so we don't
	// have to track this other than updating name caches.
	// If we have a collision for any reason, just fallback immediately.
	if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
	    resource_names.find(buffer_name) != end(resource_names))
	{
		buffer_name = get_block_fallback_name(var.self);
	}

	// Make sure we get something unique for both global name scope and block name scope.
	// See GLSL 4.5 spec: section 4.3.9 for details.
	add_variable(block_namespace, resource_names, buffer_name);

	// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
	// This cannot conflict with anything else, so we're safe now.
	// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
	if (buffer_name.empty())
		buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);

	block_names.insert(buffer_name);
	block_namespace.insert(buffer_name);

	// Save for post-reflection later.
	declared_block_names[var.self] = buffer_name;

	statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
	          is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
	          buffer_name);
2497 | |
2498 | begin_scope(); |
2499 | |
2500 | type.member_name_cache.clear(); |
2501 | |
2502 | uint32_t i = 0; |
2503 | for (auto &member : type.member_types) |
2504 | { |
		add_member_name(type, i);
		emit_struct_member(type, member, i);
2507 | i++; |
2508 | } |
2509 | |
2510 | // Don't declare empty blocks in GLSL, this is not allowed. |
2511 | if (type_is_empty(type) && !backend.supports_empty_struct) |
2512 | statement(ts: "int empty_struct_member;"); |
2513 | |
2514 | // var.self can be used as a backup name for the block name, |
2515 | // so we need to make sure we don't disturb the name here on a recompile. |
2516 | // It will need to be reset if we have to recompile. |
	preserve_alias_on_reset(var.self);
	add_resource_name(var.self);
	end_scope_decl(to_name(var.self) + type_to_array_glsl(type, var.self));
	statement("");
2521 | } |
2522 | |
2523 | void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var) |
2524 | { |
	auto &type = get<SPIRType>(var.basetype);
2526 | |
2527 | // Block names should never alias. |
	auto buffer_name = to_name(type.self, false);
	size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
2530 | |
2531 | SPIRType::BaseType basic_type; |
	if (get_common_basic_type(type, basic_type))
2533 | { |
2534 | SPIRType tmp { OpTypeVector }; |
2535 | tmp.basetype = basic_type; |
2536 | tmp.vecsize = 4; |
2537 | if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt) |
2538 | SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint."); |
2539 | |
2540 | auto flags = ir.get_buffer_block_flags(var); |
2541 | statement(ts: "uniform ", ts: flags_to_qualifiers_glsl(type: tmp, flags), ts: type_to_glsl(type: tmp), ts: " ", ts&: buffer_name, ts: "[", |
2542 | ts&: buffer_size, ts: "];"); |
2543 | } |
2544 | else |
2545 | SPIRV_CROSS_THROW("All basic types in a flattened block must be the same."); |
2546 | } |
2547 | |
2548 | const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) |
2549 | { |
2550 | auto &execution = get_entry_point(); |
2551 | |
	if (subpass_input_is_framebuffer_fetch(var.self))
2553 | return ""; |
2554 | |
2555 | if (var.storage == StorageClassInput || var.storage == StorageClassOutput) |
2556 | { |
2557 | if (is_legacy() && execution.model == ExecutionModelVertex) |
2558 | return var.storage == StorageClassInput ? "attribute ": "varying "; |
2559 | else if (is_legacy() && execution.model == ExecutionModelFragment) |
2560 | return "varying "; // Fragment outputs are renamed so they never hit this case. |
2561 | else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput) |
2562 | { |
2563 | uint32_t loc = get_decoration(id: var.self, decoration: DecorationLocation); |
2564 | bool is_inout = location_is_framebuffer_fetch(location: loc); |
2565 | if (is_inout) |
2566 | return "inout "; |
2567 | else |
2568 | return "out "; |
2569 | } |
2570 | else |
2571 | return var.storage == StorageClassInput ? "in ": "out "; |
2572 | } |
2573 | else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform || |
2574 | var.storage == StorageClassPushConstant || var.storage == StorageClassAtomicCounter) |
2575 | { |
2576 | return "uniform "; |
2577 | } |
2578 | else if (var.storage == StorageClassRayPayloadKHR) |
2579 | { |
2580 | return ray_tracing_is_khr ? "rayPayloadEXT ": "rayPayloadNV "; |
2581 | } |
2582 | else if (var.storage == StorageClassIncomingRayPayloadKHR) |
2583 | { |
2584 | return ray_tracing_is_khr ? "rayPayloadInEXT ": "rayPayloadInNV "; |
2585 | } |
2586 | else if (var.storage == StorageClassHitAttributeKHR) |
2587 | { |
2588 | return ray_tracing_is_khr ? "hitAttributeEXT ": "hitAttributeNV "; |
2589 | } |
2590 | else if (var.storage == StorageClassCallableDataKHR) |
2591 | { |
2592 | return ray_tracing_is_khr ? "callableDataEXT ": "callableDataNV "; |
2593 | } |
2594 | else if (var.storage == StorageClassIncomingCallableDataKHR) |
2595 | { |
2596 | return ray_tracing_is_khr ? "callableDataInEXT ": "callableDataInNV "; |
2597 | } |
2598 | |
2599 | return ""; |
2600 | } |
2601 | |
2602 | void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual, |
2603 | const SmallVector<uint32_t> &indices) |
2604 | { |
2605 | uint32_t member_type_id = type.self; |
2606 | const SPIRType *member_type = &type; |
2607 | const SPIRType *parent_type = nullptr; |
2608 | auto flattened_name = basename; |
2609 | for (auto &index : indices) |
2610 | { |
2611 | flattened_name += "_"; |
		flattened_name += to_member_name(*member_type, index);
		parent_type = member_type;
		member_type_id = member_type->member_types[index];
		member_type = &get<SPIRType>(member_type_id);
2616 | } |
2617 | |
2618 | assert(member_type->basetype != SPIRType::Struct); |
2619 | |
2620 | // We're overriding struct member names, so ensure we do so on the primary type. |
2621 | if (parent_type->type_alias) |
		parent_type = &get<SPIRType>(parent_type->type_alias);
2623 | |
2624 | // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row, |
2625 | // which is not allowed. |
	ParsedIR::sanitize_underscores(flattened_name);
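	// e.g. basename "vout" joined with member "_uv" would yield "vout__uv"; sanitizing
	// collapses the consecutive underscores so the identifier remains legal GLSL.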
2627 | |
2628 | uint32_t last_index = indices.back(); |
2629 | |
2630 | // Pass in the varying qualifier here so it will appear in the correct declaration order. |
2631 | // Replace member name while emitting it so it encodes both struct name and member name. |
	auto backup_name = get_member_name(parent_type->self, last_index);
	auto member_name = to_member_name(*parent_type, last_index);
	set_member_name(parent_type->self, last_index, flattened_name);
	emit_struct_member(*parent_type, member_type_id, last_index, qual);
	// Restore member name.
	set_member_name(parent_type->self, last_index, member_name);
2638 | } |
2639 | |
2640 | void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual, |
2641 | const SmallVector<uint32_t> &indices) |
2642 | { |
2643 | auto sub_indices = indices; |
	sub_indices.push_back(0);
2645 | |
2646 | const SPIRType *member_type = &type; |
2647 | for (auto &index : indices) |
		member_type = &get<SPIRType>(member_type->member_types[index]);
2649 | |
2650 | assert(member_type->basetype == SPIRType::Struct); |
2651 | |
2652 | if (!member_type->array.empty()) |
2653 | SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks."); |
2654 | |
2655 | for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++) |
2656 | { |
2657 | sub_indices.back() = i; |
		if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
			emit_flattened_io_block_struct(basename, type, qual, sub_indices);
		else
			emit_flattened_io_block_member(basename, type, qual, sub_indices);
2662 | } |
2663 | } |
2664 | |
2665 | void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual) |
2666 | { |
	auto &var_type = get<SPIRType>(var.basetype);
2668 | if (!var_type.array.empty()) |
2669 | SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings."); |
2670 | |
2671 | // Emit flattened types based on the type alias. Normally, we are never supposed to emit |
2672 | // struct declarations for aliased types. |
	auto &type = var_type.type_alias ? get<SPIRType>(var_type.type_alias) : var_type;
2674 | |
2675 | auto old_flags = ir.meta[type.self].decoration.decoration_flags; |
2676 | // Emit the members as if they are part of a block to get all qualifiers. |
2677 | ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock); |
2678 | |
2679 | type.member_name_cache.clear(); |
2680 | |
2681 | SmallVector<uint32_t> member_indices; |
	member_indices.push_back(0);
	auto basename = to_name(var.self);
2684 | |
2685 | uint32_t i = 0; |
2686 | for (auto &member : type.member_types) |
2687 | { |
		add_member_name(type, i);
		auto &membertype = get<SPIRType>(member);
2690 | |
2691 | member_indices.back() = i; |
2692 | if (membertype.basetype == SPIRType::Struct) |
			emit_flattened_io_block_struct(basename, type, qual, member_indices);
		else
			emit_flattened_io_block_member(basename, type, qual, member_indices);
2696 | i++; |
2697 | } |
2698 | |
2699 | ir.meta[type.self].decoration.decoration_flags = old_flags; |
2700 | |
2701 | // Treat this variable as fully flattened from now on. |
2702 | flattened_structs[var.self] = true; |
2703 | } |
2704 | |
2705 | void CompilerGLSL::emit_interface_block(const SPIRVariable &var) |
2706 | { |
	auto &type = get<SPIRType>(var.basetype);
2708 | |
2709 | if (var.storage == StorageClassInput && type.basetype == SPIRType::Double && |
2710 | !options.es && options.version < 410) |
2711 | { |
		require_extension_internal("GL_ARB_vertex_attrib_64bit");
2713 | } |
2714 | |
2715 | // Either make it plain in/out or in/out blocks depending on what shader is doing ... |
2716 | bool block = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock); |
2717 | const char *qual = to_storage_qualifiers_glsl(var); |
2718 | |
2719 | if (block) |
2720 | { |
2721 | // ESSL earlier than 310 and GLSL earlier than 150 did not support |
2722 | // I/O variables which are struct types. |
2723 | // To support this, flatten the struct into separate varyings instead. |
2724 | if (options.force_flattened_io_blocks || (options.es && options.version < 310) || |
2725 | (!options.es && options.version < 150)) |
2726 | { |
2727 | // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320. |
2728 | // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150). |
2729 | emit_flattened_io_block(var, qual); |
2730 | } |
2731 | else |
2732 | { |
2733 | if (options.es && options.version < 320) |
2734 | { |
2735 | // Geometry and tessellation extensions imply this extension. |
2736 | if (!has_extension(ext: "GL_EXT_geometry_shader") && !has_extension(ext: "GL_EXT_tessellation_shader")) |
2737 | require_extension_internal(ext: "GL_EXT_shader_io_blocks"); |
2738 | } |
2739 | |
2740 | // Workaround to make sure we can emit "patch in/out" correctly. |
2741 | fixup_io_block_patch_primitive_qualifiers(var); |
2742 | |
2743 | // Block names should never alias. |
2744 | auto block_name = to_name(id: type.self, allow_alias: false); |
2745 | |
2746 | // The namespace for I/O blocks is separate from other variables in GLSL. |
2747 | auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names; |
2748 | |
2749 | // Shaders never use the block by interface name, so we don't |
2750 | // have to track this other than updating name caches. |
2751 | if (block_name.empty() || block_namespace.find(x: block_name) != end(cont&: block_namespace)) |
2752 | block_name = get_fallback_name(id: type.self); |
2753 | else |
2754 | block_namespace.insert(x: block_name); |
2755 | |
2756 | // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. |
2757 | // This cannot conflict with anything else, so we're safe now. |
2758 | if (block_name.empty()) |
2759 | block_name = join(ts: "_", ts&: get<SPIRType>(id: var.basetype).self, ts: "_", ts: var.self); |
2760 | |
2761 | // Instance names cannot alias block names. |
2762 | resource_names.insert(x: block_name); |
2763 | |
2764 | const char *block_qualifier; |
2765 | if (has_decoration(id: var.self, decoration: DecorationPatch)) |
2766 | block_qualifier = "patch "; |
2767 | else if (has_decoration(id: var.self, decoration: DecorationPerPrimitiveEXT)) |
2768 | block_qualifier = "perprimitiveEXT "; |
2769 | else |
2770 | block_qualifier = ""; |
2771 | |
2772 | statement(ts: layout_for_variable(var), ts&: block_qualifier, ts&: qual, ts&: block_name); |
2773 | begin_scope(); |
2774 | |
2775 | type.member_name_cache.clear(); |
2776 | |
2777 | uint32_t i = 0; |
2778 | for (auto &member : type.member_types) |
2779 | { |
2780 | add_member_name(type, name: i); |
2781 | emit_struct_member(type, member_type_id: member, index: i); |
2782 | i++; |
2783 | } |
2784 | |
2785 | add_resource_name(id: var.self); |
2786 | end_scope_decl(decl: join(ts: to_name(id: var.self), ts: type_to_array_glsl(type, variable_id: var.self))); |
2787 | statement(ts: ""); |
2788 | } |
2789 | } |
2790 | else |
2791 | { |
2792 | // ESSL earlier than 310 and GLSL earlier than 150 did not support |
2793 | // I/O variables which are struct types. |
2794 | // To support this, flatten the struct into separate varyings instead. |
2795 | if (type.basetype == SPIRType::Struct && |
2796 | (options.force_flattened_io_blocks || (options.es && options.version < 310) || |
2797 | (!options.es && options.version < 150))) |
2798 | { |
2799 | emit_flattened_io_block(var, qual); |
2800 | } |
2801 | else |
2802 | { |
2803 | add_resource_name(id: var.self); |
2804 | |
2805 | // Legacy GLSL did not support int attributes, we automatically |
2806 | // declare them as float and cast them on load/store |
2807 | SPIRType newtype = type; |
2808 | if (is_legacy() && var.storage == StorageClassInput && type.basetype == SPIRType::Int) |
2809 | newtype.basetype = SPIRType::Float; |
2810 | |
2811 | // Tessellation control and evaluation shaders must have either |
2812 | // gl_MaxPatchVertices or unsized arrays for input arrays. |
2813 | // Opt for unsized as it's the more "correct" variant to use. |
2814 | if (type.storage == StorageClassInput && !type.array.empty() && |
2815 | !has_decoration(id: var.self, decoration: DecorationPatch) && |
2816 | (get_entry_point().model == ExecutionModelTessellationControl || |
2817 | get_entry_point().model == ExecutionModelTessellationEvaluation)) |
2818 | { |
2819 | newtype.array.back() = 0; |
2820 | newtype.array_size_literal.back() = true; |
2821 | } |
2822 | |
2823 | statement(ts: layout_for_variable(var), ts: to_qualifiers_glsl(id: var.self), |
2824 | ts: variable_decl(type: newtype, name: to_name(id: var.self), id: var.self), ts: ";"); |
2825 | } |
2826 | } |
2827 | } |
2828 | |
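// Illustrative example (not part of the original source; names are hypothetical):
// on a non-legacy target the block path above produces output like
//   layout(location = 0) out VertexData
//   {
//       vec2 uv;
//       vec3 normal;
//   } vout;
// while the non-block path produces a plain declaration such as
//   layout(location = 0) in vec4 aPos;
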
void CompilerGLSL::emit_uniform(const SPIRVariable &var)
{
	auto &type = get<SPIRType>(var.basetype);
	if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
	{
		if (!options.es && options.version < 420)
			require_extension_internal("GL_ARB_shader_image_load_store");
		else if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
	}

	add_resource_name(var.self);
	statement(layout_for_variable(var), variable_decl(var), ";");
}

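// Illustrative example (not part of the original source; the exact layout qualifiers
// depend on layout_for_variable and the image type): a storage image on desktop GL
// below 4.2 first pulls in GL_ARB_shader_image_load_store, then a declaration is
// emitted along the lines of
//   layout(binding = 0, rgba8) uniform image2D uImage;
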
string CompilerGLSL::constant_value_macro_name(uint32_t id)
{
	return join("SPIRV_CROSS_CONSTANT_ID_", id);
}

void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
{
	auto &type = get<SPIRType>(constant.basetype);
	// This will break. It is bogus and should not be legal.
	if (type_is_top_level_block(type))
		return;
	add_resource_name(constant.self);
	auto name = to_name(constant.self);
	statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
}

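// Illustrative example (not part of the original source; IDs are hypothetical):
// an OpSpecConstantOp result has no SpecId of its own, so it is emitted as a derived
// constant expression, e.g.
//   const int _20 = (SPIRV_CROSS_CONSTANT_ID_10 + 1);
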
int CompilerGLSL::get_constant_mapping_to_workgroup_component(const SPIRConstant &c) const
{
	auto &entry_point = get_entry_point();
	int index = -1;

	// Need to redirect specialization constants which are used as WorkGroupSize to the builtin,
	// since the spec constant declarations are never explicitly declared.
	if (entry_point.workgroup_size.constant == 0 && entry_point.flags.get(ExecutionModeLocalSizeId))
	{
		if (c.self == entry_point.workgroup_size.id_x)
			index = 0;
		else if (c.self == entry_point.workgroup_size.id_y)
			index = 1;
		else if (c.self == entry_point.workgroup_size.id_z)
			index = 2;
	}

	return index;
}

void CompilerGLSL::emit_constant(const SPIRConstant &constant)
{
	auto &type = get<SPIRType>(constant.constant_type);

	// This will break. It is bogus and should not be legal.
	if (type_is_top_level_block(type))
		return;

	SpecializationConstant wg_x, wg_y, wg_z;
	ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

	// This specialization constant is implicitly declared by emitting layout() in;
	if (constant.self == workgroup_size_id)
		return;

	// These specialization constants are implicitly declared by emitting layout() in;
	// In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
	// later can use macro overrides for work group size.
	bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
	                                  ConstantID(constant.self) == wg_z.id;

	if (options.vulkan_semantics && is_workgroup_size_constant)
	{
		// Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
		return;
	}
	else if (!options.vulkan_semantics && is_workgroup_size_constant &&
	         !has_decoration(constant.self, DecorationSpecId))
	{
		// Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
		return;
	}

	add_resource_name(constant.self);
	auto name = to_name(constant.self);

	// Only scalars have constant IDs.
	if (has_decoration(constant.self, DecorationSpecId))
	{
		if (options.vulkan_semantics)
		{
			statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
			          variable_decl(type, name), " = ", constant_expression(constant), ";");
		}
		else
		{
			const string &macro_name = constant.specialization_constant_macro_name;
			statement("#ifndef ", macro_name);
			statement("#define ", macro_name, " ", constant_expression(constant));
			statement("#endif");

			// For workgroup size constants, only emit the macros.
			if (!is_workgroup_size_constant)
				statement("const ", variable_decl(type, name), " = ", macro_name, ";");
		}
	}
	else
	{
		statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
	}
}

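// Illustrative example (not part of the original source; "FOO" and the values are
// hypothetical): a spec constant with SpecId 5 and default 42 becomes, in Vulkan GLSL,
//   layout(constant_id = 5) const int FOO = 42;
// and in plain GLSL a macro-overridable fallback:
//   #ifndef SPIRV_CROSS_CONSTANT_ID_5
//   #define SPIRV_CROSS_CONSTANT_ID_5 42
//   #endif
//   const int FOO = SPIRV_CROSS_CONSTANT_ID_5;
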
void CompilerGLSL::emit_entry_point_declarations()
{
}

void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
{
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (is_hidden_variable(var))
			return;

		auto *meta = ir.find_meta(var.self);
		if (!meta)
			return;

		auto &m = meta->decoration;
		if (keywords.find(m.alias) != end(keywords))
			m.alias = join("_", m.alias);
	});

	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, const SPIRFunction &func) {
		auto *meta = ir.find_meta(func.self);
		if (!meta)
			return;

		auto &m = meta->decoration;
		if (keywords.find(m.alias) != end(keywords))
			m.alias = join("_", m.alias);
	});

	ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
		auto *meta = ir.find_meta(type.self);
		if (!meta)
			return;

		auto &m = meta->decoration;
		if (keywords.find(m.alias) != end(keywords))
			m.alias = join("_", m.alias);

		for (auto &memb : meta->members)
			if (keywords.find(memb.alias) != end(keywords))
				memb.alias = join("_", memb.alias);
	});
}

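// Illustrative example (not part of the original source): a SPIR-V name that collides
// with a GLSL identifier, e.g. a variable called "texture", is renamed to "_texture"
// by the pass above before any code is emitted.
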
void CompilerGLSL::replace_illegal_names()
{
	// clang-format off
	static const unordered_set<string> keywords = {
		"abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
		"atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
		"atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
		"bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
		"ceil", "cos", "cosh", "cross", "degrees",
		"dFdx", "dFdxCoarse", "dFdxFine",
		"dFdy", "dFdyCoarse", "dFdyFine",
		"distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
		"faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
		"frexp", "fwidth", "fwidthCoarse", "fwidthFine",
		"greaterThan", "greaterThanEqual", "groupMemoryBarrier",
		"imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
		"imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
		"inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
		"matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
		"min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
		"outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
		"packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
		"radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
		"tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
		"textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
		"textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
		"transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
		"unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",

		"active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
		"bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
		"dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
		"do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
		"for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
		"iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
		"iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
		"image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
		"isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
		"isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
		"mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
		"namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
		"resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
		"sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
		"sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
		"samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
		"struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
		"uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
		"uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
		"usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
		"usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
		"while", "writeonly",
	};
	// clang-format on

	replace_illegal_names(keywords);
}

void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
{
	auto &m = ir.meta[var.self].decoration;
	uint32_t location = 0;
	if (m.decoration_flags.get(DecorationLocation))
		location = m.location;

	// If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
	// do the access chain part of this for us.
	auto &type = get<SPIRType>(var.basetype);

	if (type.array.empty())
	{
		// Redirect the write to a specific render target in legacy GLSL.
		m.alias = join("gl_FragData[", location, "]");

		if (is_legacy_es() && location != 0)
			require_extension_internal("GL_EXT_draw_buffers");
	}
	else if (type.array.size() == 1)
	{
		// If location is non-zero, we probably have to add an offset.
		// This gets really tricky since we'd have to inject an offset in the access chain.
		// FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
		m.alias = "gl_FragData";
		if (location != 0)
			SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
			                  "This is unimplemented in SPIRV-Cross.");

		if (is_legacy_es())
			require_extension_internal("GL_EXT_draw_buffers");
	}
	else
		SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");

	var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
}

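// Illustrative example (not part of the original source): in legacy GLSL a fragment
// output at location 1 is not declared at all; writes to it are redirected by aliasing
// the variable's name to "gl_FragData[1]".
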
void CompilerGLSL::replace_fragment_outputs()
{
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);

		if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
			replace_fragment_output(var);
	});
}

string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
{
	if (out_type.vecsize == input_components)
		return expr;
	else if (input_components == 1 && !backend.can_swizzle_scalar)
		return join(type_to_glsl(out_type), "(", expr, ")");
	else
	{
		// FIXME: This will not work with packed expressions.
		auto e = enclose_expression(expr) + ".";
		// Just clamp the swizzle index if we have more outputs than inputs.
		for (uint32_t c = 0; c < out_type.vecsize; c++)
			e += index_to_swizzle(min(c, input_components - 1));
		if (backend.swizzle_is_function && out_type.vecsize > 1)
			e += "()";

		remove_duplicate_swizzle(e);
		return e;
	}
}

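// Illustrative example (not part of the original source; "v" and "s" are hypothetical):
// remapping a two-component expression "v" to a vec4 output yields "v.xyyy" (swizzle
// indices clamp to the last input component), while a scalar source on a backend that
// cannot swizzle scalars becomes a constructor cast such as "vec4(s)".
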
void CompilerGLSL::emit_pls()
{
	auto &execution = get_entry_point();
	if (execution.model != ExecutionModelFragment)
		SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");

	if (!options.es)
		SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");

	if (options.version < 300)
		SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");

	if (!pls_inputs.empty())
	{
		statement("__pixel_local_inEXT _PLSIn");
		begin_scope();
		for (auto &input : pls_inputs)
			statement(pls_decl(input), ";");
		end_scope_decl();
		statement("");
	}

	if (!pls_outputs.empty())
	{
		statement("__pixel_local_outEXT _PLSOut");
		begin_scope();
		for (auto &output : pls_outputs)
			statement(pls_decl(output), ";");
		end_scope_decl();
		statement("");
	}
}

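// Illustrative example (not part of the original source; the exact layout and precision
// qualifiers come from pls_decl and the remap's format): with PLS remaps registered,
// the blocks above are emitted roughly as
//   __pixel_local_inEXT _PLSIn
//   {
//       layout(rgba8) highp vec4 color;
//   };
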
void CompilerGLSL::fixup_image_load_store_access()
{
	if (!options.enable_storage_image_qualifier_deduction)
		return;

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
		auto &vartype = expression_type(var);
		if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
		{
			// Very old glslangValidator and HLSL compilers do not emit required qualifiers here.
			// Solve this by making the image access as restricted as possible and loosen up if we need to.
			// If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.

			if (!has_decoration(var, DecorationNonWritable) && !has_decoration(var, DecorationNonReadable))
			{
				set_decoration(var, DecorationNonWritable);
				set_decoration(var, DecorationNonReadable);
			}
		}
	});
}

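// Illustrative note (not part of the original source): an image with neither
// NonWritable nor NonReadable set gets both decorations here, so it would be declared
// "readonly writeonly"; the restriction is loosened again only when actual loads or
// stores are encountered during compilation.
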
static bool is_block_builtin(BuiltIn builtin)
{
	return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
	       builtin == BuiltInCullDistance;
}

bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
{
	// If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.

	if (storage != StorageClassOutput)
		return false;
	bool should_force = false;

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		if (should_force)
			return;

		auto &type = this->get<SPIRType>(var.basetype);
		bool block = has_decoration(type.self, DecorationBlock);
		if (var.storage == storage && block && is_builtin_variable(var))
		{
			uint32_t member_count = uint32_t(type.member_types.size());
			for (uint32_t i = 0; i < member_count; i++)
			{
				if (has_member_decoration(type.self, i, DecorationBuiltIn) &&
				    is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) &&
				    has_member_decoration(type.self, i, DecorationOffset))
				{
					should_force = true;
				}
			}
		}
		else if (var.storage == storage && !block && is_builtin_variable(var))
		{
			if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) &&
			    has_decoration(var.self, DecorationOffset))
			{
				should_force = true;
			}
		}
	});

	// If we're declaring clip/cull planes with control points we need to force block declaration.
	if ((get_execution_model() == ExecutionModelTessellationControl ||
	     get_execution_model() == ExecutionModelMeshEXT) &&
	    (clip_distance_count || cull_distance_count))
	{
		should_force = true;
	}

	// Either a glslang bug or an oversight, but global invariant position does not work in mesh shaders.
	if (get_execution_model() == ExecutionModelMeshEXT && position_invariant)
		should_force = true;

	return should_force;
}

void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model)
{
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		bool block = has_decoration(type.self, DecorationBlock);
		if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
		    is_builtin_variable(var))
		{
			if (model != ExecutionModelMeshEXT)
			{
				// Make sure the array has a supported name in the code.
				if (var.storage == StorageClassOutput)
					set_name(var.self, "gl_out");
				else if (var.storage == StorageClassInput)
					set_name(var.self, "gl_in");
			}
			else
			{
				auto flags = get_buffer_block_flags(var.self);
				if (flags.get(DecorationPerPrimitiveEXT))
				{
					set_name(var.self, "gl_MeshPrimitivesEXT");
					set_name(type.self, "gl_MeshPerPrimitiveEXT");
				}
				else
				{
					set_name(var.self, "gl_MeshVerticesEXT");
					set_name(type.self, "gl_MeshPerVertexEXT");
				}
			}
		}

		if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block)
		{
			auto *m = ir.find_meta(var.self);
			if (m && m->decoration.builtin)
			{
				auto builtin_type = m->decoration.builtin_type;
				if (builtin_type == BuiltInPrimitivePointIndicesEXT)
					set_name(var.self, "gl_PrimitivePointIndicesEXT");
				else if (builtin_type == BuiltInPrimitiveLineIndicesEXT)
					set_name(var.self, "gl_PrimitiveLineIndicesEXT");
				else if (builtin_type == BuiltInPrimitiveTriangleIndicesEXT)
					set_name(var.self, "gl_PrimitiveTriangleIndicesEXT");
			}
		}
	});
}

void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
{
	Bitset emitted_builtins;
	Bitset global_builtins;
	const SPIRVariable *block_var = nullptr;
	bool emitted_block = false;

	// Need to use declared size in the type.
	// These variables might have been declared, but not statically used, so we haven't deduced their size yet.
	uint32_t cull_distance_size = 0;
	uint32_t clip_distance_size = 0;

	bool have_xfb_buffer_stride = false;
	bool have_geom_stream = false;
	bool have_any_xfb_offset = false;
	uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
	std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;

	const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool {
		return builtin == BuiltInPosition || builtin == BuiltInPointSize ||
		       builtin == BuiltInClipDistance || builtin == BuiltInCullDistance;
	};

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		bool block = has_decoration(type.self, DecorationBlock);
		Bitset builtins;

		if (var.storage == storage && block && is_builtin_variable(var))
		{
			uint32_t index = 0;
			for (auto &m : ir.meta[type.self].members)
			{
				if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
				{
					builtins.set(m.builtin_type);
					if (m.builtin_type == BuiltInCullDistance)
						cull_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
					else if (m.builtin_type == BuiltInClipDistance)
						clip_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));

					if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset))
					{
						have_any_xfb_offset = true;
						builtin_xfb_offsets[m.builtin_type] = m.offset;
					}

					if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
					{
						uint32_t stream = m.stream;
						if (have_geom_stream && geom_stream != stream)
							SPIRV_CROSS_THROW("IO block member Stream mismatch.");
						have_geom_stream = true;
						geom_stream = stream;
					}
				}
				index++;
			}

			if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) &&
			    has_decoration(var.self, DecorationXfbStride))
			{
				uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer);
				uint32_t stride = get_decoration(var.self, DecorationXfbStride);
				if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
					SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
				if (have_xfb_buffer_stride && stride != xfb_stride)
					SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
				have_xfb_buffer_stride = true;
				xfb_buffer = buffer_index;
				xfb_stride = stride;
			}

			if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream))
			{
				uint32_t stream = get_decoration(var.self, DecorationStream);
				if (have_geom_stream && geom_stream != stream)
					SPIRV_CROSS_THROW("IO block member Stream mismatch.");
				have_geom_stream = true;
				geom_stream = stream;
			}
		}
		else if (var.storage == storage && !block && is_builtin_variable(var))
		{
			// While we're at it, collect all declared global builtins (HLSL mostly ...).
			auto &m = ir.meta[var.self].decoration;
			if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
			{
				// For mesh/tesc output, Clip/Cull is an array-of-array. Look at innermost array type
				// for correct result.
				global_builtins.set(m.builtin_type);
				if (m.builtin_type == BuiltInCullDistance)
					cull_distance_size = to_array_size_literal(type, 0);
				else if (m.builtin_type == BuiltInClipDistance)
					clip_distance_size = to_array_size_literal(type, 0);

				if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) &&
				    m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset))
				{
					have_any_xfb_offset = true;
					builtin_xfb_offsets[m.builtin_type] = m.offset;
					uint32_t buffer_index = m.xfb_buffer;
					uint32_t stride = m.xfb_stride;
					if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
						SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
					if (have_xfb_buffer_stride && stride != xfb_stride)
						SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
					have_xfb_buffer_stride = true;
					xfb_buffer = buffer_index;
					xfb_stride = stride;
				}

				if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
				{
					uint32_t stream = get_decoration(var.self, DecorationStream);
					if (have_geom_stream && geom_stream != stream)
						SPIRV_CROSS_THROW("IO block member Stream mismatch.");
					have_geom_stream = true;
					geom_stream = stream;
				}
			}
		}

		if (builtins.empty())
			return;

		if (emitted_block)
			SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");

		emitted_builtins = builtins;
		emitted_block = true;
		block_var = &var;
	});

	global_builtins =
	    Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
	                                          (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));

	// Try to collect all other declared builtins.
	if (!emitted_block)
		emitted_builtins = global_builtins;

	// Can't declare an empty interface block.
	if (emitted_builtins.empty())
		return;

	if (storage == StorageClassOutput)
	{
		SmallVector<string> attr;
		if (have_xfb_buffer_stride && have_any_xfb_offset)
		{
			if (!options.es)
			{
				if (options.version < 440 && options.version >= 140)
					require_extension_internal("GL_ARB_enhanced_layouts");
				else if (options.version < 140)
					SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
				if (!options.es && options.version < 440)
					require_extension_internal("GL_ARB_enhanced_layouts");
			}
			else if (options.es)
				SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
			attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride));
		}

		if (have_geom_stream)
		{
			if (get_execution_model() != ExecutionModelGeometry)
				SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
			if (options.es)
				SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
			if (options.version < 400)
				require_extension_internal("GL_ARB_transform_feedback3");
			attr.push_back(join("stream = ", geom_stream));
		}

		if (model == ExecutionModelMeshEXT)
			statement("out gl_MeshPerVertexEXT");
		else if (!attr.empty())
			statement("layout(", merge(attr), ") out gl_PerVertex");
		else
			statement("out gl_PerVertex");
	}
	else
	{
		// If we have passthrough, there is no way PerVertex cannot be passthrough.
		if (get_entry_point().geometry_passthrough)
			statement("layout(passthrough) in gl_PerVertex");
		else
			statement("in gl_PerVertex");
	}

	begin_scope();
	if (emitted_builtins.get(BuiltInPosition))
	{
		auto itr = builtin_xfb_offsets.find(BuiltInPosition);
		if (itr != end(builtin_xfb_offsets))
			statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;");
		else if (position_invariant)
			statement("invariant vec4 gl_Position;");
		else
			statement("vec4 gl_Position;");
	}

	if (emitted_builtins.get(BuiltInPointSize))
	{
		auto itr = builtin_xfb_offsets.find(BuiltInPointSize);
		if (itr != end(builtin_xfb_offsets))
			statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;");
		else
			statement("float gl_PointSize;");
	}

	if (emitted_builtins.get(BuiltInClipDistance))
	{
		auto itr = builtin_xfb_offsets.find(BuiltInClipDistance);
		if (itr != end(builtin_xfb_offsets))
			statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];");
		else
			statement("float gl_ClipDistance[", clip_distance_size, "];");
	}

	if (emitted_builtins.get(BuiltInCullDistance))
	{
		auto itr = builtin_xfb_offsets.find(BuiltInCullDistance);
		if (itr != end(builtin_xfb_offsets))
			statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];");
		else
			statement("float gl_CullDistance[", cull_distance_size, "];");
	}

	bool builtin_array = model == ExecutionModelTessellationControl ||
	                     (model == ExecutionModelMeshEXT && storage == StorageClassOutput) ||
	                     (model == ExecutionModelGeometry && storage == StorageClassInput) ||
	                     (model == ExecutionModelTessellationEvaluation && storage == StorageClassInput);

	if (builtin_array)
	{
		const char *instance_name;
		if (model == ExecutionModelMeshEXT)
			instance_name = "gl_MeshVerticesEXT"; // Per primitive is never synthesized.
		else
			instance_name = storage == StorageClassInput ? "gl_in" : "gl_out";

		if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
			end_scope_decl(join(instance_name, "[", get_entry_point().output_vertices, "]"));
		else
			end_scope_decl(join(instance_name, "[]"));
	}
	else
		end_scope_decl();
	statement("");
}

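// Illustrative example (not part of the original source; offsets and strides are
// hypothetical): when XFB decorations are present, the redeclared builtin block looks
// something like
//   layout(xfb_buffer = 0, xfb_stride = 16) out gl_PerVertex
//   {
//       layout(xfb_offset = 0) vec4 gl_Position;
//   };
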
bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
{
	bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;

	if (statically_assigned)
	{
		auto *constant = maybe_get<SPIRConstant>(var.static_expression);
		if (constant && constant->is_used_as_lut)
			return true;
	}

	return false;
}

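// Illustrative note (not part of the original source; "_lut" is a hypothetical name):
// a variable classified as a LUT here is skipped when globals are declared; its
// backing constant is emitted once instead, e.g.
//   const float _lut[4] = float[](1.0, 2.0, 3.0, 4.0);
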
void CompilerGLSL::emit_resources()
{
	auto &execution = get_entry_point();

	replace_illegal_names();

	// Legacy GL uses gl_FragData[], redeclare all fragment outputs
	// with builtins.
	if (execution.model == ExecutionModelFragment && is_legacy())
		replace_fragment_outputs();

	// Emit PLS blocks if we have such variables.
	if (!pls_inputs.empty() || !pls_outputs.empty())
		emit_pls();

	switch (execution.model)
	{
	case ExecutionModelGeometry:
	case ExecutionModelTessellationControl:
	case ExecutionModelTessellationEvaluation:
	case ExecutionModelMeshEXT:
		fixup_implicit_builtin_block_names(execution.model);
		break;

	default:
		break;
	}

	bool global_invariant_position = position_invariant && (options.es || options.version >= 120);

	// Emit custom gl_PerVertex for SSO compatibility.
	if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
	{
		switch (execution.model)
		{
		case ExecutionModelGeometry:
		case ExecutionModelTessellationControl:
		case ExecutionModelTessellationEvaluation:
			emit_declared_builtin_block(StorageClassInput, execution.model);
			emit_declared_builtin_block(StorageClassOutput, execution.model);
			global_invariant_position = false;
			break;

		case ExecutionModelVertex:
		case ExecutionModelMeshEXT:
			emit_declared_builtin_block(StorageClassOutput, execution.model);
			global_invariant_position = false;
			break;

		default:
			break;
		}
	}
	else if (should_force_emit_builtin_block(StorageClassOutput))
	{
		emit_declared_builtin_block(StorageClassOutput, execution.model);
		global_invariant_position = false;
	}
	else if (execution.geometry_passthrough)
	{
		// Need to declare gl_in with Passthrough.
		// If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
		emit_declared_builtin_block(StorageClassInput, execution.model);
	}
	else
	{
		// Need to redeclare clip/cull distance with explicit size to use them.
		// SPIR-V mandates these builtins have a size declared.
		const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
		if (clip_distance_count != 0)
			statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
		if (cull_distance_count != 0)
			statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
		if (clip_distance_count != 0 || cull_distance_count != 0)
			statement("");
	}

	if (global_invariant_position)
	{
		statement("invariant gl_Position;");
		statement("");
	}

	bool emitted = false;

	// If emitting Vulkan GLSL, emit specialization constants as actual constants;
	// spec op expressions will redirect to the constant name.
	{
		auto loop_lock = ir.create_loop_hard_lock();
		for (auto &id_ : ir.ids_for_constant_undef_or_type)
		{
			auto &id = ir.ids[id_];

			// Skip declaring any bogus constants or undefs which use block types.
			// We don't declare block types directly, so this will never work.
			// Should not be legal SPIR-V, so this is considered a workaround.

			if (id.get_type() == TypeConstant)
			{
				auto &c = id.get<SPIRConstant>();

				bool needs_declaration = c.specialization || c.is_used_as_lut;

				if (needs_declaration)
				{
					if (!options.vulkan_semantics && c.specialization)
					{
						c.specialization_constant_macro_name =
						    constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
					}
					emit_constant(c);
					emitted = true;
				}
			}
			else if (id.get_type() == TypeConstantOp)
			{
				emit_specialization_constant_op(id.get<SPIRConstantOp>());
				emitted = true;
			}
			else if (id.get_type() == TypeType)
			{
				auto *type = &id.get<SPIRType>();

				bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
				                         (!has_decoration(type->self, DecorationBlock) &&
				                          !has_decoration(type->self, DecorationBufferBlock));

				// Special case, ray payload and hit attribute blocks are not really blocks, just regular structs.
				if (type->basetype == SPIRType::Struct && type->pointer &&
				    has_decoration(type->self, DecorationBlock) &&
				    (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
				     type->storage == StorageClassHitAttributeKHR))
				{
					type = &get<SPIRType>(type->parent_type);
					is_natural_struct = true;
				}

				if (is_natural_struct)
				{
					if (emitted)
						statement("");
					emitted = false;

					emit_struct(*type);
				}
			}
			else if (id.get_type() == TypeUndef)
			{
				auto &undef = id.get<SPIRUndef>();
				auto &type = this->get<SPIRType>(undef.basetype);
				// OpUndef can be void for some reason ...
				if (type.basetype == SPIRType::Void)
					return;

				// This will break. It is bogus and should not be legal.
				if (type_is_top_level_block(type))
					return;

				string initializer;
				if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
					initializer = join(" = ", to_zero_initialized_expression(undef.basetype));

				// FIXME: If used in a constant, we must declare it as one.
				statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
				emitted = true;
			}
		}
	}

	if (emitted)
		statement("");

	// If we needed to declare work group size late, check here.
	// If the work group size depends on a specialization constant, we need to declare the layout() block
	// after constants (and their macros) have been declared.
	if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
	    (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId)))
	{
		SpecializationConstant wg_x, wg_y, wg_z;
		get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

		if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
		{
			SmallVector<string> inputs;
			build_workgroup_size(inputs, wg_x, wg_y, wg_z);
			statement("layout(", merge(inputs), ") in;");
			statement("");
		}
	}

	emitted = false;

	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
	{
		// Output buffer reference blocks.
		// Do this in two stages, one with forward declaration,
		// and one without. Buffer reference blocks can reference themselves
		// to support things like linked lists.
		ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type) {
			if (is_physical_pointer(type))
			{
				bool emit_type = true;
				if (!is_physical_pointer_to_buffer_block(type))
				{
					// Only forward-declare if we intend to emit it in the non_block_pointer types.
					// Otherwise, these are just "benign" pointer types that exist as a result of access chains.
					emit_type = std::find(physical_storage_non_block_pointer_types.begin(),
					                      physical_storage_non_block_pointer_types.end(),
					                      id) != physical_storage_non_block_pointer_types.end();
				}

				if (emit_type)
					emit_buffer_reference_block(id, true);
			}
		});

		for (auto type : physical_storage_non_block_pointer_types)
			emit_buffer_reference_block(type, false);

		ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type) {
			if (is_physical_pointer_to_buffer_block(type))
				emit_buffer_reference_block(id, false);
		});
	}

	// Output UBOs and SSBOs
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);

		bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
		                        type.storage == StorageClassShaderRecordBufferKHR;
		bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
		                       ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);

		if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
		    has_block_flags)
		{
			emit_buffer_block(var);
		}
	});

	// Output push constant blocks
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
		    !is_hidden_variable(var))
		{
			emit_push_constant_block(var);
		}
	});

	bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;

	// Output Uniform Constants (values, samplers, images, etc).
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);

		// If we're remapping separate samplers and images, only emit the combined samplers.
		if (skip_separate_image_sampler)
		{
			// Sampler buffers are always used without a sampler, and they will also work in regular GL.
			bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
			bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
			bool separate_sampler = type.basetype == SPIRType::Sampler;
			if (!sampler_buffer && (separate_image || separate_sampler))
				return;
		}

		if (var.storage != StorageClassFunction && type.pointer &&
		    (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
		     type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
		     type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
		     type.storage == StorageClassHitAttributeKHR) &&
		    !is_hidden_variable(var))
		{
			emit_uniform(var);
			emitted = true;
		}
	});

	if (emitted)
		statement("");
	emitted = false;

	bool emitted_base_instance = false;

	// Output in/out interfaces.
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);

		bool is_hidden = is_hidden_variable(var);

		// Unused output I/O variables might still be required to implement framebuffer fetch.
		if (var.storage == StorageClassOutput && !is_legacy() &&
		    location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0)
		{
			is_hidden = false;
		}

		if (var.storage != StorageClassFunction && type.pointer &&
		    (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
		    interface_variable_exists_in_entry_point(var.self) && !is_hidden)
		{
			if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput &&
			    type.array.size() == 1)
			{
				SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader.");
			}
			emit_interface_block(var);
			emitted = true;
		}
		else if (is_builtin_variable(var))
		{
			auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
			// For gl_InstanceIndex emulation on GLES, the API user needs to
			// supply this uniform.

			// The draw parameter extension is soft-enabled on GL with some fallbacks.
			if (!options.vulkan_semantics)
			{
				if (!emitted_base_instance &&
				    ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
				     (builtin == BuiltInBaseInstance)))
				{
					statement("#ifdef GL_ARB_shader_draw_parameters");
					statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
					statement("#else");
					// A crude, but simple workaround which should be good enough for non-indirect draws.
					statement("uniform int SPIRV_Cross_BaseInstance;");
					statement("#endif");
					emitted = true;
					emitted_base_instance = true;
				}
				else if (builtin == BuiltInBaseVertex)
				{
					statement("#ifdef GL_ARB_shader_draw_parameters");
					statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
					statement("#else");
					// A crude, but simple workaround which should be good enough for non-indirect draws.
					statement("uniform int SPIRV_Cross_BaseVertex;");
					statement("#endif");
				}
				else if (builtin == BuiltInDrawIndex)
				{
					statement("#ifndef GL_ARB_shader_draw_parameters");
					// Cannot really be worked around.
					statement("#error GL_ARB_shader_draw_parameters is not supported.");
					statement("#endif");
				}
			}
		}
	});

	// Global variables.
	for (auto global : global_variables)
	{
		auto &var = get<SPIRVariable>(global);
		if (is_hidden_variable(var, true))
			continue;

		if (var.storage != StorageClassOutput)
		{
			if (!variable_is_lut(var))
			{
				add_resource_name(var.self);

				string initializer;
				if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
				    !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
				{
					initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
				}

				statement(variable_decl(var), initializer, ";");
				emitted = true;
			}
		}
		else if (var.initializer && maybe_get<SPIRConstant>(var.initializer) != nullptr)
		{
			emit_output_variable_initializer(var);
		}
	}

	if (emitted)
		statement("");
}

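// Illustrative note (not part of the original source; the "_22_0_init" name mirrors the
// join("_", var.self, "_", i, "_init") pattern below with hypothetical IDs): GLSL cannot
// initialize output blocks at global scope, so for an initialized output block the
// function below synthesizes per-member constant arrays and assigns them element by
// element from fixup hooks that run at the top of main(), e.g.
//   gl_out[gl_InvocationID].gl_Position = _22_0_init[gl_InvocationID];
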
3931 | void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var) |
3932 | { |
3933 | // If a StorageClassOutput variable has an initializer, we need to initialize it in main(). |
3934 | auto &entry_func = this->get<SPIRFunction>(id: ir.default_entry_point); |
3935 | auto &type = get<SPIRType>(id: var.basetype); |
3936 | bool is_patch = has_decoration(id: var.self, decoration: DecorationPatch); |
3937 | bool is_block = has_decoration(id: type.self, decoration: DecorationBlock); |
3938 | bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch; |
3939 | |
3940 | if (is_block) |
3941 | { |
3942 | uint32_t member_count = uint32_t(type.member_types.size()); |
3943 | bool type_is_array = type.array.size() == 1; |
3944 | uint32_t array_size = 1; |
3945 | if (type_is_array) |
3946 | array_size = to_array_size_literal(type); |
3947 | uint32_t iteration_count = is_control_point ? 1 : array_size; |
3948 | |
3949 | // If the initializer is a block, we must initialize each block member one at a time. |
3950 | for (uint32_t i = 0; i < member_count; i++) |
3951 | { |
3952 | // These outputs might not have been properly declared, so don't initialize them in that case. |
3953 | if (has_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn)) |
3954 | { |
3955 | if (get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn) == BuiltInCullDistance && |
3956 | !cull_distance_count) |
3957 | continue; |
3958 | |
3959 | if (get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn) == BuiltInClipDistance && |
3960 | !clip_distance_count) |
3961 | continue; |
3962 | } |
3963 | |
3964 | // We need to build a per-member array first, essentially transposing from AoS to SoA. |
3965 | // This code path hits when we have an array of blocks. |
3966 | string lut_name; |
3967 | if (type_is_array) |
3968 | { |
3969 | lut_name = join(ts: "_", ts: var.self, ts: "_", ts&: i, ts: "_init"); |
3970 | uint32_t member_type_id = get<SPIRType>(id: var.basetype).member_types[i]; |
3971 | auto &member_type = get<SPIRType>(id: member_type_id); |
3972 | auto array_type = member_type; |
3973 | array_type.parent_type = member_type_id; |
3974 | array_type.op = OpTypeArray; |
3975 | array_type.array.push_back(t: array_size); |
3976 | array_type.array_size_literal.push_back(t: true); |
3977 | |
3978 | SmallVector<string> exprs; |
3979 | exprs.reserve(count: array_size); |
3980 | auto &c = get<SPIRConstant>(id: var.initializer); |
3981 | for (uint32_t j = 0; j < array_size; j++) |
3982 | exprs.push_back(t: to_expression(id: get<SPIRConstant>(id: c.subconstants[j]).subconstants[i])); |
3983 | statement(ts: "const ", ts: type_to_glsl(type: array_type), ts: " ", ts&: lut_name, ts: type_to_array_glsl(type: array_type, variable_id: 0), ts: " = ", |
3984 | ts: type_to_glsl_constructor(type: array_type), ts: "(", ts: merge(list: exprs, between: ", "), ts: ");"); |
3985 | } |
3986 | |
3987 | for (uint32_t j = 0; j < iteration_count; j++) |
3988 | { |
3989 | entry_func.fixup_hooks_in.push_back(t: [=, &var]() { |
3990 | AccessChainMeta meta; |
3991 | auto &c = this->get<SPIRConstant>(id: var.initializer); |
3992 | |
3993 | uint32_t invocation_id = 0; |
3994 | uint32_t member_index_id = 0; |
3995 | if (is_control_point) |
3996 | { |
3997 | uint32_t ids = ir.increase_bound_by(count: 3); |
3998 | auto &uint_type = set<SPIRType>(id: ids, args: OpTypeInt); |
3999 | uint_type.basetype = SPIRType::UInt; |
4000 | uint_type.width = 32; |
4001 | set<SPIRExpression>(id: ids + 1, args: builtin_to_glsl(builtin: BuiltInInvocationId, storage: StorageClassInput), args&: ids, args: true); |
4002 | set<SPIRConstant>(id: ids + 2, args&: ids, args: i, args: false); |
4003 | invocation_id = ids + 1; |
4004 | member_index_id = ids + 2; |
4005 | } |
4006 | |
4007 | if (is_patch) |
4008 | { |
4009 | statement(ts: "if (gl_InvocationID == 0)"); |
4010 | begin_scope(); |
4011 | } |
4012 | |
4013 | if (type_is_array && !is_control_point) |
4014 | { |
4015 | uint32_t indices[2] = { j, i }; |
4016 | auto chain = access_chain_internal(base: var.self, indices, count: 2, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &meta); |
4017 | statement(ts&: chain, ts: " = ", ts: lut_name, ts: "[", ts: j, ts: "];"); |
4018 | } |
4019 | else if (is_control_point) |
4020 | { |
4021 | uint32_t indices[2] = { invocation_id, member_index_id }; |
4022 | auto chain = access_chain_internal(base: var.self, indices, count: 2, flags: 0, meta: &meta); |
4023 | statement(ts&: chain, ts: " = ", ts: lut_name, ts: "[", ts: builtin_to_glsl(builtin: BuiltInInvocationId, storage: StorageClassInput), ts: "];"); |
4024 | } |
4025 | else |
4026 | { |
4027 | auto chain = |
4028 | access_chain_internal(base: var.self, indices: &i, count: 1, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: &meta); |
4029 | statement(ts&: chain, ts: " = ", ts: to_expression(id: c.subconstants[i]), ts: ";"); |
4030 | } |
4031 | |
4032 | if (is_patch) |
4033 | end_scope(); |
4034 | }); |
4035 | } |
4036 | } |
4037 | } |
4038 | else if (is_control_point) |
4039 | { |
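// Non-block per-control-point output. Stage the initializer in a constant LUT, then let each
// invocation copy its own element, e.g. foo[gl_InvocationID] = _5_init[gl_InvocationID];
// (identifiers illustrative).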
4040 | auto lut_name = join(ts: "_", ts: var.self, ts: "_init"); |
4041 | statement(ts: "const ", ts: type_to_glsl(type), ts: " ", ts&: lut_name, ts: type_to_array_glsl(type, variable_id: 0), |
4042 | ts: " = ", ts: to_expression(id: var.initializer), ts: ";"); |
4043 | entry_func.fixup_hooks_in.push_back(t: [&, lut_name]() { |
4044 | statement(ts: to_expression(id: var.self), ts: "[gl_InvocationID] = ", ts: lut_name, ts: "[gl_InvocationID];"); |
4045 | }); |
4046 | } |
4047 | else if (has_decoration(id: var.self, decoration: DecorationBuiltIn) && |
4048 | BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn)) == BuiltInSampleMask) |
4049 | { |
4050 | // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_< |
4051 | entry_func.fixup_hooks_in.push_back(t: [&] { |
4052 | auto &c = this->get<SPIRConstant>(id: var.initializer); |
4053 | uint32_t num_constants = uint32_t(c.subconstants.size()); |
4054 | for (uint32_t i = 0; i < num_constants; i++) |
4055 | { |
4056 | // Don't use to_expression on constant since it might be uint, just fish out the raw int. |
4057 | statement(ts: to_expression(id: var.self), ts: "[", ts&: i, ts: "] = ", |
4058 | ts: convert_to_string(value: this->get<SPIRConstant>(id: c.subconstants[i]).scalar_i32()), ts: ";"); |
4059 | } |
4060 | }); |
4061 | } |
4062 | else |
4063 | { |
4064 | auto lut_name = join(ts: "_", ts: var.self, ts: "_init"); |
4065 | statement(ts: "const ", ts: type_to_glsl(type), ts: " ", ts&: lut_name, |
4066 | ts: type_to_array_glsl(type, variable_id: var.self), ts: " = ", ts: to_expression(id: var.initializer), ts: ";"); |
4067 | entry_func.fixup_hooks_in.push_back(t: [&, lut_name, is_patch]() { |
4068 | if (is_patch) |
4069 | { |
4070 | statement(ts: "if (gl_InvocationID == 0)"); |
4071 | begin_scope(); |
4072 | } |
4073 | statement(ts: to_expression(id: var.self), ts: " = ", ts: lut_name, ts: ";"); |
4074 | if (is_patch) |
4075 | end_scope(); |
4076 | }); |
4077 | } |
4078 | } |
4079 | |
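// Emits one polyfill function for a subgroup arithmetic op (add/mul; reduce or scan) in terms of
// NV_shader_thread_shuffle, with one overload per entry in type_infos. A rough sketch of the
// emitted GLSL for a uint reduction (illustrative, not verbatim output):
//
//   uint subgroupAdd(uint v)
//   {
//       uint reduction = 0u;
//       uvec4 active_threads = subgroupBallot(true);
//       if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)
//       {
//           // Fully active subgroup: log2(N) butterfly steps via shuffleXorNV.
//       }
//       else
//       {
//           // Partially active subgroup: serial walk over the ballot.
//       }
//       return reduction;
//   }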
4080 | void CompilerGLSL::emit_subgroup_arithmetic_workaround(const std::string &func, Op op, GroupOperation group_op) |
4081 | { |
4082 | std::string result; |
4083 | switch (group_op) |
4084 | { |
4085 | case GroupOperationReduce: |
4086 | result = "reduction"; |
4087 | break; |
4088 | |
4089 | case GroupOperationExclusiveScan: |
4090 | result = "excl_scan"; |
4091 | break; |
4092 | |
4093 | case GroupOperationInclusiveScan: |
4094 | result = "incl_scan"; |
4095 | break; |
4096 | |
4097 | default: |
4098 | SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation"); |
4099 | } |
4100 | |
4101 | struct TypeInfo |
4102 | { |
4103 | std::string type; |
4104 | std::string identity; |
4105 | }; |
4106 | |
4107 | std::vector<TypeInfo> type_infos; |
4108 | switch (op) |
4109 | { |
4110 | case OpGroupNonUniformIAdd: |
4111 | { |
4112 | type_infos.emplace_back(args: TypeInfo{ .type: "uint", .identity: "0u"}); |
4113 | type_infos.emplace_back(args: TypeInfo{ .type: "uvec2", .identity: "uvec2(0u)"}); |
4114 | type_infos.emplace_back(args: TypeInfo{ .type: "uvec3", .identity: "uvec3(0u)"}); |
4115 | type_infos.emplace_back(args: TypeInfo{ .type: "uvec4", .identity: "uvec4(0u)"}); |
4116 | type_infos.emplace_back(args: TypeInfo{ .type: "int", .identity: "0"}); |
4117 | type_infos.emplace_back(args: TypeInfo{ .type: "ivec2", .identity: "ivec2(0)"}); |
4118 | type_infos.emplace_back(args: TypeInfo{ .type: "ivec3", .identity: "ivec3(0)"}); |
4119 | type_infos.emplace_back(args: TypeInfo{ .type: "ivec4", .identity: "ivec4(0)"}); |
4120 | break; |
4121 | } |
4122 | |
4123 | case OpGroupNonUniformFAdd: |
4124 | { |
4125 | type_infos.emplace_back(args: TypeInfo{ .type: "float", .identity: "0.0f"}); |
4126 | type_infos.emplace_back(args: TypeInfo{ .type: "vec2", .identity: "vec2(0.0f)"}); |
4127 | type_infos.emplace_back(args: TypeInfo{ .type: "vec3", .identity: "vec3(0.0f)"}); |
4128 | type_infos.emplace_back(args: TypeInfo{ .type: "vec4", .identity: "vec4(0.0f)"}); |
4129 | // ARB_gpu_shader_fp64 is core in GL 4.0, which is in turn required by NV_shader_thread_shuffle.
4130 | type_infos.emplace_back(args: TypeInfo{ .type: "double", .identity: "0.0LF"}); |
4131 | type_infos.emplace_back(args: TypeInfo{ .type: "dvec2", .identity: "dvec2(0.0LF)"}); |
4132 | type_infos.emplace_back(args: TypeInfo{ .type: "dvec3", .identity: "dvec3(0.0LF)"}); |
4133 | type_infos.emplace_back(args: TypeInfo{ .type: "dvec4", .identity: "dvec4(0.0LF)"}); |
4134 | break; |
4135 | } |
4136 | |
4137 | case OpGroupNonUniformIMul: |
4138 | { |
4139 | type_infos.emplace_back(args: TypeInfo{ .type: "uint", .identity: "1u"}); |
4140 | type_infos.emplace_back(args: TypeInfo{ .type: "uvec2", .identity: "uvec2(1u)"}); |
4141 | type_infos.emplace_back(args: TypeInfo{ .type: "uvec3", .identity: "uvec3(1u)"}); |
4142 | type_infos.emplace_back(args: TypeInfo{ .type: "uvec4", .identity: "uvec4(1u)"}); |
4143 | type_infos.emplace_back(args: TypeInfo{ .type: "int", .identity: "1"}); |
4144 | type_infos.emplace_back(args: TypeInfo{ .type: "ivec2", .identity: "ivec2(1)"}); |
4145 | type_infos.emplace_back(args: TypeInfo{ .type: "ivec3", .identity: "ivec3(1)"}); |
4146 | type_infos.emplace_back(args: TypeInfo{ .type: "ivec4", .identity: "ivec4(1)"}); |
4147 | break; |
4148 | } |
4149 | |
4150 | case OpGroupNonUniformFMul: |
4151 | { |
4152 | type_infos.emplace_back(args: TypeInfo{ .type: "float", .identity: "1.0f"}); |
4153 | type_infos.emplace_back(args: TypeInfo{ .type: "vec2", .identity: "vec2(1.0f)"}); |
4154 | type_infos.emplace_back(args: TypeInfo{ .type: "vec3", .identity: "vec3(1.0f)"}); |
4155 | type_infos.emplace_back(args: TypeInfo{ .type: "vec4", .identity: "vec4(1.0f)"}); |
4156 | type_infos.emplace_back(args: TypeInfo{ .type: "double", .identity: "1.0LF"});
4157 | type_infos.emplace_back(args: TypeInfo{ .type: "dvec2", .identity: "dvec2(1.0LF)"}); |
4158 | type_infos.emplace_back(args: TypeInfo{ .type: "dvec3", .identity: "dvec3(1.0LF)"}); |
4159 | type_infos.emplace_back(args: TypeInfo{ .type: "dvec4", .identity: "dvec4(1.0LF)"}); |
4160 | break; |
4161 | } |
4162 | |
4163 | default: |
4164 | SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation"); |
4165 | } |
4166 | |
4167 | const bool op_is_addition = op == OpGroupNonUniformIAdd || op == OpGroupNonUniformFAdd; |
4168 | const bool op_is_multiplication = op == OpGroupNonUniformIMul || op == OpGroupNonUniformFMul; |
4169 | std::string op_symbol; |
4170 | if (op_is_addition) |
4171 | { |
4172 | op_symbol = "+="; |
4173 | } |
4174 | else if (op_is_multiplication) |
4175 | { |
4176 | op_symbol = "*="; |
4177 | } |
4178 | |
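// Two code paths are emitted per overload: a fast path that runs shuffles in lockstep when the
// whole subgroup is active, and a fallback that serially accumulates over the ballot otherwise.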
4179 | for (const TypeInfo &t : type_infos) |
4180 | { |
4181 | statement(ts: t.type, ts: " ", ts: func, ts: "(", ts: t.type, ts: " v)"); |
4182 | begin_scope(); |
4183 | statement(ts: t.type, ts: " ", ts&: result, ts: " = ", ts: t.identity, ts: ";"); |
4184 | statement(ts: "uvec4 active_threads = subgroupBallot(true);"); |
4185 | statement(ts: "if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)"); |
4186 | begin_scope(); |
4187 | statement(ts: "uint total = gl_SubgroupSize / 2u;"); |
4188 | statement(ts&: result, ts: " = v;"); |
4189 | statement(ts: "for (uint i = 1u; i <= total; i <<= 1u)"); |
4190 | begin_scope(); |
4191 | statement(ts: "bool valid;"); |
4192 | if (group_op == GroupOperationReduce) |
4193 | { |
4194 | statement(ts: t.type, ts: " s = shuffleXorNV(", ts&: result, ts: ", i, gl_SubgroupSize, valid);"); |
4195 | } |
4196 | else if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan) |
4197 | { |
4198 | statement(ts: t.type, ts: " s = shuffleUpNV(", ts&: result, ts: ", i, gl_SubgroupSize, valid);"); |
4199 | } |
4200 | if (op_is_addition || op_is_multiplication) |
4201 | { |
4202 | statement(ts&: result, ts: " ", ts&: op_symbol, ts: " valid ? s : ", ts: t.identity, ts: ";"); |
4203 | } |
4204 | end_scope(); |
4205 | if (group_op == GroupOperationExclusiveScan) |
4206 | { |
4207 | statement(ts&: result, ts: " = shuffleUpNV(", ts&: result, ts: ", 1u, gl_SubgroupSize);"); |
4208 | statement(ts: "if (subgroupElect())"); |
4209 | begin_scope(); |
4210 | statement(ts&: result, ts: " = ", ts: t.identity, ts: ";"); |
4211 | end_scope(); |
4212 | } |
4213 | end_scope(); |
4214 | statement(ts: "else"); |
4215 | begin_scope(); |
4216 | if (group_op == GroupOperationExclusiveScan) |
4217 | { |
4218 | statement(ts: "uint total = subgroupBallotBitCount(gl_SubgroupLtMask);"); |
4219 | } |
4220 | else if (group_op == GroupOperationInclusiveScan) |
4221 | { |
4222 | statement(ts: "uint total = subgroupBallotBitCount(gl_SubgroupLeMask);"); |
4223 | } |
4224 | statement(ts: "for (uint i = 0u; i < gl_SubgroupSize; ++i)"); |
4225 | begin_scope(); |
4226 | statement(ts: "bool valid = subgroupBallotBitExtract(active_threads, i);"); |
4227 | statement(ts: t.type, ts: " s = shuffleNV(v, i, gl_SubgroupSize);"); |
4228 | if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan) |
4229 | { |
4230 | statement(ts: "valid = valid && (i < total);"); |
4231 | } |
4232 | if (op_is_addition || op_is_multiplication) |
4233 | { |
4234 | statement(ts&: result, ts: " ", ts&: op_symbol, ts: " valid ? s : ", ts: t.identity, ts: ";"); |
4235 | } |
4236 | end_scope(); |
4237 | end_scope(); |
4238 | statement(ts: "return ", ts&: result, ts: ";"); |
4239 | end_scope(); |
4240 | } |
4241 | } |
4242 | |
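// Emits preprocessor-guarded fallbacks that map GL_KHR_shader_subgroup_* functionality onto older
// vendor extensions when not targeting Vulkan GLSL. Each requested feature resolves to a list of
// candidate extensions, emitted as a #if/#elif chain along these lines (illustrative):
//
//   #if defined(GL_NV_shader_thread_group)
//   #define gl_SubgroupSize gl_WarpSizeNV
//   #elif defined(GL_ARB_shader_ballot)
//   #define gl_SubgroupSize gl_SubGroupSizeARB
//   #endif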
4243 | void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) |
4244 | { |
4245 | static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4", |
4246 | "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4"}; |
4247 | |
4248 | if (!options.vulkan_semantics) |
4249 | { |
4250 | using Supp = ShaderSubgroupSupportHelper; |
4251 | auto result = shader_subgroup_supporter.resolve(); |
4252 | |
4253 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupMask)) |
4254 | { |
4255 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupMask, r: result); |
4256 | |
4257 | for (auto &e : exts) |
4258 | { |
4259 | const char *name = Supp::get_extension_name(c: e); |
4260 | statement(ts: &e == &exts.front() ? "#if": "#elif", ts: " defined(", ts&: name, ts: ")"); |
4261 | |
4262 | switch (e) |
4263 | { |
4264 | case Supp::NV_shader_thread_group: |
4265 | statement(ts: "#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)"); |
4266 | statement(ts: "#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)"); |
4267 | statement(ts: "#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)"); |
4268 | statement(ts: "#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)"); |
4269 | statement(ts: "#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)"); |
4270 | break; |
4271 | case Supp::ARB_shader_ballot: |
4272 | statement(ts: "#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)"); |
4273 | statement(ts: "#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)"); |
4274 | statement(ts: "#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)"); |
4275 | statement(ts: "#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)"); |
4276 | statement(ts: "#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)"); |
4277 | break; |
4278 | default: |
4279 | break; |
4280 | } |
4281 | } |
4282 | statement(ts: "#endif"); |
4283 | statement(ts: ""); |
4284 | } |
4285 | |
4286 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupSize)) |
4287 | { |
4288 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupSize, r: result); |
4289 | |
4290 | for (auto &e : exts) |
4291 | { |
4292 | const char *name = Supp::get_extension_name(c: e); |
4293 | statement(ts: &e == &exts.front() ? "#if": "#elif", ts: " defined(", ts&: name, ts: ")"); |
4294 | |
4295 | switch (e) |
4296 | { |
4297 | case Supp::NV_shader_thread_group: |
4298 | statement(ts: "#define gl_SubgroupSize gl_WarpSizeNV"); |
4299 | break; |
4300 | case Supp::ARB_shader_ballot: |
4301 | statement(ts: "#define gl_SubgroupSize gl_SubGroupSizeARB"); |
4302 | break; |
4303 | case Supp::AMD_gcn_shader: |
4304 | statement(ts: "#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)"); |
4305 | break; |
4306 | default: |
4307 | break; |
4308 | } |
4309 | } |
4310 | statement(ts: "#endif"); |
4311 | statement(ts: ""); |
4312 | } |
4313 | |
4314 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupInvocationID)) |
4315 | { |
4316 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupInvocationID, r: result); |
4317 | |
4318 | for (auto &e : exts) |
4319 | { |
4320 | const char *name = Supp::get_extension_name(c: e); |
4321 | statement(ts: &e == &exts.front() ? "#if": "#elif", ts: " defined(", ts&: name, ts: ")"); |
4322 | |
4323 | switch (e) |
4324 | { |
4325 | case Supp::NV_shader_thread_group: |
4326 | statement(ts: "#define gl_SubgroupInvocationID gl_ThreadInWarpNV"); |
4327 | break; |
4328 | case Supp::ARB_shader_ballot: |
4329 | statement(ts: "#define gl_SubgroupInvocationID gl_SubGroupInvocationARB"); |
4330 | break; |
4331 | default: |
4332 | break; |
4333 | } |
4334 | } |
4335 | statement(ts: "#endif"); |
4336 | statement(ts: ""); |
4337 | } |
4338 | |
4339 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupID)) |
4340 | { |
4341 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupID, r: result); |
4342 | |
4343 | for (auto &e : exts) |
4344 | { |
4345 | const char *name = Supp::get_extension_name(c: e); |
4346 | statement(ts: &e == &exts.front() ? "#if": "#elif", ts: " defined(", ts&: name, ts: ")"); |
4347 | |
4348 | switch (e) |
4349 | { |
4350 | case Supp::NV_shader_thread_group: |
4351 | statement(ts: "#define gl_SubgroupID gl_WarpIDNV"); |
4352 | break; |
4353 | default: |
4354 | break; |
4355 | } |
4356 | } |
4357 | statement(ts: "#endif"); |
4358 | statement(ts: ""); |
4359 | } |
4360 | |
4361 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::NumSubgroups)) |
4362 | { |
4363 | auto exts = Supp::get_candidates_for_feature(ft: Supp::NumSubgroups, r: result); |
4364 | |
4365 | for (auto &e : exts) |
4366 | { |
4367 | const char *name = Supp::get_extension_name(c: e); |
4368 | statement(ts: &e == &exts.front() ? "#if": "#elif", ts: " defined(", ts&: name, ts: ")"); |
4369 | |
4370 | switch (e) |
4371 | { |
4372 | case Supp::NV_shader_thread_group: |
4373 | statement(ts: "#define gl_NumSubgroups gl_WarpsPerSMNV"); |
4374 | break; |
4375 | default: |
4376 | break; |
4377 | } |
4378 | } |
4379 | statement(ts: "#endif"); |
4380 | statement(ts: ""); |
4381 | } |
4382 | |
4383 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBroadcast_First)) |
4384 | { |
4385 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBroadcast_First, r: result); |
4386 | |
4387 | for (auto &e : exts) |
4388 | { |
4389 | const char *name = Supp::get_extension_name(c: e); |
4390 | statement(ts: &e == &exts.front() ? "#if": "#elif", ts: " defined(", ts&: name, ts: ")"); |
4391 | |
4392 | switch (e) |
4393 | { |
4394 | case Supp::NV_shader_thread_shuffle: |
4395 | for (const char *t : workaround_types) |
4396 | { |
4397 | statement(ts&: t, ts: " subgroupBroadcastFirst(", ts&: t, |
4398 | ts: " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }"); |
4399 | } |
4400 | for (const char *t : workaround_types) |
4401 | { |
4402 | statement(ts&: t, ts: " subgroupBroadcast(", ts&: t, |
4403 | ts: " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }"); |
4404 | } |
4405 | break; |
4406 | case Supp::ARB_shader_ballot: |
4407 | for (const char *t : workaround_types) |
4408 | { |
4409 | statement(ts&: t, ts: " subgroupBroadcastFirst(", ts&: t, |
4410 | ts: " value) { return readFirstInvocationARB(value); }"); |
4411 | } |
4412 | for (const char *t : workaround_types) |
4413 | { |
4414 | statement(ts&: t, ts: " subgroupBroadcast(", ts&: t, |
4415 | ts: " value, uint id) { return readInvocationARB(value, id); }"); |
4416 | } |
4417 | break; |
4418 | default: |
4419 | break; |
4420 | } |
4421 | } |
4422 | statement(ts: "#endif"); |
4423 | statement(ts: ""); |
4424 | } |
4425 | |
4426 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotFindLSB_MSB)) |
4427 | { |
4428 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBallotFindLSB_MSB, r: result); |
4429 | |
4430 | for (auto &e : exts) |
4431 | { |
4432 | const char *name = Supp::get_extension_name(c: e); |
4433 | statement(ts: &e == &exts.front() ? "#if": "#elif", ts: " defined(", ts&: name, ts: ")"); |
4434 | |
4435 | switch (e) |
4436 | { |
4437 | case Supp::NV_shader_thread_group: |
4438 | statement(ts: "uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }"); |
4439 | statement(ts: "uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }"); |
4440 | break; |
4441 | default: |
4442 | break; |
4443 | } |
4444 | } |
4445 | statement(ts: "#else"); |
4446 | statement(ts: "uint subgroupBallotFindLSB(uvec4 value)"); |
4447 | begin_scope(); |
4448 | statement(ts: "int firstLive = findLSB(value.x);"); |
4449 | statement(ts: "return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));"); |
4450 | end_scope(); |
4451 | statement(ts: "uint subgroupBallotFindMSB(uvec4 value)"); |
4452 | begin_scope(); |
4453 | statement(ts: "int firstLive = findMSB(value.y);"); |
4454 | statement(ts: "return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));"); |
4455 | end_scope(); |
4456 | statement(ts: "#endif"); |
4457 | statement(ts: ""); |
4458 | } |
4459 | |
4460 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupAll_Any_AllEqualBool)) |
4461 | { |
4462 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupAll_Any_AllEqualBool, r: result); |
4463 | |
4464 | for (auto &e : exts) |
4465 | { |
4466 | const char *name = Supp::get_extension_name(c: e); |
4467 | statement(ts: &e == &exts.front() ? "#if": "#elif", ts: " defined(", ts&: name, ts: ")"); |
4468 | |
4469 | switch (e) |
4470 | { |
4471 | case Supp::NV_gpu_shader_5: |
4472 | statement(ts: "bool subgroupAll(bool value) { return allThreadsNV(value); }"); |
4473 | statement(ts: "bool subgroupAny(bool value) { return anyThreadNV(value); }"); |
4474 | statement(ts: "bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }"); |
4475 | break; |
4476 | case Supp::ARB_shader_group_vote: |
4477 | statement(ts: "bool subgroupAll(bool v) { return allInvocationsARB(v); }"); |
4478 | statement(ts: "bool subgroupAny(bool v) { return anyInvocationARB(v); }"); |
4479 | statement(ts: "bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }"); |
4480 | break; |
4481 | case Supp::AMD_gcn_shader: |
4482 | statement(ts: "bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }"); |
4483 | statement(ts: "bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }"); |
4484 | statement(ts: "bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || " |
4485 | "b == ballotAMD(true); }"); |
4486 | break; |
4487 | default: |
4488 | break; |
4489 | } |
4490 | } |
4491 | statement(ts: "#endif"); |
4492 | statement(ts: ""); |
4493 | } |
4494 | |
4495 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupAllEqualT)) |
4496 | { |
4497 | statement(ts: "#ifndef GL_KHR_shader_subgroup_vote"); |
4498 | statement( |
4499 | ts: "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return " |
4500 | "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }"); |
4501 | for (const char *t : workaround_types) |
4502 | statement(ts: "_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", ts&: t, ts: ")"); |
4503 | statement(ts: "#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND"); |
4504 | statement(ts: "#endif"); |
4505 | statement(ts: ""); |
4506 | } |
4507 | |
4508 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallot)) |
4509 | { |
4510 | auto exts = Supp::get_candidates_for_feature(ft: Supp::SubgroupBallot, r: result); |
4511 | |
4512 | for (auto &e : exts) |
4513 | { |
4514 | const char *name = Supp::get_extension_name(c: e); |
4515 | statement(ts: &e == &exts.front() ? "#if": "#elif", ts: " defined(", ts&: name, ts: ")"); |
4516 | |
4517 | switch (e) |
4518 | { |
4519 | case Supp::NV_shader_thread_group: |
4520 | statement(ts: "uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }"); |
4521 | break; |
4522 | case Supp::ARB_shader_ballot: |
4523 | statement(ts: "uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }"); |
4524 | break; |
4525 | default: |
4526 | break; |
4527 | } |
4528 | } |
4529 | statement(ts: "#endif"); |
4530 | statement(ts: ""); |
4531 | } |
4532 | |
4533 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupElect)) |
4534 | { |
4535 | statement(ts: "#ifndef GL_KHR_shader_subgroup_basic"); |
4536 | statement(ts: "bool subgroupElect()"); |
4537 | begin_scope(); |
4538 | statement(ts: "uvec4 activeMask = subgroupBallot(true);"); |
4539 | statement(ts: "uint firstLive = subgroupBallotFindLSB(activeMask);"); |
4540 | statement(ts: "return gl_SubgroupInvocationID == firstLive;"); |
4541 | end_scope(); |
4542 | statement(ts: "#endif"); |
4543 | statement(ts: ""); |
4544 | } |
4545 | |
4546 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBarrier)) |
4547 | { |
4548 | // The extensions we're using in place of GL_KHR_shader_subgroup_basic state
4549 | // that subgroups execute in lockstep, so this barrier is implicit.
4550 | // However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
4551 | // and a specific test of optimizing scans by leveraging lock-step invocation execution
4552 | // has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
4553 | // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19 |
4554 | statement(ts: "#ifndef GL_KHR_shader_subgroup_basic"); |
4555 | statement(ts: "void subgroupBarrier() { memoryBarrierShared(); }"); |
4556 | statement(ts: "#endif"); |
4557 | statement(ts: ""); |
4558 | } |
4559 | |
4560 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupMemBarrier)) |
4561 | { |
4562 | if (model == spv::ExecutionModelGLCompute) |
4563 | { |
4564 | statement(ts: "#ifndef GL_KHR_shader_subgroup_basic"); |
4565 | statement(ts: "void subgroupMemoryBarrier() { groupMemoryBarrier(); }"); |
4566 | statement(ts: "void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }"); |
4567 | statement(ts: "void subgroupMemoryBarrierShared() { memoryBarrierShared(); }"); |
4568 | statement(ts: "void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }"); |
4569 | statement(ts: "#endif"); |
4570 | } |
4571 | else |
4572 | { |
4573 | statement(ts: "#ifndef GL_KHR_shader_subgroup_basic"); |
4574 | statement(ts: "void subgroupMemoryBarrier() { memoryBarrier(); }"); |
4575 | statement(ts: "void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }"); |
4576 | statement(ts: "void subgroupMemoryBarrierImage() { memoryBarrierImage(); }"); |
4577 | statement(ts: "#endif"); |
4578 | } |
4579 | statement(ts: ""); |
4580 | } |
4581 | |
4582 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout)) |
4583 | { |
4584 | statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot"); |
4585 | statement(ts: "bool subgroupInverseBallot(uvec4 value)"); |
4586 | begin_scope(); |
4587 | statement(ts: "return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));"); |
4588 | end_scope(); |
4589 | |
4590 | statement(ts: "uint subgroupBallotInclusiveBitCount(uvec4 value)"); |
4591 | begin_scope(); |
4592 | statement(ts: "uvec2 v = value.xy & gl_SubgroupLeMask.xy;"); |
4593 | statement(ts: "ivec2 c = bitCount(v);"); |
4594 | statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group"); |
4595 | statement(ts: "return uint(c.x);"); |
4596 | statement_no_indent(ts: "#else"); |
4597 | statement(ts: "return uint(c.x + c.y);"); |
4598 | statement_no_indent(ts: "#endif"); |
4599 | end_scope(); |
4600 | |
4601 | statement(ts: "uint subgroupBallotExclusiveBitCount(uvec4 value)"); |
4602 | begin_scope(); |
4603 | statement(ts: "uvec2 v = value.xy & gl_SubgroupLtMask.xy;"); |
4604 | statement(ts: "ivec2 c = bitCount(v);"); |
4605 | statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group"); |
4606 | statement(ts: "return uint(c.x);"); |
4607 | statement_no_indent(ts: "#else"); |
4608 | statement(ts: "return uint(c.x + c.y);"); |
4609 | statement_no_indent(ts: "#endif"); |
4610 | end_scope(); |
4611 | statement(ts: "#endif"); |
4612 | statement(ts: ""); |
4613 | } |
4614 | |
4615 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotBitCount)) |
4616 | { |
4617 | statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot"); |
4618 | statement(ts: "uint subgroupBallotBitCount(uvec4 value)"); |
4619 | begin_scope(); |
4620 | statement(ts: "ivec2 c = bitCount(value.xy);"); |
4621 | statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group"); |
4622 | statement(ts: "return uint(c.x);"); |
4623 | statement_no_indent(ts: "#else"); |
4624 | statement(ts: "return uint(c.x + c.y);"); |
4625 | statement_no_indent(ts: "#endif"); |
4626 | end_scope(); |
4627 | statement(ts: "#endif"); |
4628 | statement(ts: ""); |
4629 | } |
4630 | |
4631 | if (shader_subgroup_supporter.is_feature_requested(feature: Supp::SubgroupBallotBitExtract)) |
4632 | { |
4633 | statement(ts: "#ifndef GL_KHR_shader_subgroup_ballot"); |
4634 | statement(ts: "bool subgroupBallotBitExtract(uvec4 value, uint index)"); |
4635 | begin_scope(); |
4636 | statement_no_indent(ts: "#ifdef GL_NV_shader_thread_group"); |
4637 | statement(ts: "uint shifted = value.x >> index;"); |
4638 | statement_no_indent(ts: "#else"); |
4639 | statement(ts: "uint shifted = value[index >> 5u] >> (index & 0x1fu);"); |
4640 | statement_no_indent(ts: "#endif"); |
4641 | statement(ts: "return (shifted & 1u) != 0u;"); |
4642 | end_scope(); |
4643 | statement(ts: "#endif"); |
4644 | statement(ts: ""); |
4645 | } |
4646 | |
4647 | auto arithmetic_feature_helper = |
4648 | [&](Supp::Feature feat, std::string func_name, spv::Op op, spv::GroupOperation group_op) |
4649 | { |
4650 | if (shader_subgroup_supporter.is_feature_requested(feature: feat)) |
4651 | { |
4652 | auto exts = Supp::get_candidates_for_feature(ft: feat, r: result); |
4653 | for (auto &e : exts) |
4654 | { |
4655 | const char *name = Supp::get_extension_name(c: e); |
4656 | statement(ts: &e == &exts.front() ? "#if": "#elif", ts: " defined(", ts&: name, ts: ")"); |
4657 | |
4658 | switch (e) |
4659 | { |
4660 | case Supp::NV_shader_thread_shuffle: |
4661 | emit_subgroup_arithmetic_workaround(func: func_name, op, group_op); |
4662 | break; |
4663 | default: |
4664 | break; |
4665 | } |
4666 | } |
4667 | statement(ts: "#endif"); |
4668 | statement(ts: ""); |
4669 | } |
4670 | }; |
4671 | |
4672 | arithmetic_feature_helper(Supp::SubgroupArithmeticIAddReduce, "subgroupAdd", OpGroupNonUniformIAdd, |
4673 | GroupOperationReduce); |
4674 | arithmetic_feature_helper(Supp::SubgroupArithmeticIAddExclusiveScan, "subgroupExclusiveAdd", |
4675 | OpGroupNonUniformIAdd, GroupOperationExclusiveScan); |
4676 | arithmetic_feature_helper(Supp::SubgroupArithmeticIAddInclusiveScan, "subgroupInclusiveAdd", |
4677 | OpGroupNonUniformIAdd, GroupOperationInclusiveScan); |
4678 | arithmetic_feature_helper(Supp::SubgroupArithmeticFAddReduce, "subgroupAdd", OpGroupNonUniformFAdd, |
4679 | GroupOperationReduce); |
4680 | arithmetic_feature_helper(Supp::SubgroupArithmeticFAddExclusiveScan, "subgroupExclusiveAdd", |
4681 | OpGroupNonUniformFAdd, GroupOperationExclusiveScan); |
4682 | arithmetic_feature_helper(Supp::SubgroupArithmeticFAddInclusiveScan, "subgroupInclusiveAdd", |
4683 | OpGroupNonUniformFAdd, GroupOperationInclusiveScan); |
4684 | |
4685 | arithmetic_feature_helper(Supp::SubgroupArithmeticIMulReduce, "subgroupMul", OpGroupNonUniformIMul, |
4686 | GroupOperationReduce); |
4687 | arithmetic_feature_helper(Supp::SubgroupArithmeticIMulExclusiveScan, "subgroupExclusiveMul", |
4688 | OpGroupNonUniformIMul, GroupOperationExclusiveScan); |
4689 | arithmetic_feature_helper(Supp::SubgroupArithmeticIMulInclusiveScan, "subgroupInclusiveMul", |
4690 | OpGroupNonUniformIMul, GroupOperationInclusiveScan); |
4691 | arithmetic_feature_helper(Supp::SubgroupArithmeticFMulReduce, "subgroupMul", OpGroupNonUniformFMul, |
4692 | GroupOperationReduce); |
4693 | arithmetic_feature_helper(Supp::SubgroupArithmeticFMulExclusiveScan, "subgroupExclusiveMul", |
4694 | OpGroupNonUniformFMul, GroupOperationExclusiveScan); |
4695 | arithmetic_feature_helper(Supp::SubgroupArithmeticFMulInclusiveScan, "subgroupInclusiveMul", |
4696 | OpGroupNonUniformFMul, GroupOperationInclusiveScan); |
4697 | } |
4698 | |
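// spvWorkaroundRowMajor is an identity wrapper which forces a copy when loading these types from a
// UBO, dodging driver bugs with row-major matrix loads. On ES we also emit a mediump variant with
// an "MP" suffix, since GLSL cannot overload on precision qualifiers.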
4699 | if (!workaround_ubo_load_overload_types.empty()) |
4700 | { |
4701 | for (auto &type_id : workaround_ubo_load_overload_types) |
4702 | { |
4703 | auto &type = get<SPIRType>(id: type_id); |
4704 | |
4705 | if (options.es && is_matrix(type)) |
4706 | { |
4707 | // We need both variants.
4708 | // GLSL cannot overload on precision, so we must dispatch to the correctly-named helper instead.
4709 | statement(ts: "highp ", ts: type_to_glsl(type), ts: " spvWorkaroundRowMajor(highp ", ts: type_to_glsl(type), ts: " wrap) { return wrap; }"); |
4710 | statement(ts: "mediump ", ts: type_to_glsl(type), ts: " spvWorkaroundRowMajorMP(mediump ", ts: type_to_glsl(type), ts: " wrap) { return wrap; }"); |
4711 | } |
4712 | else |
4713 | { |
4714 | statement(ts: type_to_glsl(type), ts: " spvWorkaroundRowMajor(", ts: type_to_glsl(type), ts: " wrap) { return wrap; }"); |
4715 | } |
4716 | } |
4717 | statement(ts: ""); |
4718 | } |
4719 | } |
4720 | |
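// Emits helper functions for operations the target GLSL version lacks (transpose, determinant,
// inverse, and the NMin/NMax/NClamp family). With options.es and relaxed set, helpers take a
// mediump qualifier and an "MP" name suffix; e.g. the 2x2 transpose polyfill comes out as:
//
//   mediump mat2 spvTransposeMP(mediump mat2 m)
//   {
//       return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);
//   }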
4721 | void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed) |
4722 | { |
4723 | const char *qual = ""; |
4724 | const char *suffix = (options.es && relaxed) ? "MP": ""; |
4725 | if (options.es) |
4726 | qual = relaxed ? "mediump ": "highp "; |
4727 | |
4728 | if (polyfills & PolyfillTranspose2x2) |
4729 | { |
4730 | statement(ts&: qual, ts: "mat2 spvTranspose", ts&: suffix, ts: "(", ts&: qual, ts: "mat2 m)"); |
4731 | begin_scope(); |
4732 | statement(ts: "return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);"); |
4733 | end_scope(); |
4734 | statement(ts: ""); |
4735 | } |
4736 | |
4737 | if (polyfills & PolyfillTranspose3x3) |
4738 | { |
4739 | statement(ts&: qual, ts: "mat3 spvTranspose", ts&: suffix, ts: "(", ts&: qual, ts: "mat3 m)"); |
4740 | begin_scope(); |
4741 | statement(ts: "return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);"); |
4742 | end_scope(); |
4743 | statement(ts: ""); |
4744 | } |
4745 | |
4746 | if (polyfills & PolyfillTranspose4x4) |
4747 | { |
4748 | statement(ts&: qual, ts: "mat4 spvTranspose", ts&: suffix, ts: "(", ts&: qual, ts: "mat4 m)"); |
4749 | begin_scope(); |
4750 | statement(ts: "return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], " |
4751 | "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);"); |
4752 | end_scope(); |
4753 | statement(ts: ""); |
4754 | } |
4755 | |
4756 | if (polyfills & PolyfillDeterminant2x2) |
4757 | { |
4758 | statement(ts&: qual, ts: "float spvDeterminant", ts&: suffix, ts: "(", ts&: qual, ts: "mat2 m)"); |
4759 | begin_scope(); |
4760 | statement(ts: "return m[0][0] * m[1][1] - m[0][1] * m[1][0];"); |
4761 | end_scope(); |
4762 | statement(ts: ""); |
4763 | } |
4764 | |
4765 | if (polyfills & PolyfillDeterminant3x3) |
4766 | { |
4767 | statement(ts&: qual, ts: "float spvDeterminant", ts&: suffix, ts: "(", ts&: qual, ts: "mat3 m)"); |
4768 | begin_scope(); |
4769 | statement(ts: "return dot(m[0], vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], " |
4770 | "m[1][2] * m[2][0] - m[1][0] * m[2][2], " |
4771 | "m[1][0] * m[2][1] - m[1][1] * m[2][0]));"); |
4772 | end_scope(); |
4773 | statement(ts: ""); |
4774 | } |
4775 | |
4776 | if (polyfills & PolyfillDeterminant4x4) |
4777 | { |
4778 | statement(ts&: qual, ts: "float spvDeterminant", ts&: suffix, ts: "(", ts&: qual, ts: "mat4 m)"); |
4779 | begin_scope(); |
4780 | statement(ts: "return dot(m[0], vec4(" |
4781 | "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], " |
4782 | "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], " |
4783 | "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], " |
4784 | "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]));"); |
4785 | end_scope(); |
4786 | statement(ts: ""); |
4787 | } |
4788 | |
4789 | if (polyfills & PolyfillMatrixInverse2x2) |
4790 | { |
4791 | statement(ts&: qual, ts: "mat2 spvInverse", ts&: suffix, ts: "(", ts&: qual, ts: "mat2 m)"); |
4792 | begin_scope(); |
4793 | statement(ts: "return mat2(m[1][1], -m[0][1], -m[1][0], m[0][0]) " |
4794 | "* (1.0 / (m[0][0] * m[1][1] - m[1][0] * m[0][1]));"); |
4795 | end_scope(); |
4796 | statement(ts: ""); |
4797 | } |
4798 | |
4799 | if (polyfills & PolyfillMatrixInverse3x3) |
4800 | { |
4801 | statement(ts&: qual, ts: "mat3 spvInverse", ts&: suffix, ts: "(", ts&: qual, ts: "mat3 m)"); |
4802 | begin_scope(); |
4803 | statement(ts&: qual, ts: "vec3 t = vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], m[1][2] * m[2][0] - m[1][0] * m[2][2], m[1][0] * m[2][1] - m[1][1] * m[2][0]);"); |
4804 | statement(ts: "return mat3(t[0], " |
4805 | "m[0][2] * m[2][1] - m[0][1] * m[2][2], " |
4806 | "m[0][1] * m[1][2] - m[0][2] * m[1][1], " |
4807 | "t[1], " |
4808 | "m[0][0] * m[2][2] - m[0][2] * m[2][0], " |
4809 | "m[0][2] * m[1][0] - m[0][0] * m[1][2], " |
4810 | "t[2], " |
4811 | "m[0][1] * m[2][0] - m[0][0] * m[2][1], " |
4812 | "m[0][0] * m[1][1] - m[0][1] * m[1][0]) " |
4813 | "* (1.0 / dot(m[0], t));"); |
4814 | end_scope(); |
4815 | statement(ts: ""); |
4816 | } |
4817 | |
4818 | if (polyfills & PolyfillMatrixInverse4x4) |
4819 | { |
4820 | statement(ts&: qual, ts: "mat4 spvInverse", ts&: suffix, ts: "(", ts&: qual, ts: "mat4 m)"); |
4821 | begin_scope(); |
4822 | statement(ts&: qual, ts: "vec4 t = vec4(" |
4823 | "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], " |
4824 | "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], " |
4825 | "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], " |
4826 | "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]);"); |
4827 | statement(ts: "return mat4(" |
4828 | "t[0], " |
4829 | "m[3][1] * m[2][2] * m[0][3] - m[2][1] * m[3][2] * m[0][3] - m[3][1] * m[0][2] * m[2][3] + m[0][1] * m[3][2] * m[2][3] + m[2][1] * m[0][2] * m[3][3] - m[0][1] * m[2][2] * m[3][3], " |
4830 | "m[1][1] * m[3][2] * m[0][3] - m[3][1] * m[1][2] * m[0][3] + m[3][1] * m[0][2] * m[1][3] - m[0][1] * m[3][2] * m[1][3] - m[1][1] * m[0][2] * m[3][3] + m[0][1] * m[1][2] * m[3][3], " |
4831 | "m[2][1] * m[1][2] * m[0][3] - m[1][1] * m[2][2] * m[0][3] - m[2][1] * m[0][2] * m[1][3] + m[0][1] * m[2][2] * m[1][3] + m[1][1] * m[0][2] * m[2][3] - m[0][1] * m[1][2] * m[2][3], " |
4832 | "t[1], " |
4833 | "m[2][0] * m[3][2] * m[0][3] - m[3][0] * m[2][2] * m[0][3] + m[3][0] * m[0][2] * m[2][3] - m[0][0] * m[3][2] * m[2][3] - m[2][0] * m[0][2] * m[3][3] + m[0][0] * m[2][2] * m[3][3], " |
4834 | "m[3][0] * m[1][2] * m[0][3] - m[1][0] * m[3][2] * m[0][3] - m[3][0] * m[0][2] * m[1][3] + m[0][0] * m[3][2] * m[1][3] + m[1][0] * m[0][2] * m[3][3] - m[0][0] * m[1][2] * m[3][3], " |
4835 | "m[1][0] * m[2][2] * m[0][3] - m[2][0] * m[1][2] * m[0][3] + m[2][0] * m[0][2] * m[1][3] - m[0][0] * m[2][2] * m[1][3] - m[1][0] * m[0][2] * m[2][3] + m[0][0] * m[1][2] * m[2][3], " |
4836 | "t[2], " |
4837 | "m[3][0] * m[2][1] * m[0][3] - m[2][0] * m[3][1] * m[0][3] - m[3][0] * m[0][1] * m[2][3] + m[0][0] * m[3][1] * m[2][3] + m[2][0] * m[0][1] * m[3][3] - m[0][0] * m[2][1] * m[3][3], " |
4838 | "m[1][0] * m[3][1] * m[0][3] - m[3][0] * m[1][1] * m[0][3] + m[3][0] * m[0][1] * m[1][3] - m[0][0] * m[3][1] * m[1][3] - m[1][0] * m[0][1] * m[3][3] + m[0][0] * m[1][1] * m[3][3], " |
4839 | "m[2][0] * m[1][1] * m[0][3] - m[1][0] * m[2][1] * m[0][3] - m[2][0] * m[0][1] * m[1][3] + m[0][0] * m[2][1] * m[1][3] + m[1][0] * m[0][1] * m[2][3] - m[0][0] * m[1][1] * m[2][3], " |
4840 | "t[3], " |
4841 | "m[2][0] * m[3][1] * m[0][2] - m[3][0] * m[2][1] * m[0][2] + m[3][0] * m[0][1] * m[2][2] - m[0][0] * m[3][1] * m[2][2] - m[2][0] * m[0][1] * m[3][2] + m[0][0] * m[2][1] * m[3][2], " |
4842 | "m[3][0] * m[1][1] * m[0][2] - m[1][0] * m[3][1] * m[0][2] - m[3][0] * m[0][1] * m[1][2] + m[0][0] * m[3][1] * m[1][2] + m[1][0] * m[0][1] * m[3][2] - m[0][0] * m[1][1] * m[3][2], " |
4843 | "m[1][0] * m[2][1] * m[0][2] - m[2][0] * m[1][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] - m[0][0] * m[2][1] * m[1][2] - m[1][0] * m[0][1] * m[2][2] + m[0][0] * m[1][1] * m[2][2]) " |
4844 | "* (1.0 / dot(m[0], t));"); |
4845 | end_scope(); |
4846 | statement(ts: ""); |
4847 | } |
4848 | |
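// The non-relaxed path declares NMin/NMax/NClamp directly as GLSL.std.450 instructions through
// GL_EXT_spirv_intrinsics, one declaration per type, roughly:
//
//   spirv_instruction(set = "GLSL.std.450", id = 79) float spvNMin(float, float);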
4849 | if (!relaxed) |
4850 | { |
4851 | static const Polyfill polys[3][3] = { |
4852 | { PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 }, |
4853 | { PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 }, |
4854 | { PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 }, |
4855 | }; |
4856 | |
4857 | static const GLSLstd450 glsl_ops[] = { GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp }; |
4858 | static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp"}; |
4859 | bool has_poly = false; |
4860 | |
4861 | for (uint32_t i = 0; i < 3; i++) |
4862 | { |
4863 | for (uint32_t j = 0; j < 3; j++) |
4864 | { |
4865 | if ((polyfills & polys[i][j]) == 0) |
4866 | continue; |
4867 | |
4868 | const char *types[3][4] = { |
4869 | { "float16_t", "f16vec2", "f16vec3", "f16vec4"}, |
4870 | { "float", "vec2", "vec3", "vec4"}, |
4871 | { "double", "dvec2", "dvec3", "dvec4"}, |
4872 | }; |
4873 | |
4874 | for (uint32_t k = 0; k < 4; k++) |
4875 | { |
4876 | auto *type = types[j][k]; |
4877 | |
4878 | if (i < 2) |
4879 | { |
4880 | statement(ts: "spirv_instruction(set = \"GLSL.std.450\", id = ", ts: glsl_ops[i], ts: ") ", |
4881 | ts&: type, ts: " ", ts&: spv_ops[i], ts: "(", ts&: type, ts: ", ", ts&: type, ts: ");"); |
4882 | } |
4883 | else |
4884 | { |
4885 | statement(ts: "spirv_instruction(set = \"GLSL.std.450\", id = ", ts: glsl_ops[i], ts: ") ", |
4886 | ts&: type, ts: " ", ts&: spv_ops[i], ts: "(", ts&: type, ts: ", ", ts&: type, ts: ", ", ts&: type, ts: ");"); |
4887 | } |
4888 | |
4889 | has_poly = true; |
4890 | } |
4891 | } |
4892 | } |
4893 | |
4894 | if (has_poly) |
4895 | statement(ts: ""); |
4896 | } |
4897 | else |
4898 | { |
4899 | // The mediump intrinsics don't propagate precision correctly on their own, so wrap each
4900 | // intrinsic in an outer shell that forces mediump on the arguments and the result.
4901 | |
4902 | static const Polyfill polys[3][3] = { |
4903 | { PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 }, |
4904 | { PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 }, |
4905 | { PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 }, |
4906 | }; |
4907 | |
4908 | static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp"}; |
4909 | |
4910 | for (uint32_t i = 0; i < 3; i++) |
4911 | { |
4912 | for (uint32_t j = 0; j < 3; j++) |
4913 | { |
4914 | if ((polyfills & polys[i][j]) == 0) |
4915 | continue; |
4916 | |
4917 | const char *types[3][4] = { |
4918 | { "float16_t", "f16vec2", "f16vec3", "f16vec4"}, |
4919 | { "float", "vec2", "vec3", "vec4"}, |
4920 | { "double", "dvec2", "dvec3", "dvec4"}, |
4921 | }; |
4922 | |
4923 | for (uint32_t k = 0; k < 4; k++) |
4924 | { |
4925 | auto *type = types[j][k]; |
4926 | |
4927 | if (i < 2) |
4928 | { |
4929 | statement(ts: "mediump ", ts&: type, ts: " ", ts&: spv_ops[i], ts: "Relaxed(", |
4930 | ts: "mediump ", ts&: type, ts: " a, mediump ", ts&: type, ts: " b)"); |
4931 | begin_scope(); |
4932 | statement(ts: "mediump ", ts&: type, ts: " res = ", ts&: spv_ops[i], ts: "(a, b);"); |
4933 | statement(ts: "return res;"); |
4934 | end_scope(); |
4935 | statement(ts: ""); |
4936 | } |
4937 | else |
4938 | { |
4939 | statement(ts: "mediump ", ts&: type, ts: " ", ts&: spv_ops[i], ts: "Relaxed(", |
4940 | ts: "mediump ", ts&: type, ts: " a, mediump ", ts&: type, ts: " b, mediump ", ts&: type, ts: " c)"); |
4941 | begin_scope(); |
4942 | statement(ts: "mediump ", ts&: type, ts: " res = ", ts&: spv_ops[i], ts: "(a, b, c);"); |
4943 | statement(ts: "return res;"); |
4944 | end_scope(); |
4945 | statement(ts: ""); |
4946 | } |
4947 | } |
4948 | } |
4949 | } |
4950 | } |
4951 | } |
4952 | |
4953 | // Returns a string representation of the ID, usable as a function arg. |
4954 | // The default is to simply return the expression representation of the arg ID.
4955 | // Subclasses may override to modify the return value. |
4956 | string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id) |
4957 | { |
4958 | // Make sure that we use the name of the original variable, and not the parameter alias. |
4959 | uint32_t name_id = id; |
4960 | auto *var = maybe_get<SPIRVariable>(id); |
4961 | if (var && var->basevariable) |
4962 | name_id = var->basevariable; |
4963 | return to_expression(id: name_id); |
4964 | } |
4965 | |
4966 | void CompilerGLSL::force_temporary_and_recompile(uint32_t id) |
4967 | { |
4968 | auto res = forced_temporaries.insert(x: id); |
4969 | |
4970 | // Forcing new temporaries guarantees forward progress. |
4971 | if (res.second) |
4972 | force_recompile_guarantee_forward_progress(); |
4973 | else |
4974 | force_recompile(); |
4975 | } |
4976 | |
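// If an expression is consumed at a different precision than it was produced at, mirror it into a
// precision-qualified temporary (prefixed mp_copy_ or hp_copy_) and consume the alias instead, so
// the copy itself performs the precision conversion. Illustrative emitted GLSL:
//
//   mediump float mp_copy_a = a; // RelaxedPrecision alias of highp 'a'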
4977 | uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision) |
4978 | { |
4979 | // Constants do not have innate precision. |
4980 | auto handle_type = ir.ids[id].get_type(); |
4981 | if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef) |
4982 | return id; |
4983 | |
4984 | // Ignore anything that isn't 32-bit values. |
4985 | auto &type = get<SPIRType>(id: type_id); |
4986 | if (type.pointer) |
4987 | return id; |
4988 | if (type.basetype != SPIRType::Float && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int) |
4989 | return id; |
4990 | |
4991 | if (precision == Options::DontCare) |
4992 | { |
4993 | // If precision is consumed as don't-care (operations consisting only of constants),
4994 | // we need to bind the expression to a temporary, |
4995 | // otherwise we have no way of controlling the precision later. |
4996 | auto itr = forced_temporaries.insert(x: id); |
4997 | if (itr.second) |
4998 | force_recompile_guarantee_forward_progress(); |
4999 | return id; |
5000 | } |
5001 | |
5002 | auto current_precision = has_decoration(id, decoration: DecorationRelaxedPrecision) ? Options::Mediump : Options::Highp; |
5003 | if (current_precision == precision) |
5004 | return id; |
5005 | |
5006 | auto itr = temporary_to_mirror_precision_alias.find(x: id); |
5007 | if (itr == temporary_to_mirror_precision_alias.end()) |
5008 | { |
5009 | uint32_t alias_id = ir.increase_bound_by(count: 1); |
5010 | auto &m = ir.meta[alias_id]; |
5011 | if (auto *input_m = ir.find_meta(id)) |
5012 | m = *input_m; |
5013 | |
5014 | const char *prefix; |
5015 | if (precision == Options::Mediump) |
5016 | { |
5017 | set_decoration(id: alias_id, decoration: DecorationRelaxedPrecision); |
5018 | prefix = "mp_copy_"; |
5019 | } |
5020 | else |
5021 | { |
5022 | unset_decoration(id: alias_id, decoration: DecorationRelaxedPrecision); |
5023 | prefix = "hp_copy_"; |
5024 | } |
5025 | |
5026 | auto alias_name = join(ts&: prefix, ts: to_name(id)); |
5027 | ParsedIR::sanitize_underscores(str&: alias_name); |
5028 | set_name(id: alias_id, name: alias_name); |
5029 | |
5030 | emit_op(result_type: type_id, result_id: alias_id, rhs: to_expression(id), forward_rhs: true); |
5031 | temporary_to_mirror_precision_alias[id] = alias_id; |
5032 | forced_temporaries.insert(x: id); |
5033 | forced_temporaries.insert(x: alias_id); |
5034 | force_recompile_guarantee_forward_progress(); |
5035 | id = alias_id; |
5036 | } |
5037 | else |
5038 | { |
5039 | id = itr->second; |
5040 | } |
5041 | |
5042 | return id; |
5043 | } |
5044 | |
5045 | void CompilerGLSL::handle_invalid_expression(uint32_t id) |
5046 | { |
5047 | // We tried to read an invalidated expression. |
5048 | // This means we need another pass at compilation, but next time, |
5049 | // force temporary variables so that they cannot be invalidated. |
5050 | force_temporary_and_recompile(id); |
5051 | |
5052 | // If the invalid expression happened as a result of a CompositeInsert |
5053 | // overwrite, we must block this from happening next iteration. |
5054 | if (composite_insert_overwritten.count(x: id)) |
5055 | block_composite_insert_overwrite.insert(x: id); |
5056 | } |
5057 | |
5058 | // Converts the format of the current expression from packed to unpacked, |
5059 | // by wrapping the expression in a constructor of the appropriate type. |
5060 | // GLSL does not support packed formats, so simply return the expression. |
5061 | // Subclasses that support packed formats will override this.
5062 | string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool) |
5063 | { |
5064 | return expr_str; |
5065 | } |
5066 | |
5067 | // Sometimes we proactively enclose an expression which turns out not to have needed it after all.
5068 | void CompilerGLSL::strip_enclosed_expression(string &expr) |
5069 | { |
5070 | if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')') |
5071 | return; |
5072 | |
5073 | // We have to make sure that our first and last parens actually enclose everything inside them.
5074 | uint32_t paren_count = 0; |
5075 | for (auto &c : expr) |
5076 | { |
5077 | if (c == '(') |
5078 | paren_count++; |
5079 | else if (c == ')') |
5080 | { |
5081 | paren_count--; |
5082 | |
5083 | // If we hit 0 and this is not the final char, our first and final parens actually don't |
5084 | // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d). |
5085 | if (paren_count == 0 && &c != &expr.back()) |
5086 | return; |
5087 | } |
5088 | } |
5089 | expr.erase(pos: expr.size() - 1, n: 1); |
5090 | expr.erase(position: begin(cont&: expr)); |
5091 | } |
5092 | |
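// Heuristic: we need parens if the expression starts with a unary operator (so composing unaries
// cannot produce token pastes like "--x"), or if it contains a space outside all brackets, which
// indicates a binary expression such as "a + b". "foo(a, b)" needs none; its space is enclosed.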
5093 | bool CompilerGLSL::needs_enclose_expression(const std::string &expr) |
5094 | { |
5095 | bool need_parens = false; |
5096 | |
5097 | // If the expression starts with a unary operator, we need to enclose it to deal with cases
5098 | // where we have back-to-back unary expressions.
5099 | if (!expr.empty()) |
5100 | { |
5101 | auto c = expr.front(); |
5102 | if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*') |
5103 | need_parens = true; |
5104 | } |
5105 | |
5106 | if (!need_parens) |
5107 | { |
5108 | uint32_t paren_count = 0; |
5109 | for (auto c : expr) |
5110 | { |
5111 | if (c == '(' || c == '[') |
5112 | paren_count++; |
5113 | else if (c == ')' || c == ']') |
5114 | { |
5115 | assert(paren_count); |
5116 | paren_count--; |
5117 | } |
5118 | else if (c == ' ' && paren_count == 0) |
5119 | { |
5120 | need_parens = true; |
5121 | break; |
5122 | } |
5123 | } |
5124 | assert(paren_count == 0); |
5125 | } |
5126 | |
5127 | return need_parens; |
5128 | } |
5129 | |
5130 | string CompilerGLSL::enclose_expression(const string &expr) |
5131 | { |
5132 | // If this expression contains any spaces which are not enclosed by parentheses, |
5133 | // we need to enclose it so we can treat the whole string as an expression. |
5134 | // This happens when two expressions have been part of a binary op earlier. |
5135 | if (needs_enclose_expression(expr)) |
5136 | return join(ts: '(', ts: expr, ts: ')'); |
5137 | else |
5138 | return expr; |
5139 | } |
5140 | |
5141 | string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr) |
5142 | { |
5143 | // If this expression starts with an address-of operator ('&'), then |
5144 | // just return the part after the operator. |
5145 | // TODO: Strip parens if unnecessary? |
5146 | if (expr.front() == '&') |
5147 | return expr.substr(pos: 1); |
5148 | else if (backend.native_pointers) |
5149 | return join(ts: '*', ts: expr); |
5150 | else if (is_physical_pointer(type: expr_type) && !is_physical_pointer_to_buffer_block(type: expr_type)) |
5151 | return join(ts: enclose_expression(expr), ts: ".value"); |
5152 | else |
5153 | return expr; |
5154 | } |
5155 | |
5156 | string CompilerGLSL::address_of_expression(const std::string &expr) |
5157 | { |
5158 | if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')') |
5159 | { |
5160 | // If we have an expression which looks like (*foo), taking the address of it is the same as stripping |
5161 | // the first two and last characters. We might have to enclose the expression. |
5162 | // This doesn't work for cases like (*foo + 10), |
5163 | // but that is an r-value expression which we cannot take the address of anyway.
5164 | return enclose_expression(expr: expr.substr(pos: 2, n: expr.size() - 3)); |
5165 | } |
5166 | else if (expr.front() == '*') |
5167 | { |
5168 | // If this expression starts with a dereference operator ('*'), then |
5169 | // just return the part after the operator. |
5170 | return expr.substr(pos: 1); |
5171 | } |
5172 | else |
5173 | return join(ts: '&', ts: enclose_expression(expr)); |
5174 | } |
5175 | |
5176 | // Just like to_expression except that we enclose the expression inside parentheses if needed. |
5177 | string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read) |
5178 | { |
5179 | return enclose_expression(expr: to_expression(id, register_expression_read)); |
5180 | } |
5181 | |
5182 | // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans. |
5183 | // need_transpose must be forced to false. |
5184 | string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id) |
5185 | { |
5186 | return unpack_expression_type(expr_str: to_expression(id), expression_type(id), |
5187 | get_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID), |
5188 | has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked), true); |
5189 | } |
5190 | |
5191 | string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read) |
5192 | { |
5193 | // If we need to transpose, it will also take care of unpacking rules. |
5194 | auto *e = maybe_get<SPIRExpression>(id); |
5195 | bool need_transpose = e && e->need_transpose; |
5196 | bool is_remapped = has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID); |
5197 | bool is_packed = has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
5198 | |
5199 | if (!need_transpose && (is_remapped || is_packed)) |
5200 | { |
5201 | return unpack_expression_type(expr_str: to_expression(id, register_expression_read), |
5202 | get_pointee_type(type_id: expression_type_id(id)), |
5203 | get_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID), |
5204 | has_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked), false); |
5205 | } |
5206 | else |
5207 | return to_expression(id, register_expression_read); |
5208 | } |

string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
{
	return enclose_expression(to_unpacked_expression(id, register_expression_read));
}

string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
{
	auto &type = expression_type(id);
	if (type.pointer && should_dereference(id))
		return dereference_expression(type, to_enclosed_expression(id, register_expression_read));
	else
		return to_expression(id, register_expression_read);
}

string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
{
	auto &type = expression_type(id);
	if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
		return address_of_expression(to_enclosed_expression(id, register_expression_read));
	else
		return to_unpacked_expression(id, register_expression_read);
}

string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
{
	auto &type = expression_type(id);
	if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
		return address_of_expression(to_enclosed_expression(id, register_expression_read));
	else
		return to_enclosed_unpacked_expression(id, register_expression_read);
}

string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
{
	auto expr = to_enclosed_expression(id);
	if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
		return join(expr, "[", index, "]");
	else
		return join(expr, ".", index_to_swizzle(index));
}
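// Sketch of the two forms above (illustrative): extracting component 1 of a vector yields
//   v.y     for a normal vector (swizzle), or
//   v[1]    when the vector is physically packed and swizzles cannot be used.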

string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c,
                                                              const uint32_t *chain, uint32_t length)
{
	// It is somewhat silly if an application actually enters this path, since it knows the constant up front.
	// It is useful here to extract the plain constant directly.
	SPIRConstant tmp;
	tmp.constant_type = result_type;
	auto &composite_type = get<SPIRType>(c.constant_type);
	assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty());
	assert(!c.specialization);

	if (is_matrix(composite_type))
	{
		if (length == 2)
		{
			tmp.m.c[0].vecsize = 1;
			tmp.m.columns = 1;
			tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]];
		}
		else
		{
			assert(length == 1);
			tmp.m.c[0].vecsize = composite_type.vecsize;
			tmp.m.columns = 1;
			tmp.m.c[0] = c.m.c[chain[0]];
		}
	}
	else
	{
		assert(length == 1);
		tmp.m.c[0].vecsize = 1;
		tmp.m.columns = 1;
		tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]];
	}

	return constant_expression(tmp);
}
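// Worked example (illustrative): extracting element [1][2] from a constant mat3 copies
// c.m.c[1].r[2] into a temporary scalar SPIRConstant, so the emitted GLSL is just the
// plain literal (e.g. 3.0) rather than an access chain into a constant matrix.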

string CompilerGLSL::to_rerolled_array_expression(const SPIRType &parent_type,
                                                  const string &base_expr, const SPIRType &type)
{
	bool remapped_boolean = parent_type.basetype == SPIRType::Struct &&
	                        type.basetype == SPIRType::Boolean &&
	                        backend.boolean_in_struct_remapped_type != SPIRType::Boolean;

	SPIRType tmp_type { OpNop };
	if (remapped_boolean)
	{
		tmp_type = get<SPIRType>(type.parent_type);
		tmp_type.basetype = backend.boolean_in_struct_remapped_type;
	}
	else if (type.basetype == SPIRType::Boolean && backend.boolean_in_struct_remapped_type != SPIRType::Boolean)
	{
		// It's possible that we have an r-value expression that was OpLoaded from a struct.
		// We have to reroll this and explicitly cast the input to bool,
		// because the r-value has the remapped (e.g. short) type.
		tmp_type = get<SPIRType>(type.parent_type);
		remapped_boolean = true;
	}

	uint32_t size = to_array_size_literal(type);
	auto &parent = get<SPIRType>(type.parent_type);
	string expr = "{ ";

	for (uint32_t i = 0; i < size; i++)
	{
		auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
		if (!is_array(parent))
		{
			if (remapped_boolean)
				subexpr = join(type_to_glsl(tmp_type), "(", subexpr, ")");
			expr += subexpr;
		}
		else
			expr += to_rerolled_array_expression(parent_type, subexpr, parent);

		if (i + 1 < size)
			expr += ", ";
	}

	expr += " }";
	return expr;
}

string CompilerGLSL::to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type)
{
	auto &type = expression_type(id);

	bool reroll_array = false;
	bool remapped_boolean = parent_type.basetype == SPIRType::Struct &&
	                        type.basetype == SPIRType::Boolean &&
	                        backend.boolean_in_struct_remapped_type != SPIRType::Boolean;

	if (is_array(type))
	{
		reroll_array = !backend.array_is_value_type ||
		               (block_like_type && !backend.array_is_value_type_in_buffer_blocks);

		if (remapped_boolean)
		{
			// Forced to reroll if we have to change bool[] to short[].
			reroll_array = true;
		}
	}

	if (reroll_array)
	{
		// For this case, we need to "re-roll" an array initializer from a temporary.
		// We cannot simply pass the array directly, since it decays to a pointer and it cannot
		// participate in a struct initializer. E.g.
		// float arr[2] = { 1.0, 2.0 };
		// Foo foo = { arr }; must be transformed to
		// Foo foo = { { arr[0], arr[1] } };
		// The array sizes cannot be deduced from specialization constants since we cannot use any loops.

		// We're only triggering one read of the array expression, but this is fine since arrays have to be declared
		// as temporaries anyways.
		return to_rerolled_array_expression(parent_type, to_enclosed_expression(id), type);
	}
	else
	{
		auto expr = to_unpacked_expression(id);
		if (remapped_boolean)
		{
			auto tmp_type = type;
			tmp_type.basetype = backend.boolean_in_struct_remapped_type;
			expr = join(type_to_glsl(tmp_type), "(", expr, ")");
		}

		return expr;
	}
}

string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id)
{
	string expr = to_expression(id);

	if (has_decoration(id, DecorationNonUniform))
		convert_non_uniform_expression(expr, id);

	return expr;
}

string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
{
	auto itr = invalid_expressions.find(id);
	if (itr != end(invalid_expressions))
		handle_invalid_expression(id);

	if (ir.ids[id].get_type() == TypeExpression)
	{
		// We might have a more complex chain of dependencies.
		// A possible scenario is that we
		//
		// %1 = OpLoad
		// %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
		// %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
		// OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
		// %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before.
		//
		// However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
		// and see that we should not forward reads of the original variable.
		auto &expr = get<SPIRExpression>(id);
		for (uint32_t dep : expr.expression_dependencies)
			if (invalid_expressions.find(dep) != end(invalid_expressions))
				handle_invalid_expression(dep);
	}

	if (register_expression_read)
		track_expression_read(id);

	switch (ir.ids[id].get_type())
	{
	case TypeExpression:
	{
		auto &e = get<SPIRExpression>(id);
		if (e.base_expression)
			return to_enclosed_expression(e.base_expression) + e.expression;
		else if (e.need_transpose)
		{
			// This should not be reached for access chains, since we always deal explicitly with transpose state
			// when consuming an access chain expression.
			uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
			bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
			bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
			return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
			                                is_packed, relaxed);
		}
		else if (flattened_structs.count(id))
		{
			return load_flattened_struct(e.expression, get<SPIRType>(e.expression_type));
		}
		else
		{
			if (is_forcing_recompilation())
			{
				// During the first compilation phase, certain expression patterns can trigger exponential growth of memory.
				// Avoid this by returning dummy expressions during this phase.
				// Do not use empty expressions here, because those are sentinels for other cases.
				return "_";
			}
			else
				return e.expression;
		}
	}

	case TypeConstant:
	{
		auto &c = get<SPIRConstant>(id);
		auto &type = get<SPIRType>(c.constant_type);

		// WorkGroupSize may be a constant.
		if (has_decoration(c.self, DecorationBuiltIn))
			return builtin_to_glsl(BuiltIn(get_decoration(c.self, DecorationBuiltIn)), StorageClassGeneric);
		else if (c.specialization)
		{
			if (backend.workgroup_size_is_hidden)
			{
				int wg_index = get_constant_mapping_to_workgroup_component(c);
				if (wg_index >= 0)
				{
					auto wg_size = join(builtin_to_glsl(BuiltInWorkgroupSize, StorageClassInput), vector_swizzle(1, wg_index));
					if (type.basetype != SPIRType::UInt)
						wg_size = bitcast_expression(type, SPIRType::UInt, wg_size);
					return wg_size;
				}
			}

			if (expression_is_forwarded(id))
				return constant_expression(c);

			return to_name(id);
		}
		else if (c.is_used_as_lut)
			return to_name(id);
		else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
			return to_name(id);
		else if (!type.array.empty() && !backend.can_declare_arrays_inline)
			return to_name(id);
		else
			return constant_expression(c);
	}

	case TypeConstantOp:
		return to_name(id);

	case TypeVariable:
	{
		auto &var = get<SPIRVariable>(id);
		// If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
		// since the variable has not been declared yet.
		if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
		{
			// We might try to load from a loop variable before it has been initialized.
			// Prefer the static expression and fall back to the initializer.
			if (var.static_expression)
				return to_expression(var.static_expression);
			else if (var.initializer)
				return to_expression(var.initializer);
			else
			{
				// We cannot declare the variable yet, so have to fake it.
				uint32_t undef_id = ir.increase_bound_by(1);
				return emit_uninitialized_temporary_expression(get_variable_data_type_id(var), undef_id).expression;
			}
		}
		else if (var.deferred_declaration)
		{
			var.deferred_declaration = false;
			return variable_decl(var);
		}
		else if (flattened_structs.count(id))
		{
			return load_flattened_struct(to_name(id), get<SPIRType>(var.basetype));
		}
		else
		{
			auto &dec = ir.meta[var.self].decoration;
			if (dec.builtin)
				return builtin_to_glsl(dec.builtin_type, var.storage);
			else
				return to_name(id);
		}
	}

	case TypeCombinedImageSampler:
		// The expression of this type should never be taken directly.
		// The intention is that texture sampling functions will extract the image and sampler
		// separately and take their expressions as needed.
		// GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
		// expression a la sampler2D(texture, sampler).
		SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");

	case TypeAccessChain:
		// We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad.
		SPIRV_CROSS_THROW("Access chains have no default expression representation.");

	default:
		return to_name(id);
	}
}
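// Illustrative sketch of the TypeExpression forwarding above: when base_expression is set,
// e.expression holds only a suffix, so a base of "a + b" with the suffix ".x" is emitted as
//   (a + b).x
// The enclosing parentheses come from to_enclosed_expression() on the base.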

SmallVector<ConstantID> CompilerGLSL::get_composite_constant_ids(ConstantID const_id)
{
	if (auto *constant = maybe_get<SPIRConstant>(const_id))
	{
		const auto &type = get<SPIRType>(constant->constant_type);
		if (is_array(type) || type.basetype == SPIRType::Struct)
			return constant->subconstants;
		if (is_matrix(type))
			return SmallVector<ConstantID>(constant->m.id);
		if (is_vector(type))
			return SmallVector<ConstantID>(constant->m.c[0].id);
		SPIRV_CROSS_THROW("Unexpected scalar constant!");
	}
	if (!const_composite_insert_ids.count(const_id))
		SPIRV_CROSS_THROW("Unimplemented for this OpSpecConstantOp!");
	return const_composite_insert_ids[const_id];
}

void CompilerGLSL::fill_composite_constant(SPIRConstant &constant, TypeID type_id,
                                           const SmallVector<ConstantID> &initializers)
{
	auto &type = get<SPIRType>(type_id);
	constant.specialization = true;
	if (is_array(type) || type.basetype == SPIRType::Struct)
	{
		constant.subconstants = initializers;
	}
	else if (is_matrix(type))
	{
		constant.m.columns = type.columns;
		for (uint32_t i = 0; i < type.columns; ++i)
		{
			constant.m.id[i] = initializers[i];
			constant.m.c[i].vecsize = type.vecsize;
		}
	}
	else if (is_vector(type))
	{
		constant.m.c[0].vecsize = type.vecsize;
		for (uint32_t i = 0; i < type.vecsize; ++i)
			constant.m.c[0].id[i] = initializers[i];
	}
	else
		SPIRV_CROSS_THROW("Unexpected scalar in SpecConstantOp CompositeInsert!");
}

void CompilerGLSL::set_composite_constant(ConstantID const_id, TypeID type_id,
                                          const SmallVector<ConstantID> &initializers)
{
	if (maybe_get<SPIRConstantOp>(const_id))
	{
		const_composite_insert_ids[const_id] = initializers;
		return;
	}

	auto &constant = set<SPIRConstant>(const_id, type_id);
	fill_composite_constant(constant, type_id, initializers);
	forwarded_temporaries.insert(const_id);
}

TypeID CompilerGLSL::get_composite_member_type(TypeID type_id, uint32_t member_idx)
{
	auto &type = get<SPIRType>(type_id);
	if (is_array(type))
		return type.parent_type;
	if (type.basetype == SPIRType::Struct)
		return type.member_types[member_idx];
	if (is_matrix(type))
		return type.parent_type;
	if (is_vector(type))
		return type.parent_type;
	SPIRV_CROSS_THROW("Shouldn't reach lower than vector handling OpSpecConstantOp CompositeInsert!");
}

string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
{
	auto &type = get<SPIRType>(cop.basetype);
	bool binary = false;
	bool unary = false;
	string op;

	if (is_legacy() && is_unsigned_opcode(cop.opcode))
		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");

	// TODO: Find a clean way to reuse emit_instruction.
	switch (cop.opcode)
	{
	case OpSConvert:
	case OpUConvert:
	case OpFConvert:
		op = type_to_glsl_constructor(type);
		break;

#define GLSL_BOP(opname, x) \
	case Op##opname:        \
		binary = true;      \
		op = x;             \
		break

#define GLSL_UOP(opname, x) \
	case Op##opname:        \
		unary = true;       \
		op = x;             \
		break

		GLSL_UOP(SNegate, "-");
		GLSL_UOP(Not, "~");
		GLSL_BOP(IAdd, "+");
		GLSL_BOP(ISub, "-");
		GLSL_BOP(IMul, "*");
		GLSL_BOP(SDiv, "/");
		GLSL_BOP(UDiv, "/");
		GLSL_BOP(UMod, "%");
		GLSL_BOP(SMod, "%");
		GLSL_BOP(ShiftRightLogical, ">>");
		GLSL_BOP(ShiftRightArithmetic, ">>");
		GLSL_BOP(ShiftLeftLogical, "<<");
		GLSL_BOP(BitwiseOr, "|");
		GLSL_BOP(BitwiseXor, "^");
		GLSL_BOP(BitwiseAnd, "&");
		GLSL_BOP(LogicalOr, "||");
		GLSL_BOP(LogicalAnd, "&&");
		GLSL_UOP(LogicalNot, "!");
		GLSL_BOP(LogicalEqual, "==");
		GLSL_BOP(LogicalNotEqual, "!=");
		GLSL_BOP(IEqual, "==");
		GLSL_BOP(INotEqual, "!=");
		GLSL_BOP(ULessThan, "<");
		GLSL_BOP(SLessThan, "<");
		GLSL_BOP(ULessThanEqual, "<=");
		GLSL_BOP(SLessThanEqual, "<=");
		GLSL_BOP(UGreaterThan, ">");
		GLSL_BOP(SGreaterThan, ">");
		GLSL_BOP(UGreaterThanEqual, ">=");
		GLSL_BOP(SGreaterThanEqual, ">=");

	case OpSRem:
	{
		uint32_t op0 = cop.arguments[0];
		uint32_t op1 = cop.arguments[1];
		return join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
		            to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
	}

	case OpSelect:
	{
		if (cop.arguments.size() < 3)
			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");

		// This one is pretty annoying. It's triggered from
		// uint(bool), int(bool) from spec constants.
		// In order to preserve its compile-time constness in Vulkan GLSL,
		// we need to reduce the OpSelect expression back to this simplified model.
		// If we cannot, fail.
		if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
		{
			// Implement as a simple cast down below.
		}
		else
		{
			// Implement a ternary and pray the compiler understands it :)
			return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]);
		}
		break;
	}

	case OpVectorShuffle:
	{
		string expr = type_to_glsl_constructor(type);
		expr += "(";

		uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
		string left_arg = to_enclosed_expression(cop.arguments[0]);
		string right_arg = to_enclosed_expression(cop.arguments[1]);

		for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
		{
			uint32_t index = cop.arguments[i];
			if (index == 0xFFFFFFFF)
			{
				SPIRConstant c;
				c.constant_type = type.parent_type;
				assert(type.parent_type != ID(0));
				expr += constant_expression(c);
			}
			else if (index >= left_components)
			{
				expr += right_arg + "." + "xyzw"[index - left_components];
			}
			else
			{
				expr += left_arg + "." + "xyzw"[index];
			}

			if (i + 1 < uint32_t(cop.arguments.size()))
				expr += ", ";
		}

		expr += ")";
		return expr;
	}

	case OpCompositeExtract:
	{
		auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
		                                  ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
		return expr;
	}

	case OpCompositeInsert:
	{
		SmallVector<ConstantID> new_init = get_composite_constant_ids(cop.arguments[1]);
		uint32_t idx;
		uint32_t target_id = cop.self;
		uint32_t target_type_id = cop.basetype;
		// We have to drill down to the part we want to modify, and create new
		// constants for each containing part.
		for (idx = 2; idx < cop.arguments.size() - 1; ++idx)
		{
			uint32_t new_const = ir.increase_bound_by(1);
			uint32_t old_const = new_init[cop.arguments[idx]];
			new_init[cop.arguments[idx]] = new_const;
			set_composite_constant(target_id, target_type_id, new_init);
			new_init = get_composite_constant_ids(old_const);
			target_id = new_const;
			target_type_id = get_composite_member_type(target_type_id, cop.arguments[idx]);
		}
		// Now replace the initializer with the one from this instruction.
		new_init[cop.arguments[idx]] = cop.arguments[0];
		set_composite_constant(target_id, target_type_id, new_init);
		SPIRConstant tmp_const(cop.basetype);
		fill_composite_constant(tmp_const, cop.basetype, const_composite_insert_ids[cop.self]);
		return constant_expression(tmp_const);
	}

	default:
		// Some opcodes are unimplemented here; these are currently not possible to test from glslang.
		SPIRV_CROSS_THROW("Unimplemented spec constant op.");
	}

	uint32_t bit_width = 0;
	if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
		bit_width = expression_type(cop.arguments[0]).width;

	SPIRType::BaseType input_type;
	bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);

	switch (cop.opcode)
	{
	case OpIEqual:
	case OpINotEqual:
		input_type = to_signed_basetype(bit_width);
		break;

	case OpSLessThan:
	case OpSLessThanEqual:
	case OpSGreaterThan:
	case OpSGreaterThanEqual:
	case OpSMod:
	case OpSDiv:
	case OpShiftRightArithmetic:
	case OpSConvert:
	case OpSNegate:
		input_type = to_signed_basetype(bit_width);
		break;

	case OpULessThan:
	case OpULessThanEqual:
	case OpUGreaterThan:
	case OpUGreaterThanEqual:
	case OpUMod:
	case OpUDiv:
	case OpShiftRightLogical:
	case OpUConvert:
		input_type = to_unsigned_basetype(bit_width);
		break;

	default:
		input_type = type.basetype;
		break;
	}

#undef GLSL_BOP
#undef GLSL_UOP
	if (binary)
	{
		if (cop.arguments.size() < 2)
			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");

		string cast_op0;
		string cast_op1;
		auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
		                                              cop.arguments[1], skip_cast_if_equal_type);

		if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
		{
			expected_type.basetype = input_type;
			auto expr = bitcast_glsl_op(type, expected_type);
			expr += '(';
			expr += join(cast_op0, " ", op, " ", cast_op1);
			expr += ')';
			return expr;
		}
		else
			return join("(", cast_op0, " ", op, " ", cast_op1, ")");
	}
	else if (unary)
	{
		if (cop.arguments.size() < 1)
			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");

		// Auto-bitcast to result type as needed.
		// Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
		return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
	}
	else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
	{
		if (cop.arguments.size() < 1)
			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");

		auto &arg_type = expression_type(cop.arguments[0]);
		if (arg_type.width < type.width && input_type != arg_type.basetype)
		{
			auto expected = arg_type;
			expected.basetype = input_type;
			return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")");
		}
		else
			return join(op, "(", to_expression(cop.arguments[0]), ")");
	}
	else
	{
		if (cop.arguments.size() < 1)
			SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
		return join(op, "(", to_expression(cop.arguments[0]), ")");
	}
}
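// Example of the OpSRem lowering above (illustrative): since GLSL's % does not match SPIR-V's
// signed-remainder semantics on all targets, a spec-constant "a srem b" is emitted as
//   a - b * (a / b)
// which, with integer division truncating toward zero, yields a remainder whose sign follows
// the dividend, as OpSRem requires.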

string CompilerGLSL::constant_expression(const SPIRConstant &c,
                                         bool inside_block_like_struct_scope,
                                         bool inside_struct_scope)
{
	auto &type = get<SPIRType>(c.constant_type);

	if (is_pointer(type))
	{
		return backend.null_pointer_literal;
	}
	else if (!c.subconstants.empty())
	{
		// Handles arrays and structures.
		string res;

		// Only consider the decay if we are inside a struct scope where we are emitting a member with Offset decoration.
		// Outside a block-like struct declaration, we can always bind to a constant array with templated type.
		// Should look at ArrayStride here as well, but it's possible to declare a constant struct
		// with Offset = 0, using no ArrayStride on the enclosed array type.
		// A particular CTS test hits this scenario.
		bool array_type_decays = inside_block_like_struct_scope &&
		                         is_array(type) &&
		                         !backend.array_is_value_type_in_buffer_blocks;

		// Allow Metal to use the array<T> template to make arrays a value type.
		bool needs_trailing_bracket = false;
		if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
		    !is_array(type))
		{
			res = type_to_glsl_constructor(type) + "{ ";
		}
		else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
		         is_array(type) && !array_type_decays)
		{
			const auto *p_type = &type;
			SPIRType tmp_type { OpNop };

			if (inside_struct_scope &&
			    backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
			    type.basetype == SPIRType::Boolean)
			{
				tmp_type = type;
				tmp_type.basetype = backend.boolean_in_struct_remapped_type;
				p_type = &tmp_type;
			}

			res = type_to_glsl_constructor(*p_type) + "({ ";
			needs_trailing_bracket = true;
		}
		else if (backend.use_initializer_list)
		{
			res = "{ ";
		}
		else
		{
			res = type_to_glsl_constructor(type) + "(";
		}

		uint32_t subconstant_index = 0;
		for (auto &elem : c.subconstants)
		{
			if (auto *op = maybe_get<SPIRConstantOp>(elem))
			{
				res += constant_op_expression(*op);
			}
			else if (maybe_get<SPIRUndef>(elem) != nullptr)
			{
				res += to_name(elem);
			}
			else
			{
				auto &subc = get<SPIRConstant>(elem);
				if (subc.specialization && !expression_is_forwarded(elem))
					res += to_name(elem);
				else
				{
					if (!is_array(type) && type.basetype == SPIRType::Struct)
					{
						// When we get down to emitting struct members, override the block-like information.
						// For constants, we can freely mix and match block-like state.
						inside_block_like_struct_scope =
						    has_member_decoration(type.self, subconstant_index, DecorationOffset);
					}

					if (type.basetype == SPIRType::Struct)
						inside_struct_scope = true;

					res += constant_expression(subc, inside_block_like_struct_scope, inside_struct_scope);
				}
			}

			if (&elem != &c.subconstants.back())
				res += ", ";

			subconstant_index++;
		}

		res += backend.use_initializer_list ? " }" : ")";
		if (needs_trailing_bracket)
			res += ")";

		return res;
	}
	else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
	{
		// Metal tessellation likes empty structs which are then constant expressions.
		if (backend.supports_empty_struct)
			return "{ }";
		else if (backend.use_typed_initializer_list)
			return join(type_to_glsl(type), "{ 0 }");
		else if (backend.use_initializer_list)
			return "{ 0 }";
		else
			return join(type_to_glsl(type), "(0)");
	}
	else if (c.columns() == 1)
	{
		auto res = constant_expression_vector(c, 0);

		if (inside_struct_scope &&
		    backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
		    type.basetype == SPIRType::Boolean)
		{
			SPIRType tmp_type = type;
			tmp_type.basetype = backend.boolean_in_struct_remapped_type;
			res = join(type_to_glsl(tmp_type), "(", res, ")");
		}

		return res;
	}
	else
	{
		string res = type_to_glsl(type) + "(";
		for (uint32_t col = 0; col < c.columns(); col++)
		{
			if (c.specialization_constant_id(col) != 0)
				res += to_name(c.specialization_constant_id(col));
			else
				res += constant_expression_vector(c, col);

			if (col + 1 < c.columns())
				res += ", ";
		}
		res += ")";

		if (inside_struct_scope &&
		    backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
		    type.basetype == SPIRType::Boolean)
		{
			SPIRType tmp_type = type;
			tmp_type.basetype = backend.boolean_in_struct_remapped_type;
			res = join(type_to_glsl(tmp_type), "(", res, ")");
		}

		return res;
	}
}
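// Shapes of the initializers built above (illustrative, backend names are examples):
//   use_initializer_list only:                      { 1.0, 2.0 }
//   plus use_typed_initializer_list (struct):       Foo{ 1.0, 2.0 }
//   array as value type (needs trailing bracket):   array-template-type({ 1.0, 2.0 }) -- MSL-style
//   no initializer lists (plain constructor form):  float[2](1.0, 2.0)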

#ifdef _MSC_VER
// snprintf does not exist or is buggy on older MSVC versions, some of which are
// used by MinGW. Use sprintf instead and disable the corresponding warning.
#pragma warning(push)
#pragma warning(disable : 4996)
#endif

string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
{
	string res;
	float float_value = c.scalar_f16(col, row);

	// There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
	// of complicated workarounds, just value-cast to the half type always.
	if (std::isnan(float_value) || std::isinf(float_value))
	{
		SPIRType type { OpTypeFloat };
		type.basetype = SPIRType::Half;
		type.vecsize = 1;
		type.columns = 1;

		if (float_value == numeric_limits<float>::infinity())
			res = join(type_to_glsl(type), "(1.0 / 0.0)");
		else if (float_value == -numeric_limits<float>::infinity())
			res = join(type_to_glsl(type), "(-1.0 / 0.0)");
		else if (std::isnan(float_value))
			res = join(type_to_glsl(type), "(0.0 / 0.0)");
		else
			SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
	}
	else
	{
		SPIRType type { OpTypeFloat };
		type.basetype = SPIRType::Half;
		type.vecsize = 1;
		type.columns = 1;
		res = join(type_to_glsl(type), "(", format_float(float_value), ")");
	}

	return res;
}
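// Example outputs of the half path above (illustrative, assuming the half type prints as
// float16_t): a finite value becomes float16_t(0.5), while non-finite values are value-cast
// from a division, e.g.
//   float16_t(0.0 / 0.0)   // NaN
//   float16_t(1.0 / 0.0)   // +inf
// because no "hf" literal suffix can be relied upon across targets.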

string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
{
	string res;
	float float_value = c.scalar_f32(col, row);

	if (std::isnan(float_value) || std::isinf(float_value))
	{
		// Use special representation.
		if (!is_legacy())
		{
			SPIRType out_type { OpTypeFloat };
			SPIRType in_type { OpTypeInt };
			out_type.basetype = SPIRType::Float;
			in_type.basetype = SPIRType::UInt;
			out_type.vecsize = 1;
			in_type.vecsize = 1;
			out_type.width = 32;
			in_type.width = 32;

			char print_buffer[32];
#ifdef _WIN32
			sprintf(print_buffer, "0x%xu", c.scalar(col, row));
#else
			snprintf(print_buffer, sizeof(print_buffer), "0x%xu", c.scalar(col, row));
#endif

			const char *comment = "inf";
			if (float_value == -numeric_limits<float>::infinity())
				comment = "-inf";
			else if (std::isnan(float_value))
				comment = "nan";
			res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
		}
		else
		{
			if (float_value == numeric_limits<float>::infinity())
			{
				if (backend.float_literal_suffix)
					res = "(1.0f / 0.0f)";
				else
					res = "(1.0 / 0.0)";
			}
			else if (float_value == -numeric_limits<float>::infinity())
			{
				if (backend.float_literal_suffix)
					res = "(-1.0f / 0.0f)";
				else
					res = "(-1.0 / 0.0)";
			}
			else if (std::isnan(float_value))
			{
				if (backend.float_literal_suffix)
					res = "(0.0f / 0.0f)";
				else
					res = "(0.0 / 0.0)";
			}
			else
				SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
		}
	}
	else
	{
		res = format_float(float_value);
		if (backend.float_literal_suffix)
			res += "f";
	}

	return res;
}
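// Example of the bitcast path above (illustrative): a 32-bit NaN constant on a modern target
// is emitted by bit pattern so the exact payload survives, e.g.
//   uintBitsToFloat(0x7fc00000u /* nan */)
// whereas legacy targets fall back to literal divisions like (0.0 / 0.0).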

std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
{
	string res;
	double double_value = c.scalar_f64(col, row);

	if (std::isnan(double_value) || std::isinf(double_value))
	{
		// Use special representation.
		if (!is_legacy())
		{
			SPIRType out_type { OpTypeFloat };
			SPIRType in_type { OpTypeInt };
			out_type.basetype = SPIRType::Double;
			in_type.basetype = SPIRType::UInt64;
			out_type.vecsize = 1;
			in_type.vecsize = 1;
			out_type.width = 64;
			in_type.width = 64;

			uint64_t u64_value = c.scalar_u64(col, row);

			if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310.
				SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310.");
			require_extension_internal("GL_ARB_gpu_shader_int64");

			char print_buffer[64];
#ifdef _WIN32
			sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
			        backend.long_long_literal_suffix ? "ull" : "ul");
#else
			snprintf(print_buffer, sizeof(print_buffer), "0x%llx%s", static_cast<unsigned long long>(u64_value),
			         backend.long_long_literal_suffix ? "ull" : "ul");
#endif

			const char *comment = "inf";
			if (double_value == -numeric_limits<double>::infinity())
				comment = "-inf";
			else if (std::isnan(double_value))
				comment = "nan";
			res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
		}
		else
		{
			if (options.es)
				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
			if (options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader_fp64");

			if (double_value == numeric_limits<double>::infinity())
			{
				if (backend.double_literal_suffix)
					res = "(1.0lf / 0.0lf)";
				else
					res = "(1.0 / 0.0)";
			}
			else if (double_value == -numeric_limits<double>::infinity())
			{
				if (backend.double_literal_suffix)
					res = "(-1.0lf / 0.0lf)";
				else
					res = "(-1.0 / 0.0)";
			}
			else if (std::isnan(double_value))
			{
				if (backend.double_literal_suffix)
					res = "(0.0lf / 0.0lf)";
				else
					res = "(0.0 / 0.0)";
			}
			else
				SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
		}
	}
	else
	{
		res = format_double(double_value);
		if (backend.double_literal_suffix)
			res += "lf";
	}

	return res;
}

#ifdef _MSC_VER
#pragma warning(pop)
#endif

string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
{
	auto type = get<SPIRType>(c.constant_type);
	type.columns = 1;

	auto scalar_type = type;
	scalar_type.vecsize = 1;

	string res;
	bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
	bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;

	if (!type_is_floating_point(type))
	{
		// Cannot swizzle literal integers as a special case.
		swizzle_splat = false;
	}

	if (splat || swizzle_splat)
	{
		// Cannot use constant splatting if we have specialization constants somewhere in the vector.
		for (uint32_t i = 0; i < c.vector_size(); i++)
		{
			if (c.specialization_constant_id(vector, i) != 0)
			{
				splat = false;
				swizzle_splat = false;
				break;
			}
		}
	}

	if (splat || swizzle_splat)
	{
		if (type.width == 64)
		{
			uint64_t ident = c.scalar_u64(vector, 0);
			for (uint32_t i = 1; i < c.vector_size(); i++)
			{
				if (ident != c.scalar_u64(vector, i))
				{
					splat = false;
					swizzle_splat = false;
					break;
				}
			}
		}
		else
		{
			uint32_t ident = c.scalar(vector, 0);
			for (uint32_t i = 1; i < c.vector_size(); i++)
			{
				if (ident != c.scalar(vector, i))
				{
					splat = false;
					swizzle_splat = false;
				}
			}
		}
	}

	if (c.vector_size() > 1 && !swizzle_splat)
		res += type_to_glsl(type) + "(";

	switch (type.basetype)
	{
	case SPIRType::Half:
		if (splat || swizzle_splat)
		{
			res += convert_half_to_string(c, vector, 0);
			if (swizzle_splat)
				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
		}
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
					res += to_expression(c.specialization_constant_id(vector, i));
				else
					res += convert_half_to_string(c, vector, i);

				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	case SPIRType::Float:
		if (splat || swizzle_splat)
		{
			res += convert_float_to_string(c, vector, 0);
			if (swizzle_splat)
				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
		}
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
					res += to_expression(c.specialization_constant_id(vector, i));
				else
					res += convert_float_to_string(c, vector, i);

				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	case SPIRType::Double:
		if (splat || swizzle_splat)
		{
			res += convert_double_to_string(c, vector, 0);
			if (swizzle_splat)
				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
		}
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
					res += to_expression(c.specialization_constant_id(vector, i));
				else
					res += convert_double_to_string(c, vector, i);

				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	case SPIRType::Int64:
	{
		auto tmp = type;
		tmp.vecsize = 1;
		tmp.columns = 1;
		auto int64_type = type_to_glsl(tmp);

		if (splat)
		{
			res += convert_to_string(c.scalar_i64(vector, 0), int64_type, backend.long_long_literal_suffix);
		}
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
					res += to_expression(c.specialization_constant_id(vector, i));
				else
					res += convert_to_string(c.scalar_i64(vector, i), int64_type, backend.long_long_literal_suffix);

				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;
	}

	case SPIRType::UInt64:
		if (splat)
		{
			res += convert_to_string(c.scalar_u64(vector, 0));
			if (backend.long_long_literal_suffix)
				res += "ull";
			else
				res += "ul";
		}
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
					res += to_expression(c.specialization_constant_id(vector, i));
				else
				{
					res += convert_to_string(c.scalar_u64(vector, i));
					if (backend.long_long_literal_suffix)
						res += "ull";
					else
						res += "ul";
				}

				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	case SPIRType::UInt:
		if (splat)
		{
			res += convert_to_string(c.scalar(vector, 0));
			if (is_legacy())
			{
				// Fake unsigned constant literals with signed ones if possible.
				// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
				if (c.scalar_i32(vector, 0) < 0)
					SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
			}
			else if (backend.uint32_t_literal_suffix)
				res += "u";
		}
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
					res += to_expression(c.specialization_constant_id(vector, i));
				else
				{
					res += convert_to_string(c.scalar(vector, i));
					if (is_legacy())
					{
						// Fake unsigned constant literals with signed ones if possible.
						// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
						if (c.scalar_i32(vector, i) < 0)
							SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
							                  "the literal negative.");
					}
					else if (backend.uint32_t_literal_suffix)
						res += "u";
				}

				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	case SPIRType::Int:
		if (splat)
			res += convert_to_string(c.scalar_i32(vector, 0));
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
					res += to_expression(c.specialization_constant_id(vector, i));
				else
					res += convert_to_string(c.scalar_i32(vector, i));
				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	case SPIRType::UShort:
		if (splat)
		{
			res += convert_to_string(c.scalar(vector, 0));
		}
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
					res += to_expression(c.specialization_constant_id(vector, i));
				else
				{
					if (*backend.uint16_t_literal_suffix)
					{
						res += convert_to_string(c.scalar_u16(vector, i));
						res += backend.uint16_t_literal_suffix;
					}
					else
					{
						// If the backend doesn't have a literal suffix, we need to value cast.
						res += type_to_glsl(scalar_type);
						res += "(";
						res += convert_to_string(c.scalar_u16(vector, i));
						res += ")";
					}
				}

				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	case SPIRType::Short:
		if (splat)
		{
			res += convert_to_string(c.scalar_i16(vector, 0));
		}
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
					res += to_expression(c.specialization_constant_id(vector, i));
				else
				{
					if (*backend.int16_t_literal_suffix)
					{
						res += convert_to_string(c.scalar_i16(vector, i));
						res += backend.int16_t_literal_suffix;
					}
					else
					{
						// If the backend doesn't have a literal suffix, we need to value cast.
						res += type_to_glsl(scalar_type);
						res += "(";
						res += convert_to_string(c.scalar_i16(vector, i));
						res += ")";
					}
				}

				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	case SPIRType::UByte:
		if (splat)
		{
			res += convert_to_string(c.scalar_u8(vector, 0));
		}
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
					res += to_expression(c.specialization_constant_id(vector, i));
				else
				{
					res += type_to_glsl(scalar_type);
					res += "(";
					res += convert_to_string(c.scalar_u8(vector, i));
					res += ")";
				}

				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	case SPIRType::SByte:
		if (splat)
		{
			res += convert_to_string(c.scalar_i8(vector, 0));
		}
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
					res += to_expression(c.specialization_constant_id(vector, i));
				else
				{
					res += type_to_glsl(scalar_type);
					res += "(";
					res += convert_to_string(c.scalar_i8(vector, i));
					res += ")";
				}

				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	case SPIRType::Boolean:
		if (splat)
			res += c.scalar(vector, 0) ? "true" : "false";
		else
		{
			for (uint32_t i = 0; i < c.vector_size(); i++)
			{
				if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
					res += to_expression(c.specialization_constant_id(vector, i));
				else
					res += c.scalar(vector, i) ? "true" : "false";

				if (i + 1 < c.vector_size())
					res += ", ";
			}
		}
		break;

	default:
		SPIRV_CROSS_THROW("Invalid constant expression basetype.");
	}

	if (c.vector_size() > 1 && !swizzle_splat)
		res += ")";

	return res;
}
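// Splat shapes produced above (illustrative): a vec4 of identical floats becomes
//   vec4(1.0)               with constructor splatting, or
//   roughly (1.0).xxxx      via remap_swizzle() when the backend can swizzle scalars,
//   vec4(1.0, 2.0, 3.0, 4.0) otherwise, with each component printed separately.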

SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
{
	forced_temporaries.insert(id);
	emit_uninitialized_temporary(type, id);
	return set<SPIRExpression>(id, to_name(id), type, true);
}

void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
{
	// If we're declaring temporaries inside continue blocks,
	// we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
	if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id))
	{
		auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
		if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
		            [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
			            return tmp.first == result_type && tmp.second == result_id;
		            }) == end(header.declare_temporary))
		{
			header.declare_temporary.emplace_back(result_type, result_id);
			hoisted_temporaries.insert(result_id);
			force_recompile();
		}
	}
	else if (hoisted_temporaries.count(result_id) == 0)
	{
		auto &type = get<SPIRType>(result_type);
		auto &flags = get_decoration_bitset(result_id);

		// The result_id has not been made into an expression yet, so use flags interface.
		add_local_variable_name(result_id);

		string initializer;
		if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
			initializer = join(" = ", to_zero_initialized_expression(result_type));

		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";");
	}
}

string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
{
	auto &type = get<SPIRType>(result_type);

	// If we're declaring temporaries inside continue blocks,
	// we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
	if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id))
	{
		auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
		if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
		            [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
			            return tmp.first == result_type && tmp.second == result_id;
		            }) == end(header.declare_temporary))
		{
			header.declare_temporary.emplace_back(result_type, result_id);
			hoisted_temporaries.insert(result_id);
			force_recompile_guarantee_forward_progress();
		}

		return join(to_name(result_id), " = ");
	}
	else if (hoisted_temporaries.count(result_id))
	{
		// The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
		return join(to_name(result_id), " = ");
	}
	else
	{
		// The result_id has not been made into an expression yet, so use flags interface.
		add_local_variable_name(result_id);
		auto &flags = get_decoration_bitset(result_id);
		return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
	}
}
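// Hoisting sketch for the two functions above (illustrative GLSL; _tmp is a hypothetical name):
// a temporary needed by a continue block is declared in the loop header instead, so instead of
// declaring inside the loop body we get
//   float _tmp;                          // hoisted declaration in the loop dominator
//   for (;;) { ... _tmp = expr; ... }    // declare_temporary() then emits only "_tmp = "
// Registering the hoist forces a recompile pass so the header is emitted with the declaration.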
6714 | |
6715 | bool CompilerGLSL::expression_is_forwarded(uint32_t id) const |
6716 | { |
6717 | return forwarded_temporaries.count(x: id) != 0; |
6718 | } |
6719 | |
6720 | bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const |
6721 | { |
6722 | return suppressed_usage_tracking.count(x: id) != 0; |
6723 | } |
6724 | |
6725 | bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const |
6726 | { |
6727 | auto *expr = maybe_get<SPIRExpression>(id); |
6728 | if (!expr) |
6729 | return false; |
6730 | |
6731 | // If we're emitting code at a deeper loop level than when we emitted the expression, |
6732 | // we're probably reading the same expression over and over. |
6733 | return current_loop_level > expr->emitted_loop_level; |
6734 | } |
6735 | |
6736 | SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding, |
6737 | bool suppress_usage_tracking) |
6738 | { |
6739 | if (forwarding && (forced_temporaries.find(x: result_id) == end(cont&: forced_temporaries))) |
6740 | { |
6741 | // Just forward it without temporary. |
6742 | // If the forward is trivial, we do not force flushing to temporary for this expression. |
6743 | forwarded_temporaries.insert(x: result_id); |
6744 | if (suppress_usage_tracking) |
6745 | suppressed_usage_tracking.insert(x: result_id); |
6746 | |
6747 | return set<SPIRExpression>(id: result_id, args: rhs, args&: result_type, args: true); |
6748 | } |
6749 | else |
6750 | { |
6751 | // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are). |
6752 | statement(ts: declare_temporary(result_type, result_id), ts: rhs, ts: ";"); |
6753 | return set<SPIRExpression>(id: result_id, args: to_name(id: result_id), args&: result_type, args: true); |
6754 | } |
6755 | } |
6756 | |
void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
{
	bool forward = should_forward(op0);
	emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
	inherit_expression_dependencies(result_id, op0);
}

void CompilerGLSL::emit_unary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
{
	auto &type = get<SPIRType>(result_type);
	bool forward = should_forward(op0);
	emit_op(result_type, result_id, join(type_to_glsl(type), "(", op, to_enclosed_unpacked_expression(op0), ")"), forward);
	inherit_expression_dependencies(result_id, op0);
}

void CompilerGLSL::emit_mesh_tasks(SPIRBlock &block)
{
	statement("EmitMeshTasksEXT(",
	          to_unpacked_expression(block.mesh.groups[0]), ", ",
	          to_unpacked_expression(block.mesh.groups[1]), ", ",
	          to_unpacked_expression(block.mesh.groups[2]), ");");
}

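// Example of the NoContraction handling below (illustrative, assuming a target that supports
// the precise qualifier): an OpFMul decorated NoContraction is not forwarded inline, but
// flushed as something like
//   precise float _42 = a * b;
// so the driver cannot contract it into an FMA.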
void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
{
	// Various FP arithmetic opcodes such as add, sub, mul will hit this.
	bool force_temporary_precise = backend.support_precise_qualifier &&
	                               has_decoration(result_id, DecorationNoContraction) &&
	                               type_is_floating_point(get<SPIRType>(result_type));
	bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise;

	emit_op(result_type, result_id,
	        join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);

	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
}

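// The two "unrolled" emitters below expand component-wise operations which some targets cannot
// express directly on vectors. As an illustrative example, a vec2 comparison may be unrolled into
//   bvec2(a.x < b.x, a.y < b.y)
// rather than relying on lessThan(a, b).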
void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
{
	auto &type = get<SPIRType>(result_type);
	auto expr = type_to_glsl_constructor(type);
	expr += '(';
	for (uint32_t i = 0; i < type.vecsize; i++)
	{
		// Make sure to call to_expression multiple times to ensure
		// that these expressions are properly flushed to temporaries if needed.
		expr += op;
		expr += to_extract_component_expression(operand, i);

		if (i + 1 < type.vecsize)
			expr += ", ";
	}
	expr += ')';
	emit_op(result_type, result_id, expr, should_forward(operand));

	inherit_expression_dependencies(result_id, operand);
}

void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                           const char *op, bool negate, SPIRType::BaseType expected_type)
{
	auto &type0 = expression_type(op0);
	auto &type1 = expression_type(op1);

	SPIRType target_type0 = type0;
	SPIRType target_type1 = type1;
	target_type0.basetype = expected_type;
	target_type1.basetype = expected_type;
	target_type0.vecsize = 1;
	target_type1.vecsize = 1;

	auto &type = get<SPIRType>(result_type);
	auto expr = type_to_glsl_constructor(type);
	expr += '(';
	for (uint32_t i = 0; i < type.vecsize; i++)
	{
		// Make sure to call to_expression multiple times to ensure
		// that these expressions are properly flushed to temporaries if needed.
		if (negate)
			expr += "!(";

		if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
			expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
		else
			expr += to_extract_component_expression(op0, i);

		expr += ' ';
		expr += op;
		expr += ' ';

		if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
			expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
		else
			expr += to_extract_component_expression(op1, i);

		if (negate)
			expr += ")";

		if (i + 1 < type.vecsize)
			expr += ", ";
	}
	expr += ')';
	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));

	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
}

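// Illustrative example of the bitcast negotiation performed by binary_op_bitcast_helper() below:
// OpSLessThan with two uint operands expects signed inputs, so both sides are bitcast and the
// comparison comes out as something like
//   lessThan(ivec2(a), ivec2(b))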
SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
                                                uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
{
	auto &type0 = expression_type(op0);
	auto &type1 = expression_type(op1);

	// We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
	// For some functions like OpIEqual and OpINotEqual, we don't care if inputs are of different types than expected
	// since the equality test is exactly the same.
	bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);

	// Create a fake type so we can bitcast to it.
	// We only deal with regular arithmetic types here like int, uint and so on.
	SPIRType expected_type{ type0.op };
	expected_type.basetype = input_type;
	expected_type.vecsize = type0.vecsize;
	expected_type.columns = type0.columns;
	expected_type.width = type0.width;

	if (cast)
	{
		cast_op0 = bitcast_glsl(expected_type, op0);
		cast_op1 = bitcast_glsl(expected_type, op1);
	}
	else
	{
		// If we don't cast, our actual input type is that of the first (or second) argument.
		cast_op0 = to_enclosed_unpacked_expression(op0);
		cast_op1 = to_enclosed_unpacked_expression(op1);
		input_type = type0.basetype;
	}

	return expected_type;
}

bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
{
	// Some bitcasts may require complex casting sequences, and are implemented here.
	// Otherwise a simple unary function will do with bitcast_glsl_op.

	auto &output_type = get<SPIRType>(result_type);
	auto &input_type = expression_type(op0);
	string expr;

	if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
		expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))");
	else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
	         input_type.vecsize == 2)
		expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))");
	else
		return false;

	emit_op(result_type, id, expr, should_forward(op0));
	return true;
}

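// Illustrative example of the result re-bitcast below: an arithmetic right shift
// (OpShiftRightArithmetic) whose operands and result are uint must be performed in signed
// arithmetic, so the emitted expression looks like
//   uint(int(a) >> int(b))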
void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                       const char *op, SPIRType::BaseType input_type,
                                       bool skip_cast_if_equal_type,
                                       bool implicit_integer_promotion)
{
	string cast_op0, cast_op1;
	auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
	auto &out_type = get<SPIRType>(result_type);

	// We might have cast away from the result type, so bitcast again.
	// For example, arithmetic right shift with uint inputs.
	// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
	auto bitop = join(cast_op0, " ", op, " ", cast_op1);
	string expr;

	if (implicit_integer_promotion)
	{
		// Simple value cast.
		expr = join(type_to_glsl(out_type), '(', bitop, ')');
	}
	else if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
	{
		expected_type.basetype = input_type;
		expr = join(bitcast_glsl_op(out_type, expected_type), '(', bitop, ')');
	}
	else
	{
		expr = std::move(bitop);
	}

	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
}

void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
{
	bool forward = should_forward(op0);
	emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
	inherit_expression_dependencies(result_id, op0);
}

void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                       const char *op)
{
	// Opaque types (e.g. OpTypeSampledImage) must always be forwarded in GLSL.
	const auto &type = get_type(result_type);
	bool must_forward = type_is_opaque_value(type);
	bool forward = must_forward || (should_forward(op0) && should_forward(op1));
	emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
	        forward);
	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
}

void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                       const char *op)
{
	auto &type = get<SPIRType>(result_type);
	if (type_is_floating_point(type))
	{
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("Floating point atomics require Vulkan semantics.");
		if (options.es)
			SPIRV_CROSS_THROW("Floating point atomics require desktop GLSL.");
		require_extension_internal("GL_EXT_shader_atomic_float");
	}

	forced_temporaries.insert(result_id);
	emit_op(result_type, result_id,
	        join(op, "(", to_non_uniform_aware_expression(op0), ", ",
	             to_unpacked_expression(op1), ")"), false);
	flush_all_atomic_capable_variables();
}

void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
                                       uint32_t op0, uint32_t op1, uint32_t op2,
                                       const char *op)
{
	forced_temporaries.insert(result_id);
	emit_op(result_type, result_id,
	        join(op, "(", to_non_uniform_aware_expression(op0), ", ",
	             to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false);
	flush_all_atomic_capable_variables();
}

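// Illustrative example of the cast-on-input / cast-on-output handling below: FindUMsb consumes
// an unsigned value while findMSB() returns a signed int in GLSL, so an unsigned result may be
// emitted as something like
//   uint(findMSB(uint(x)))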
void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
                                           SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
{
	auto &out_type = get<SPIRType>(result_type);
	auto &expr_type = expression_type(op0);
	auto expected_type = out_type;

	// Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
	expected_type.basetype = input_type;
	expected_type.width = expr_type.width;

	string cast_op;
	if (expr_type.basetype != input_type)
	{
		if (expr_type.basetype == SPIRType::Boolean)
			cast_op = join(type_to_glsl(expected_type), "(", to_unpacked_expression(op0), ")");
		else
			cast_op = bitcast_glsl(expected_type, op0);
	}
	else
		cast_op = to_unpacked_expression(op0);

	string expr;
	if (out_type.basetype != expected_result_type)
	{
		expected_type.basetype = expected_result_type;
		expected_type.width = out_type.width;
		if (out_type.basetype == SPIRType::Boolean)
			expr = type_to_glsl(out_type);
		else
			expr = bitcast_glsl_op(out_type, expected_type);
		expr += '(';
		expr += join(op, "(", cast_op, ")");
		expr += ')';
	}
	else
	{
		expr += join(op, "(", cast_op, ")");
	}

	emit_op(result_type, result_id, expr, should_forward(op0));
	inherit_expression_dependencies(result_id, op0);
}

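// Illustrative example for emit_trinary_func_op_bitextract() below: OpBitFieldUExtract on a
// signed base with 16-bit offset/count operands may come out as something like
//   int(bitfieldExtract(uint(base), int(offset), int(count)))
// where the base is bitcast to the opcode's unsigned input type and offset/count are value-cast to int.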
// Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
// and different vector sizes all at once. Need a special purpose method here.
void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                                   uint32_t op2, const char *op,
                                                   SPIRType::BaseType expected_result_type,
                                                   SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
                                                   SPIRType::BaseType input_type2)
{
	auto &out_type = get<SPIRType>(result_type);
	auto expected_type = out_type;
	expected_type.basetype = input_type0;

	string cast_op0 =
	    expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);

	auto op1_expr = to_unpacked_expression(op1);
	auto op2_expr = to_unpacked_expression(op2);

	// Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
	expected_type.basetype = input_type1;
	expected_type.vecsize = 1;
	string cast_op1 = expression_type(op1).basetype != input_type1 ?
	                      join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
	                      op1_expr;

	expected_type.basetype = input_type2;
	expected_type.vecsize = 1;
	string cast_op2 = expression_type(op2).basetype != input_type2 ?
	                      join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
	                      op2_expr;

	string expr;
	if (out_type.basetype != expected_result_type)
	{
		expected_type.vecsize = out_type.vecsize;
		expected_type.basetype = expected_result_type;
		expr = bitcast_glsl_op(out_type, expected_type);
		expr += '(';
		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
		expr += ')';
	}
	else
	{
		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
	}

	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
	inherit_expression_dependencies(result_id, op2);
}

void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                             uint32_t op2, const char *op, SPIRType::BaseType input_type)
{
	auto &out_type = get<SPIRType>(result_type);
	auto expected_type = out_type;
	expected_type.basetype = input_type;
	string cast_op0 =
	    expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
	string cast_op1 =
	    expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1);
	string cast_op2 =
	    expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2);

	string expr;
	if (out_type.basetype != input_type)
	{
		expr = bitcast_glsl_op(out_type, expected_type);
		expr += '(';
		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
		expr += ')';
	}
	else
	{
		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
	}

	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
	inherit_expression_dependencies(result_id, op2);
}

void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
                                                      uint32_t op1, const char *op, SPIRType::BaseType input_type)
{
	// Special purpose method for implementing clustered subgroup opcodes.
	// The main difference is that op1 does not participate in any casting; it needs to be a literal.
	auto &out_type = get<SPIRType>(result_type);
	auto expected_type = out_type;
	expected_type.basetype = input_type;
	string cast_op0 =
	    expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);

	string expr;
	if (out_type.basetype != input_type)
	{
		expr = bitcast_glsl_op(out_type, expected_type);
		expr += '(';
		expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
		expr += ')';
	}
	else
	{
		expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
	}

	emit_op(result_type, result_id, expr, should_forward(op0));
	inherit_expression_dependencies(result_id, op0);
}

void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                            const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
{
	string cast_op0, cast_op1;
	auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
	auto &out_type = get<SPIRType>(result_type);

	// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
	string expr;
	if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
	{
		expected_type.basetype = input_type;
		expr = bitcast_glsl_op(out_type, expected_type);
		expr += '(';
		expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
		expr += ')';
	}
	else
	{
		expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
	}

	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
}

void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                        uint32_t op2, const char *op)
{
	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
	emit_op(result_type, result_id,
	        join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
	             to_unpacked_expression(op2), ")"),
	        forward);

	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
	inherit_expression_dependencies(result_id, op2);
}

void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                           uint32_t op2, uint32_t op3, const char *op)
{
	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
	emit_op(result_type, result_id,
	        join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
	             to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
	        forward);

	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
	inherit_expression_dependencies(result_id, op2);
	inherit_expression_dependencies(result_id, op3);
}

void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                           uint32_t op2, uint32_t op3, const char *op,
                                           SPIRType::BaseType offset_count_type)
{
	// Only need to cast offset/count arguments. Types of base/insert must be same as result type,
	// and bitfieldInsert is sign invariant.
	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);

	auto op0_expr = to_unpacked_expression(op0);
	auto op1_expr = to_unpacked_expression(op1);
	auto op2_expr = to_unpacked_expression(op2);
	auto op3_expr = to_unpacked_expression(op3);

	assert(offset_count_type == SPIRType::UInt || offset_count_type == SPIRType::Int);
	SPIRType target_type { OpTypeInt };
	target_type.width = 32;
	target_type.vecsize = 1;
	target_type.basetype = offset_count_type;

	if (expression_type(op2).basetype != offset_count_type)
	{
		// Value-cast here. Input might be 16-bit. GLSL requires int.
		op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
	}

	if (expression_type(op3).basetype != offset_count_type)
	{
		// Value-cast here. Input might be 16-bit. GLSL requires int.
		op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
	}

	emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
	        forward);

	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
	inherit_expression_dependencies(result_id, op2);
	inherit_expression_dependencies(result_id, op3);
}

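// Examples of the legacy renaming performed by legacy_tex_op() below (illustrative): on legacy
// desktop GLSL, "texture" on a sampler2D becomes texture2D() and on a samplerCube textureCube();
// a depth sampler maps to shadow2D(), and on legacy ES a depth "textureProj" becomes
// shadow2DProjEXT() via GL_EXT_shadow_samplers.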
string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
{
	const char *type;
	switch (imgtype.image.dim)
	{
	case spv::Dim1D:
		// Force 2D path for ES.
		if (options.es)
			type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
		else
			type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
		break;
	case spv::Dim2D:
		type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
		break;
	case spv::Dim3D:
		type = "3D";
		break;
	case spv::DimCube:
		type = "Cube";
		break;
	case spv::DimRect:
		type = "2DRect";
		break;
	case spv::DimBuffer:
		type = "Buffer";
		break;
	case spv::DimSubpassData:
		type = "2D";
		break;
	default:
		type = "";
		break;
	}

	// In legacy GLSL, an extension is required for textureLod in the fragment
	// shader or textureGrad anywhere.
	bool legacy_lod_ext = false;
	auto &execution = get_entry_point();
	if (op == "textureGrad" || op == "textureProjGrad" ||
	    ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
	{
		if (is_legacy_es())
		{
			legacy_lod_ext = true;
			require_extension_internal("GL_EXT_shader_texture_lod");
		}
		else if (is_legacy_desktop())
			require_extension_internal("GL_ARB_shader_texture_lod");
	}

	if (op == "textureLodOffset" || op == "textureProjLodOffset")
	{
		if (is_legacy_es())
			SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));

		require_extension_internal("GL_EXT_gpu_shader4");
	}

	// GLES has very limited support for shadow samplers.
	// Basically shadow2D and shadow2DProj work through EXT_shadow_samplers,
	// everything else can just throw.
	bool is_comparison = is_depth_image(imgtype, tex);
	if (is_comparison && is_legacy_es())
	{
		if (op == "texture" || op == "textureProj")
			require_extension_internal("GL_EXT_shadow_samplers");
		else
			SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));

		if (imgtype.image.dim == spv::DimCube)
			return "shadowCubeNV";
	}

	if (op == "textureSize")
	{
		if (is_legacy_es())
			SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
		if (is_comparison)
			SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
		require_extension_internal("GL_EXT_gpu_shader4");
	}

	if (op == "texelFetch" && is_legacy_es())
		SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");

	bool is_es_and_depth = is_legacy_es() && is_comparison;
	std::string type_prefix = is_comparison ? "shadow" : "texture";

	if (op == "texture")
		return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
	else if (op == "textureLod")
		return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod");
	else if (op == "textureProj")
		return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
	else if (op == "textureGrad")
		return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
	else if (op == "textureProjLod")
		return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
	else if (op == "textureLodOffset")
		return join(type_prefix, type, "LodOffset");
	else if (op == "textureProjGrad")
		return join(type_prefix, type,
		            is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
	else if (op == "textureProjLodOffset")
		return join(type_prefix, type, "ProjLodOffset");
	else if (op == "textureSize")
		return join("textureSize", type);
	else if (op == "texelFetch")
		return join("texelFetch", type);
	else
	{
		SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
	}
}

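// Illustrative example of the trivial-mix reduction below: an OpSelect which chooses between
// constant 0 and constant 1 based on a bool is just a value cast, so
//   OpSelect %int %b %one %zero
// can be emitted as int(b) rather than a mix()/ternary construct.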
bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
{
	auto *cleft = maybe_get<SPIRConstant>(left);
	auto *cright = maybe_get<SPIRConstant>(right);
	auto &lerptype = expression_type(lerp);

	// If our targets aren't constants, we cannot use construction.
	if (!cleft || !cright)
		return false;

	// If our targets are spec constants, we cannot use construction.
	if (cleft->specialization || cright->specialization)
		return false;

	auto &value_type = get<SPIRType>(cleft->constant_type);

	if (lerptype.basetype != SPIRType::Boolean)
		return false;
	if (value_type.basetype == SPIRType::Struct || is_array(value_type))
		return false;
	if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize)
		return false;

	// Only valid way in SPIR-V 1.4 to use matrices in select is a scalar select.
	// matrix(scalar) constructor fills in diagonals, so gets messy very quickly.
	// Just avoid this case.
	if (value_type.columns > 1)
		return false;

	// If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor.
	bool ret = true;
	for (uint32_t row = 0; ret && row < value_type.vecsize; row++)
	{
		switch (type.basetype)
		{
		case SPIRType::Short:
		case SPIRType::UShort:
			ret = cleft->scalar_u16(0, row) == 0 && cright->scalar_u16(0, row) == 1;
			break;

		case SPIRType::Int:
		case SPIRType::UInt:
			ret = cleft->scalar(0, row) == 0 && cright->scalar(0, row) == 1;
			break;

		case SPIRType::Half:
			ret = cleft->scalar_f16(0, row) == 0.0f && cright->scalar_f16(0, row) == 1.0f;
			break;

		case SPIRType::Float:
			ret = cleft->scalar_f32(0, row) == 0.0f && cright->scalar_f32(0, row) == 1.0f;
			break;

		case SPIRType::Double:
			ret = cleft->scalar_f64(0, row) == 0.0 && cright->scalar_f64(0, row) == 1.0;
			break;

		case SPIRType::Int64:
		case SPIRType::UInt64:
			ret = cleft->scalar_u64(0, row) == 0 && cright->scalar_u64(0, row) == 1;
			break;

		default:
			ret = false;
			break;
		}
	}

	if (ret)
		op = type_to_glsl_constructor(type);
	return ret;
}

string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
                                           uint32_t false_value)
{
	string expr;
	auto &lerptype = expression_type(select);

	if (lerptype.vecsize == 1)
		expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ",
		            to_enclosed_pointer_expression(false_value));
	else
	{
		auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); };

		expr = type_to_glsl_constructor(restype);
		expr += "(";
		for (uint32_t i = 0; i < restype.vecsize; i++)
		{
			expr += swiz(select, i);
			expr += " ? ";
			expr += swiz(true_value, i);
			expr += " : ";
			expr += swiz(false_value, i);
			if (i + 1 < restype.vecsize)
				expr += ", ";
		}
		expr += ")";
	}

	return expr;
}

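// Illustrative summary of the OpSelect lowering strategies below: a trivial mix becomes a plain
// cast such as uint(b); a vector bool select without mix(..., bvec) support unrolls into
//   vec2(b.x ? r.x : l.x, b.y ? r.y : l.y)
// and otherwise the select maps onto the backend's boolean mix function or plain mix(l, r, b).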
void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
{
	auto &lerptype = expression_type(lerp);
	auto &restype = get<SPIRType>(result_type);

	// If this results in a variable pointer, assume it may be written through.
	if (restype.pointer)
	{
		register_write(left);
		register_write(right);
	}

	string mix_op;
	bool has_boolean_mix = *backend.boolean_mix_function &&
	                       ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
	bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);

	// Cannot use boolean mix when the lerp argument is just one boolean,
	// fall back to plain ternary expressions.
	if (lerptype.vecsize == 1)
		has_boolean_mix = false;

	// If we can reduce the mix to a simple cast, do so.
	// This helps for cases like int(bool), uint(bool) which is implemented with
	// OpSelect bool 1 0.
	if (trivial_mix)
	{
		emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
	}
	else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
	{
		// Boolean mix not supported on desktop without extension.
		// Was added in OpenGL 4.5 with ES 3.1 compat.
		//
		// Could use GL_EXT_shader_integer_mix on desktop at least,
		// but Apple doesn't support it. :(
		// Just implement it as ternary expressions.
		auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left);
		emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
		inherit_expression_dependencies(id, left);
		inherit_expression_dependencies(id, right);
		inherit_expression_dependencies(id, lerp);
	}
	else if (lerptype.basetype == SPIRType::Boolean)
		emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
	else
		emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
}

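// Illustrative example of the remapping below: sampling a separate texture2D uTex with a separate
// sampler uSamp (placeholder names) is redirected to a combined sampler2D synthesized by
// build_combined_image_samplers(), and an array index such as uTex[i] is carried over onto the
// combined expression.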
string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
{
	// Keep track of the array indices we have used to load the image.
	// We'll need to use the same array index into the combined image sampler array.
	auto image_expr = to_non_uniform_aware_expression(image_id);
	string array_expr;
	auto array_index = image_expr.find_first_of('[');
	if (array_index != string::npos)
		array_expr = image_expr.substr(array_index, string::npos);

	auto &args = current_function->arguments;

	// For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect
	// all possible combinations into new sampler2D uniforms.
	auto *image = maybe_get_backing_variable(image_id);
	auto *samp = maybe_get_backing_variable(samp_id);
	if (image)
		image_id = image->self;
	if (samp)
		samp_id = samp->self;

	auto image_itr = find_if(begin(args), end(args),
	                         [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });

	auto sampler_itr = find_if(begin(args), end(args),
	                           [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });

	if (image_itr != end(args) || sampler_itr != end(args))
	{
		// If the image or sampler originates from a function parameter, we will find it in our argument list.
		bool global_image = image_itr == end(args);
		bool global_sampler = sampler_itr == end(args);
		VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
		VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));

		auto &combined = current_function->combined_parameters;
		auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
			return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
			       p.sampler_id == sid;
		});

		if (itr != end(combined))
			return to_expression(itr->id) + array_expr;
		else
		{
			SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
			                  "build_combined_image_samplers() used "
			                  "before compile() was called?");
		}
	}
	else
	{
		// For global sampler2D, look directly at the global remapping table.
		auto &mapping = combined_image_samplers;
		auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
			return combined.image_id == image_id && combined.sampler_id == samp_id;
		});

		if (itr != end(combined_image_samplers))
			return to_expression(itr->combined_id) + array_expr;
		else
		{
			SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
			                  "before compile() was called?");
		}
	}
}

bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op, const uint32_t *ops)
{
	switch (op)
	{
	case OpGroupNonUniformElect:
	case OpGroupNonUniformBallot:
	case OpGroupNonUniformBallotFindLSB:
	case OpGroupNonUniformBallotFindMSB:
	case OpGroupNonUniformBroadcast:
	case OpGroupNonUniformBroadcastFirst:
	case OpGroupNonUniformAll:
	case OpGroupNonUniformAny:
	case OpGroupNonUniformAllEqual:
	case OpControlBarrier:
	case OpMemoryBarrier:
	case OpGroupNonUniformBallotBitCount:
	case OpGroupNonUniformBallotBitExtract:
	case OpGroupNonUniformInverseBallot:
		return true;
	case OpGroupNonUniformIAdd:
	case OpGroupNonUniformFAdd:
	case OpGroupNonUniformIMul:
	case OpGroupNonUniformFMul:
	{
		const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
		if (operation == GroupOperationReduce || operation == GroupOperationInclusiveScan ||
		    operation == GroupOperationExclusiveScan)
		{
			return true;
		}
		else
		{
			return false;
		}
	}
	default:
		return false;
	}
}

void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
{
	if (options.vulkan_semantics && combined_image_samplers.empty())
	{
		emit_binary_func_op(result_type, result_id, image_id, samp_id,
		                    type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
	}
	else
	{
		// Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
		emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
	}

	// Make sure to suppress usage tracking and any expression invalidation.
	// It is illegal to create temporaries of opaque types.
	forwarded_temporaries.erase(result_id);
}

static inline bool image_opcode_is_sample_no_dref(Op op)
{
	switch (op)
	{
	case OpImageSampleExplicitLod:
	case OpImageSampleImplicitLod:
	case OpImageSampleProjExplicitLod:
	case OpImageSampleProjImplicitLod:
	case OpImageFetch:
	case OpImageRead:
	case OpImageSparseSampleExplicitLod:
	case OpImageSparseSampleImplicitLod:
	case OpImageSparseSampleProjExplicitLod:
	case OpImageSparseSampleProjImplicitLod:
	case OpImageSparseFetch:
	case OpImageSparseRead:
		return true;

	default:
		return false;
	}
}

void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
                                                    uint32_t &texel_id)
{
	// Need to allocate two temporaries.
	if (options.es)
		SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
	require_extension_internal("GL_ARB_sparse_texture2");

	auto &temps = extra_sub_expressions[id];
	if (temps == 0)
		temps = ir.increase_bound_by(2);

	feedback_id = temps + 0;
	texel_id = temps + 1;

	auto &return_type = get<SPIRType>(result_type_id);
	if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
		SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
	emit_uninitialized_temporary(return_type.member_types[0], feedback_id);
	emit_uninitialized_temporary(return_type.member_types[1], texel_id);
}

uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
{
	auto itr = extra_sub_expressions.find(id);
	if (itr == extra_sub_expressions.end())
		return 0;
	else
		return itr->second + 1;
}

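// Illustrative shape of the sparse feedback lowering used by emit_texture_op() below, with
// placeholder names: the residency code and texel are written to two temporaries and then packed
// into the SPIR-V result struct, e.g.
//   int _code; vec4 _texel;
//   _code = sparseTextureARB(uTex, uv, _texel);
//   ResType _res = ResType(_code, _texel);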
void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
{
	auto *ops = stream(i);
	auto op = static_cast<Op>(i.op);

	SmallVector<uint32_t> inherited_expressions;

	uint32_t result_type_id = ops[0];
	uint32_t id = ops[1];
	auto &return_type = get<SPIRType>(result_type_id);

	uint32_t sparse_code_id = 0;
	uint32_t sparse_texel_id = 0;
	if (sparse)
		emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id);

	bool forward = false;
	string expr = to_texture_op(i, sparse, &forward, inherited_expressions);

	if (sparse)
	{
		statement(to_expression(sparse_code_id), " = ", expr, ";");
		expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id),
		            ")");
		forward = true;
		inherited_expressions.clear();
	}

	emit_op(result_type_id, id, expr, forward);
	for (auto &inherit : inherited_expressions)
		inherit_expression_dependencies(id, inherit);

	// Do not register sparse ops as control dependent as they are always lowered to a temporary.
	switch (op)
	{
	case OpImageSampleDrefImplicitLod:
	case OpImageSampleImplicitLod:
	case OpImageSampleProjImplicitLod:
	case OpImageSampleProjDrefImplicitLod:
		register_control_dependent_expression(id);
		break;

	default:
		break;
	}
}

std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
                                        SmallVector<uint32_t> &inherited_expressions)
{
	auto *ops = stream(i);
	auto op = static_cast<Op>(i.op);
	uint32_t length = i.length;

	uint32_t result_type_id = ops[0];
	VariableID img = ops[2];
	uint32_t coord = ops[3];
	uint32_t dref = 0;
	uint32_t comp = 0;
	bool gather = false;
	bool proj = false;
	bool fetch = false;
	bool nonuniform_expression = false;
	const uint32_t *opt = nullptr;

	auto &result_type = get<SPIRType>(result_type_id);

	inherited_expressions.push_back(coord);
	if (has_decoration(img, DecorationNonUniform) && !maybe_get_backing_variable(img))
		nonuniform_expression = true;

	switch (op)
	{
	case OpImageSampleDrefImplicitLod:
	case OpImageSampleDrefExplicitLod:
	case OpImageSparseSampleDrefImplicitLod:
	case OpImageSparseSampleDrefExplicitLod:
		dref = ops[4];
		opt = &ops[5];
		length -= 5;
		break;

	case OpImageSampleProjDrefImplicitLod:
	case OpImageSampleProjDrefExplicitLod:
	case OpImageSparseSampleProjDrefImplicitLod:
	case OpImageSparseSampleProjDrefExplicitLod:
		dref = ops[4];
		opt = &ops[5];
		length -= 5;
		proj = true;
		break;

	case OpImageDrefGather:
	case OpImageSparseDrefGather:
		dref = ops[4];
		opt = &ops[5];
		length -= 5;
		gather = true;
		if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
		else if (!options.es && options.version < 400)
			SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
		break;

	case OpImageGather:
	case OpImageSparseGather:
		comp = ops[4];
		opt = &ops[5];
		length -= 5;
		gather = true;
		if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
		else if (!options.es && options.version < 400)
		{
			if (!expression_is_constant_null(comp))
				SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
			require_extension_internal("GL_ARB_texture_gather");
		}
		break;

	case OpImageFetch:
	case OpImageSparseFetch:
	case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
		opt = &ops[4];
		length -= 4;
		fetch = true;
		break;

	case OpImageSampleProjImplicitLod:
	case OpImageSampleProjExplicitLod:
	case OpImageSparseSampleProjImplicitLod:
	case OpImageSparseSampleProjExplicitLod:
		opt = &ops[4];
		length -= 4;
		proj = true;
		break;

	default:
		opt = &ops[4];
		length -= 4;
		break;
	}

	// Bypass pointers because we need the real image struct.
	auto &type = expression_type(img);
	auto &imgtype = get<SPIRType>(type.self);

	uint32_t coord_components = 0;
	switch (imgtype.image.dim)
	{
	case spv::Dim1D:
		coord_components = 1;
		break;
	case spv::Dim2D:
		coord_components = 2;
		break;
	case spv::Dim3D:
		coord_components = 3;
		break;
	case spv::DimCube:
		coord_components = 3;
		break;
	case spv::DimBuffer:
		coord_components = 1;
		break;
	default:
		coord_components = 2;
		break;
	}

	if (dref)
		inherited_expressions.push_back(dref);

	if (proj)
		coord_components++;
	if (imgtype.image.arrayed)
		coord_components++;

	uint32_t bias = 0;
	uint32_t lod = 0;
	uint32_t grad_x = 0;
	uint32_t grad_y = 0;
	uint32_t coffset = 0;
	uint32_t offset = 0;
	uint32_t coffsets = 0;
	uint32_t sample = 0;
	uint32_t minlod = 0;
	uint32_t flags = 0;

	if (length)
	{
		flags = *opt++;
		length--;
	}

	auto test = [&](uint32_t &v, uint32_t flag) {
		if (length && (flags & flag))
		{
			v = *opt++;
			inherited_expressions.push_back(v);
			length--;
		}
	};

	test(bias, ImageOperandsBiasMask);
	test(lod, ImageOperandsLodMask);
	test(grad_x, ImageOperandsGradMask);
	test(grad_y, ImageOperandsGradMask);
	test(coffset, ImageOperandsConstOffsetMask);
	test(offset, ImageOperandsOffsetMask);
	test(coffsets, ImageOperandsConstOffsetsMask);
	test(sample, ImageOperandsSampleMask);
	test(minlod, ImageOperandsMinLodMask);

	TextureFunctionBaseArguments base_args = {};
	base_args.img = img;
	base_args.imgtype = &imgtype;
	base_args.is_fetch = fetch != 0;
	base_args.is_gather = gather != 0;
	base_args.is_proj = proj != 0;

	string expr;
	TextureFunctionNameArguments name_args = {};

	name_args.base = base_args;
	name_args.has_array_offsets = coffsets != 0;
	name_args.has_offset = coffset != 0 || offset != 0;
	name_args.has_grad = grad_x != 0 || grad_y != 0;
	name_args.has_dref = dref != 0;
	name_args.is_sparse_feedback = sparse;
	name_args.has_min_lod = minlod != 0;
	name_args.lod = lod;
	expr += to_function_name(name_args);
	expr += "(";

	uint32_t sparse_texel_id = 0;
	if (sparse)
		sparse_texel_id = get_sparse_feedback_texel_id(ops[1]);

	TextureFunctionArguments args = {};
	args.base = base_args;
	args.coord = coord;
	args.coord_components = coord_components;
	args.dref = dref;
	args.grad_x = grad_x;
	args.grad_y = grad_y;
	args.lod = lod;
	args.has_array_offsets = coffsets != 0;

	if (coffsets)
		args.offset = coffsets;
	else if (coffset)
		args.offset = coffset;
	else
		args.offset = offset;

	args.bias = bias;
	args.component = comp;
	args.sample = sample;
	args.sparse_texel = sparse_texel_id;
	args.min_lod = minlod;
	args.nonuniform_expression = nonuniform_expression;
	expr += to_function_args(args, forward);
	expr += ")";

	// texture(samplerXShadow) returns float. shadowX() returns vec4, but only in desktop GLSL. Swizzle here.
	if (is_legacy() && !options.es && is_depth_image(imgtype, img))
		expr += ".r";

	// Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
	// Remap back to 4 components as sampling opcodes expect.
	if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
	{
		bool image_is_depth = false;
		const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
		VariableID image_id = combined ? combined->image : img;

		if (combined && is_depth_image(imgtype, combined->image))
			image_is_depth = true;
		else if (is_depth_image(imgtype, img))
			image_is_depth = true;

		// We must also check the backing variable for the image.
		// We might have loaded an OpImage, and used that handle for two different purposes.
		// Once with comparison, once without.
		auto *image_variable = maybe_get_backing_variable(image_id);
		if (image_variable && is_depth_image(get<SPIRType>(image_variable->basetype), image_variable->self))
			image_is_depth = true;

		if (image_is_depth)
			expr = remap_swizzle(result_type, 1, expr);
	}

	if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
	{
		// Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
		// Hopefully the compiler picks this up and converts the texturing instruction to the appropriate precision.
		expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
	}

	// Deals with reads from MSL. We might need to downconvert to fewer components.
	if (op == OpImageRead)
		expr = remap_swizzle(result_type, 4, expr);

	return expr;
}

bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
{
	auto *c = maybe_get<SPIRConstant>(id);
	if (!c)
		return false;
	return c->constant_is_null();
}

bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
{
	auto &type = expression_type(ptr);
	if (!is_array(get_pointee_type(type)))
		return false;

	if (!backend.array_is_value_type)
		return true;

	auto *var = maybe_get_backing_variable(ptr);
	if (!var)
		return false;

	auto &backed_type = get<SPIRType>(var->basetype);
	return !backend.array_is_value_type_in_buffer_blocks && backed_type.basetype == SPIRType::Struct &&
	       has_member_decoration(backed_type.self, 0, DecorationOffset);
}

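// Examples of the name composition implemented by to_function_name() below (illustrative): a
// sparse gather with constant offsets composes to "sparseTextureGatherOffsetsARB", and a sample
// with a min-LOD clamp composes to "textureClampARB".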
8042 | // Returns the function name for a texture sampling function for the specified image and sampling characteristics. |
8043 | // For some subclasses, the function is a method on the specified image. |
8044 | string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args) |
8045 | { |
8046 | if (args.has_min_lod) |
8047 | { |
8048 | if (options.es) |
8049 | SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL."); |
8050 | require_extension_internal(ext: "GL_ARB_sparse_texture_clamp"); |
8051 | } |
8052 | |
8053 | string fname; |
8054 | auto &imgtype = *args.base.imgtype; |
8055 | VariableID tex = args.base.img; |
8056 | |
8057 | // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. |
8058 | // To emulate this, we will have to use textureGrad with a constant gradient of 0. |
8059 | // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. |
8060 | // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. |
8061 | bool workaround_lod_array_shadow_as_grad = false; |
8062 | if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && |
8063 | is_depth_image(type: imgtype, id: tex) && args.lod && !args.base.is_fetch) |
8064 | { |
8065 | if (!expression_is_constant_null(id: args.lod)) |
8066 | { |
8067 | SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be " |
8068 | "expressed in GLSL."); |
8069 | } |
8070 | workaround_lod_array_shadow_as_grad = true; |
8071 | } |
8072 | |
8073 | if (args.is_sparse_feedback) |
8074 | fname += "sparse"; |
8075 | |
8076 | if (args.base.is_fetch) |
8077 | fname += args.is_sparse_feedback ? "TexelFetch": "texelFetch"; |
8078 | else |
8079 | { |
8080 | fname += args.is_sparse_feedback ? "Texture": "texture"; |
8081 | |
8082 | if (args.base.is_gather) |
8083 | fname += "Gather"; |
8084 | if (args.has_array_offsets) |
8085 | fname += "Offsets"; |
8086 | if (args.base.is_proj) |
8087 | fname += "Proj"; |
8088 | if (args.has_grad || workaround_lod_array_shadow_as_grad) |
8089 | fname += "Grad"; |
8090 | if (args.lod != 0 && !workaround_lod_array_shadow_as_grad) |
8091 | fname += "Lod"; |
8092 | } |
8093 | |
8094 | if (args.has_offset) |
8095 | fname += "Offset"; |
8096 | |
8097 | if (args.has_min_lod) |
8098 | fname += "Clamp"; |
8099 | |
8100 | if (args.is_sparse_feedback || args.has_min_lod) |
8101 | fname += "ARB"; |
8102 | |
8103 | return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(op: fname, imgtype, tex) : fname; |
8104 | } |
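// Worked example (added commentary): the name is assembled from the flags above. A sparse
// gather with a constant offset becomes "sparseTextureGatherOffsetARB", a plain fetch becomes
// "texelFetch", and a shadow-array textureLod hit by the workaround drops "Lod" in favor of
// "Grad", yielding "textureGrad".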

std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
{
	auto *var = maybe_get_backing_variable(id);

	// If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
	// In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
	if (var)
	{
		auto &type = get<SPIRType>(var->basetype);
		if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
		{
			if (options.vulkan_semantics)
			{
				if (dummy_sampler_id)
				{
					// Don't need to consider Shadow state since the dummy sampler is always non-shadow.
					auto sampled_type = type;
					sampled_type.basetype = SPIRType::SampledImage;
					return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ",
					            to_expression(dummy_sampler_id), ")");
				}
				else
				{
					// Newer glslang supports this extension to deal with texture2D as argument to texture functions.
					require_extension_internal("GL_EXT_samplerless_texture_functions");
				}
			}
			else
			{
				if (!dummy_sampler_id)
					SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
					                  "build_dummy_sampler_for_combined_images() called?");

				return to_combined_image_sampler(id, dummy_sampler_id);
			}
		}
	}

	return to_non_uniform_aware_expression(id);
}
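// Illustrative example (added commentary): fetching from a separate texture2D uTex under
// Vulkan semantics with a dummy sampler available emits roughly "sampler2D(uTex, uDummySampler)",
// where the dummy sampler's actual name depends on its declaration; otherwise the pair is
// remapped through to_combined_image_sampler().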

// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
{
	VariableID img = args.base.img;
	auto &imgtype = *args.base.imgtype;

	string farg_str;
	if (args.base.is_fetch)
		farg_str = convert_separate_image_to_expression(img);
	else
		farg_str = to_non_uniform_aware_expression(img);

	if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos)
	{
		// Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
		farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")");
	}

	bool swizz_func = backend.swizzle_is_function;
	auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
		if (comps == in_comps)
			return "";

		switch (comps)
		{
		case 1:
			return ".x";
		case 2:
			return swizz_func ? ".xy()" : ".xy";
		case 3:
			return swizz_func ? ".xyz()" : ".xyz";
		default:
			return "";
		}
	};

	bool forward = should_forward(args.coord);

	// The IR can give us more components than we need, so chop them off as needed.
	auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize);
	// Only enclose the UV expression if needed.
	auto coord_expr =
	    (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr);

	// texelFetch only takes int, not uint.
	auto &coord_type = expression_type(args.coord);
	if (coord_type.basetype == SPIRType::UInt)
	{
		auto expected_type = coord_type;
		expected_type.vecsize = args.coord_components;
		expected_type.basetype = SPIRType::Int;
		coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
	}

	// textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
	// To emulate this, we will have to use textureGrad with a constant gradient of 0.
	// The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
	// This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
	bool workaround_lod_array_shadow_as_grad =
	    ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
	    is_depth_image(imgtype, img) && args.lod != 0 && !args.base.is_fetch;

	if (args.dref)
	{
		forward = forward && should_forward(args.dref);

		// SPIR-V splits dref and coordinate.
		if (args.base.is_gather ||
		    args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
		{
			farg_str += ", ";
			farg_str += to_expression(args.coord);
			farg_str += ", ";
			farg_str += to_expression(args.dref);
		}
		else if (args.base.is_proj)
		{
			// Have to reshuffle so we get vec4(coord, dref, proj), special case.
			// Other shading languages split up the arguments for coord and compare value like SPIR-V.
			// The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
			farg_str += ", vec4(";

			if (imgtype.image.dim == Dim1D)
			{
				// Could reuse coord_expr, but we will mess up the temporary usage checking.
				farg_str += to_enclosed_expression(args.coord) + ".x";
				farg_str += ", ";
				farg_str += "0.0, ";
				farg_str += to_expression(args.dref);
				farg_str += ", ";
				farg_str += to_enclosed_expression(args.coord) + ".y)";
			}
			else if (imgtype.image.dim == Dim2D)
			{
				// Could reuse coord_expr, but we will mess up the temporary usage checking.
				farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy");
				farg_str += ", ";
				farg_str += to_expression(args.dref);
				farg_str += ", ";
				farg_str += to_enclosed_expression(args.coord) + ".z)";
			}
			else
				SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
		}
		else
		{
			// Create a composite which merges coord/dref into a single vector.
			auto type = expression_type(args.coord);
			type.vecsize = args.coord_components + 1;
			if (imgtype.image.dim == Dim1D && options.es)
				type.vecsize++;
			farg_str += ", ";
			farg_str += type_to_glsl_constructor(type);
			farg_str += "(";

			if (imgtype.image.dim == Dim1D && options.es)
			{
				if (imgtype.image.arrayed)
				{
					farg_str += enclose_expression(coord_expr) + ".x";
					farg_str += ", 0.0, ";
					farg_str += enclose_expression(coord_expr) + ".y";
				}
				else
				{
					farg_str += coord_expr;
					farg_str += ", 0.0";
				}
			}
			else
				farg_str += coord_expr;

			farg_str += ", ";
			farg_str += to_expression(args.dref);
			farg_str += ")";
		}
	}
	else
	{
		if (imgtype.image.dim == Dim1D && options.es)
		{
			// Have to fake a second coordinate.
			if (type_is_floating_point(coord_type))
			{
				// Cannot mix proj and array.
				if (imgtype.image.arrayed || args.base.is_proj)
				{
					coord_expr = join("vec3(", enclose_expression(coord_expr), ".x, 0.0, ",
					                  enclose_expression(coord_expr), ".y)");
				}
				else
					coord_expr = join("vec2(", coord_expr, ", 0.0)");
			}
			else
			{
				if (imgtype.image.arrayed)
				{
					coord_expr = join("ivec3(", enclose_expression(coord_expr),
					                  ".x, 0, ",
					                  enclose_expression(coord_expr), ".y)");
				}
				else
					coord_expr = join("ivec2(", coord_expr, ", 0)");
			}
		}

		farg_str += ", ";
		farg_str += coord_expr;
	}

	if (args.grad_x || args.grad_y)
	{
		forward = forward && should_forward(args.grad_x);
		forward = forward && should_forward(args.grad_y);
		farg_str += ", ";
		farg_str += to_expression(args.grad_x);
		farg_str += ", ";
		farg_str += to_expression(args.grad_y);
	}

	if (args.lod)
	{
		if (workaround_lod_array_shadow_as_grad)
		{
			// Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
			// Implementing this as plain texture() is not safe on some implementations.
			if (imgtype.image.dim == Dim2D)
				farg_str += ", vec2(0.0), vec2(0.0)";
			else if (imgtype.image.dim == DimCube)
				farg_str += ", vec3(0.0), vec3(0.0)";
		}
		else
		{
			forward = forward && should_forward(args.lod);
			farg_str += ", ";

			// Lod expression for TexelFetch in GLSL must be int, and only int.
			if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
				farg_str += bitcast_expression(SPIRType::Int, args.lod);
			else
				farg_str += to_expression(args.lod);
		}
	}
	else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
	{
		// Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default.
		farg_str += ", 0";
	}

	if (args.offset)
	{
		forward = forward && should_forward(args.offset);
		farg_str += ", ";
		farg_str += bitcast_expression(SPIRType::Int, args.offset);
	}

	if (args.sample)
	{
		farg_str += ", ";
		farg_str += bitcast_expression(SPIRType::Int, args.sample);
	}

	if (args.min_lod)
	{
		farg_str += ", ";
		farg_str += to_expression(args.min_lod);
	}

	if (args.sparse_texel)
	{
		// The sparse texel output parameter comes after everything else, except that it goes
		// before the optional component/bias arguments.
		farg_str += ", ";
		farg_str += to_expression(args.sparse_texel);
	}

	if (args.bias)
	{
		forward = forward && should_forward(args.bias);
		farg_str += ", ";
		farg_str += to_expression(args.bias);
	}

	if (args.component && !expression_is_constant_null(args.component))
	{
		forward = forward && should_forward(args.component);
		farg_str += ", ";
		farg_str += bitcast_expression(SPIRType::Int, args.component);
	}

	*p_forward = forward;

	return farg_str;
}
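// Worked example (added commentary): sampling a sampler2DArrayShadow with a non-gather dref
// merges coordinate and compare value into one composite, e.g.
//   texture(uShadow, vec4(coord.xyz, dref))
// and with the constant-LOD-zero workaround from above it becomes
//   textureGrad(uShadow, vec4(coord.xyz, dref), vec2(0.0), vec2(0.0))
// (uShadow/coord/dref are schematic names, not emitted identifiers).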

Op CompilerGLSL::get_remapped_spirv_op(Op op) const
{
	if (options.relax_nan_checks)
	{
		switch (op)
		{
		case OpFUnordLessThan:
			op = OpFOrdLessThan;
			break;
		case OpFUnordLessThanEqual:
			op = OpFOrdLessThanEqual;
			break;
		case OpFUnordGreaterThan:
			op = OpFOrdGreaterThan;
			break;
		case OpFUnordGreaterThanEqual:
			op = OpFOrdGreaterThanEqual;
			break;
		case OpFUnordEqual:
			op = OpFOrdEqual;
			break;
		case OpFOrdNotEqual:
			op = OpFUnordNotEqual;
			break;

		default:
			break;
		}
	}

	return op;
}
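// Note (added commentary): with relax_nan_checks, unordered comparisons collapse onto plain
// GLSL operators, e.g. OpFUnordLessThan simply emits "a < b" instead of a negated ordered
// compare of roughly the form "!(a >= b)". NotEqual maps the other way because GLSL's "!="
// already behaves like the unordered variant when NaN is involved.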

GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const
{
	// Relax to non-NaN aware opcodes.
	if (options.relax_nan_checks)
	{
		switch (std450_op)
		{
		case GLSLstd450NClamp:
			std450_op = GLSLstd450FClamp;
			break;
		case GLSLstd450NMin:
			std450_op = GLSLstd450FMin;
			break;
		case GLSLstd450NMax:
			std450_op = GLSLstd450FMax;
			break;
		default:
			break;
		}
	}

	return std450_op;
}

void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
{
	auto op = static_cast<GLSLstd450>(eop);

	if (is_legacy() && is_unsigned_glsl_opcode(op))
		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");

	// If we need to do implicit bitcasts, make sure we do it with the correct type.
	uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length);
	auto int_type = to_signed_basetype(integer_width);
	auto uint_type = to_unsigned_basetype(integer_width);

	op = get_remapped_glsl_op(op);

	switch (op)
	{
	// FP fiddling
	case GLSLstd450Round:
		if (!is_legacy())
			emit_unary_func_op(result_type, id, args[0], "round");
		else
		{
			auto op0 = to_enclosed_expression(args[0]);
			auto &op0_type = expression_type(args[0]);
			auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))");
			bool forward = should_forward(args[0]);
			emit_op(result_type, id, expr, forward);
			inherit_expression_dependencies(id, args[0]);
		}
		break;

	case GLSLstd450RoundEven:
		if (!is_legacy())
			emit_unary_func_op(result_type, id, args[0], "roundEven");
		else if (!options.es)
		{
			// This extension provides round() with round-to-even semantics.
			require_extension_internal("GL_EXT_gpu_shader4");
			emit_unary_func_op(result_type, id, args[0], "round");
		}
		else
			SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
		break;

	case GLSLstd450Trunc:
		if (!is_legacy())
			emit_unary_func_op(result_type, id, args[0], "trunc");
		else
		{
			// Implement by value-casting to int and back.
			bool forward = should_forward(args[0]);
			auto op0 = to_unpacked_expression(args[0]);
			auto &op0_type = expression_type(args[0]);
			auto via_type = op0_type;
			via_type.basetype = SPIRType::Int;
			auto expr = join(type_to_glsl(op0_type), "(", type_to_glsl(via_type), "(", op0, "))");
			emit_op(result_type, id, expr, forward);
			inherit_expression_dependencies(id, args[0]);
		}
		break;

	case GLSLstd450SAbs:
		emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
		break;
	case GLSLstd450FAbs:
		emit_unary_func_op(result_type, id, args[0], "abs");
		break;
	case GLSLstd450SSign:
		emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
		break;
	case GLSLstd450FSign:
		emit_unary_func_op(result_type, id, args[0], "sign");
		break;
	case GLSLstd450Floor:
		emit_unary_func_op(result_type, id, args[0], "floor");
		break;
	case GLSLstd450Ceil:
		emit_unary_func_op(result_type, id, args[0], "ceil");
		break;
	case GLSLstd450Fract:
		emit_unary_func_op(result_type, id, args[0], "fract");
		break;
	case GLSLstd450Radians:
		emit_unary_func_op(result_type, id, args[0], "radians");
		break;
	case GLSLstd450Degrees:
		emit_unary_func_op(result_type, id, args[0], "degrees");
		break;
	case GLSLstd450Fma:
		if ((!options.es && options.version < 400) || (options.es && options.version < 320))
		{
			auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
			                 to_enclosed_expression(args[2]));

			emit_op(result_type, id, expr,
			        should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
			for (uint32_t i = 0; i < 3; i++)
				inherit_expression_dependencies(id, args[i]);
		}
		else
			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
		break;

	case GLSLstd450Modf:
		register_call_out_argument(args[1]);
		if (!is_legacy())
		{
			forced_temporaries.insert(id);
			emit_binary_func_op(result_type, id, args[0], args[1], "modf");
		}
		else
		{
			// NB: legacy GLSL doesn't have trunc() either, so we do a value cast instead.
			auto &op1_type = expression_type(args[1]);
			auto via_type = op1_type;
			via_type.basetype = SPIRType::Int;
			statement(to_expression(args[1]), " = ",
			          type_to_glsl(op1_type), "(", type_to_glsl(via_type),
			          "(", to_expression(args[0]), "));");
			emit_binary_op(result_type, id, args[0], args[1], "-");
		}
		break;

	case GLSLstd450ModfStruct:
	{
		auto &type = get<SPIRType>(result_type);
		emit_uninitialized_temporary_expression(result_type, id);
		if (!is_legacy())
		{
			statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
			          to_expression(id), ".", to_member_name(type, 1), ");");
		}
		else
		{
			// NB: legacy GLSL doesn't have trunc() either, so we do a value cast instead.
			auto &op0_type = expression_type(args[0]);
			auto via_type = op0_type;
			via_type.basetype = SPIRType::Int;
			statement(to_expression(id), ".", to_member_name(type, 1), " = ", type_to_glsl(op0_type),
			          "(", type_to_glsl(via_type), "(", to_expression(args[0]), "));");
			statement(to_expression(id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(args[0]), " - ",
			          to_expression(id), ".", to_member_name(type, 1), ";");
		}
		break;
	}

	// Minmax
	case GLSLstd450UMin:
		emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
		break;

	case GLSLstd450SMin:
		emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
		break;

	case GLSLstd450FMin:
		emit_binary_func_op(result_type, id, args[0], args[1], "min");
		break;

	case GLSLstd450FMax:
		emit_binary_func_op(result_type, id, args[0], args[1], "max");
		break;

	case GLSLstd450UMax:
		emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
		break;

	case GLSLstd450SMax:
		emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
		break;

	case GLSLstd450FClamp:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
		break;

	case GLSLstd450UClamp:
		emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
		break;

	case GLSLstd450SClamp:
		emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
		break;

	// Trig
	case GLSLstd450Sin:
		emit_unary_func_op(result_type, id, args[0], "sin");
		break;
	case GLSLstd450Cos:
		emit_unary_func_op(result_type, id, args[0], "cos");
		break;
	case GLSLstd450Tan:
		emit_unary_func_op(result_type, id, args[0], "tan");
		break;
	case GLSLstd450Asin:
		emit_unary_func_op(result_type, id, args[0], "asin");
		break;
	case GLSLstd450Acos:
		emit_unary_func_op(result_type, id, args[0], "acos");
		break;
	case GLSLstd450Atan:
		emit_unary_func_op(result_type, id, args[0], "atan");
		break;
	case GLSLstd450Sinh:
		if (!is_legacy())
			emit_unary_func_op(result_type, id, args[0], "sinh");
		else
		{
			bool forward = should_forward(args[0]);
			auto expr = join("(exp(", to_expression(args[0]), ") - exp(-", to_enclosed_expression(args[0]), ")) * 0.5");
			emit_op(result_type, id, expr, forward);
			inherit_expression_dependencies(id, args[0]);
		}
		break;
	case GLSLstd450Cosh:
		if (!is_legacy())
			emit_unary_func_op(result_type, id, args[0], "cosh");
		else
		{
			bool forward = should_forward(args[0]);
			auto expr = join("(exp(", to_expression(args[0]), ") + exp(-", to_enclosed_expression(args[0]), ")) * 0.5");
			emit_op(result_type, id, expr, forward);
			inherit_expression_dependencies(id, args[0]);
		}
		break;
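	// The legacy tanh path below expands tanh(x) as (exp(x) - exp(-x)) / (exp(x) + exp(-x)).
	// Illustrative emitted GLSL (added commentary, schematic names):
	//   float ePos = exp(x);
	//   float eNeg = exp(-x);
	//   float result = (ePos - eNeg) / (ePos + eNeg);
	// The exp() results are hoisted into real temporaries so each exponential is evaluated once.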
	case GLSLstd450Tanh:
		if (!is_legacy())
			emit_unary_func_op(result_type, id, args[0], "tanh");
		else
		{
			// Create temporaries to store the result of exp(arg) and exp(-arg).
			uint32_t &ids = extra_sub_expressions[id];
			if (!ids)
			{
				ids = ir.increase_bound_by(2);

				// Inherit precision qualifier (legacy has no NoContraction).
				if (has_decoration(id, DecorationRelaxedPrecision))
				{
					set_decoration(ids, DecorationRelaxedPrecision);
					set_decoration(ids + 1, DecorationRelaxedPrecision);
				}
			}
			uint32_t epos_id = ids;
			uint32_t eneg_id = ids + 1;

			emit_op(result_type, epos_id, join("exp(", to_expression(args[0]), ")"), false);
			emit_op(result_type, eneg_id, join("exp(-", to_enclosed_expression(args[0]), ")"), false);
			inherit_expression_dependencies(epos_id, args[0]);
			inherit_expression_dependencies(eneg_id, args[0]);

			auto expr = join("(", to_enclosed_expression(epos_id), " - ", to_enclosed_expression(eneg_id), ") / "
			                 "(", to_enclosed_expression(epos_id), " + ", to_enclosed_expression(eneg_id), ")");
			emit_op(result_type, id, expr, true);
			inherit_expression_dependencies(id, epos_id);
			inherit_expression_dependencies(id, eneg_id);
		}
		break;
	case GLSLstd450Asinh:
		if (!is_legacy())
			emit_unary_func_op(result_type, id, args[0], "asinh");
		else
			emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Asinh);
		break;
	case GLSLstd450Acosh:
		if (!is_legacy())
			emit_unary_func_op(result_type, id, args[0], "acosh");
		else
			emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Acosh);
		break;
	case GLSLstd450Atanh:
		if (!is_legacy())
			emit_unary_func_op(result_type, id, args[0], "atanh");
		else
			emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Atanh);
		break;
	case GLSLstd450Atan2:
		emit_binary_func_op(result_type, id, args[0], args[1], "atan");
		break;

	// Exponentials
	case GLSLstd450Pow:
		emit_binary_func_op(result_type, id, args[0], args[1], "pow");
		break;
	case GLSLstd450Exp:
		emit_unary_func_op(result_type, id, args[0], "exp");
		break;
	case GLSLstd450Log:
		emit_unary_func_op(result_type, id, args[0], "log");
		break;
	case GLSLstd450Exp2:
		emit_unary_func_op(result_type, id, args[0], "exp2");
		break;
	case GLSLstd450Log2:
		emit_unary_func_op(result_type, id, args[0], "log2");
		break;
	case GLSLstd450Sqrt:
		emit_unary_func_op(result_type, id, args[0], "sqrt");
		break;
	case GLSLstd450InverseSqrt:
		emit_unary_func_op(result_type, id, args[0], "inversesqrt");
		break;

	// Matrix math
	case GLSLstd450Determinant:
	{
		// No need to transpose - it doesn't affect the determinant.
		auto *e = maybe_get<SPIRExpression>(args[0]);
		bool old_transpose = e && e->need_transpose;
		if (old_transpose)
			e->need_transpose = false;

		if (options.version < 150) // also matches ES 100
		{
			auto &type = expression_type(args[0]);
			assert(type.vecsize >= 2 && type.vecsize <= 4);
			assert(type.vecsize == type.columns);

			// ARB_gpu_shader_fp64 needs GLSL 150, other types are not valid.
			if (type.basetype != SPIRType::Float)
				SPIRV_CROSS_THROW("Unsupported type for matrix determinant");

			bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
			require_polyfill(static_cast<Polyfill>(PolyfillDeterminant2x2 << (type.vecsize - 2)),
			                 relaxed);
			emit_unary_func_op(result_type, id, args[0],
			                   (options.es && relaxed) ? "spvDeterminantMP" : "spvDeterminant");
		}
		else
			emit_unary_func_op(result_type, id, args[0], "determinant");

		if (old_transpose)
			e->need_transpose = true;
		break;
	}
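	// Note (added commentary): the shift trick above maps the matrix dimension onto consecutive
	// Polyfill values, e.g. a mat3 (vecsize == 3) selects PolyfillDeterminant2x2 << 1, i.e. the
	// 3x3 determinant polyfill, emitted as a helper named spvDeterminant (spvDeterminantMP for
	// relaxed-precision ES).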

	case GLSLstd450MatrixInverse:
	{
		// The inverse of the transpose is the same as the transpose of
		// the inverse, so we can just flip need_transpose of the result.
		auto *a = maybe_get<SPIRExpression>(args[0]);
		bool old_transpose = a && a->need_transpose;
		if (old_transpose)
			a->need_transpose = false;

		const char *func = "inverse";
		if (options.version < 140) // also matches ES 100
		{
			auto &type = get<SPIRType>(result_type);
			assert(type.vecsize >= 2 && type.vecsize <= 4);
			assert(type.vecsize == type.columns);

			// ARB_gpu_shader_fp64 needs GLSL 150, other types are invalid.
			if (type.basetype != SPIRType::Float)
				SPIRV_CROSS_THROW("Unsupported type for matrix inverse");

			bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
			require_polyfill(static_cast<Polyfill>(PolyfillMatrixInverse2x2 << (type.vecsize - 2)),
			                 relaxed);
			func = (options.es && relaxed) ? "spvInverseMP" : "spvInverse";
		}

		bool forward = should_forward(args[0]);
		auto &e = emit_op(result_type, id, join(func, "(", to_unpacked_expression(args[0]), ")"), forward);
		inherit_expression_dependencies(id, args[0]);

		if (old_transpose)
		{
			e.need_transpose = true;
			a->need_transpose = true;
		}
		break;
	}

	// Lerping
	case GLSLstd450FMix:
	case GLSLstd450IMix:
	{
		emit_mix_op(result_type, id, args[0], args[1], args[2]);
		break;
	}
	case GLSLstd450Step:
		emit_binary_func_op(result_type, id, args[0], args[1], "step");
		break;
	case GLSLstd450SmoothStep:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
		break;

	// Packing
	case GLSLstd450Frexp:
		register_call_out_argument(args[1]);
		forced_temporaries.insert(id);
		emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
		break;

	case GLSLstd450FrexpStruct:
	{
		auto &type = get<SPIRType>(result_type);
		emit_uninitialized_temporary_expression(result_type, id);
		statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
		          to_expression(id), ".", to_member_name(type, 1), ");");
		break;
	}

	case GLSLstd450Ldexp:
	{
		bool forward = should_forward(args[0]) && should_forward(args[1]);

		auto op0 = to_unpacked_expression(args[0]);
		auto op1 = to_unpacked_expression(args[1]);
		auto &op1_type = expression_type(args[1]);
		if (op1_type.basetype != SPIRType::Int)
		{
			// Need a value cast here.
			auto target_type = op1_type;
			target_type.basetype = SPIRType::Int;
			op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
		}

		auto expr = join("ldexp(", op0, ", ", op1, ")");

		emit_op(result_type, id, expr, forward);
		inherit_expression_dependencies(id, args[0]);
		inherit_expression_dependencies(id, args[1]);
		break;
	}

	case GLSLstd450PackSnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
		break;
	case GLSLstd450PackUnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
		break;
	case GLSLstd450PackSnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
		break;
	case GLSLstd450PackUnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
		break;
	case GLSLstd450PackHalf2x16:
		emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
		break;
	case GLSLstd450UnpackSnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
		break;
	case GLSLstd450UnpackUnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
		break;
	case GLSLstd450UnpackSnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
		break;
	case GLSLstd450UnpackUnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
		break;
	case GLSLstd450UnpackHalf2x16:
		emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
		break;

	case GLSLstd450PackDouble2x32:
		emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
		break;
	case GLSLstd450UnpackDouble2x32:
		emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
		break;

	// Vector math
	case GLSLstd450Length:
		emit_unary_func_op(result_type, id, args[0], "length");
		break;
	case GLSLstd450Distance:
		emit_binary_func_op(result_type, id, args[0], args[1], "distance");
		break;
	case GLSLstd450Cross:
		emit_binary_func_op(result_type, id, args[0], args[1], "cross");
		break;
	case GLSLstd450Normalize:
		emit_unary_func_op(result_type, id, args[0], "normalize");
		break;
	case GLSLstd450FaceForward:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
		break;
	case GLSLstd450Reflect:
		emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
		break;
	case GLSLstd450Refract:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
		break;

	// Bit-fiddling
	case GLSLstd450FindILsb:
		// findLSB always returns int.
		emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
		break;

	case GLSLstd450FindSMsb:
		emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
		break;

	case GLSLstd450FindUMsb:
		emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
		                        int_type); // findMSB always returns int.
		break;

	// Multisampled varying
	case GLSLstd450InterpolateAtCentroid:
		emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
		break;
	case GLSLstd450InterpolateAtSample:
		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
		break;
	case GLSLstd450InterpolateAtOffset:
		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
		break;

	case GLSLstd450NMin:
	case GLSLstd450NMax:
	{
		if (options.vulkan_semantics)
		{
			require_extension_internal("GL_EXT_spirv_intrinsics");
			bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
			Polyfill poly = {};
			switch (get<SPIRType>(result_type).width)
			{
			case 16:
				poly = op == GLSLstd450NMin ? PolyfillNMin16 : PolyfillNMax16;
				break;

			case 32:
				poly = op == GLSLstd450NMin ? PolyfillNMin32 : PolyfillNMax32;
				break;

			case 64:
				poly = op == GLSLstd450NMin ? PolyfillNMin64 : PolyfillNMax64;
				break;

			default:
				SPIRV_CROSS_THROW("Invalid bit width for NMin/NMax.");
			}

			require_polyfill(poly, relaxed);

			// Function return decorations are broken, so need to do double polyfill.
			if (relaxed)
				require_polyfill(poly, false);

			const char *op_str;
			if (relaxed)
				op_str = op == GLSLstd450NMin ? "spvNMinRelaxed" : "spvNMaxRelaxed";
			else
				op_str = op == GLSLstd450NMin ? "spvNMin" : "spvNMax";

			emit_binary_func_op(result_type, id, args[0], args[1], op_str);
		}
		else
		{
			emit_nminmax_op(result_type, id, args[0], args[1], op);
		}
		break;
	}

	case GLSLstd450NClamp:
	{
		if (options.vulkan_semantics)
		{
			require_extension_internal("GL_EXT_spirv_intrinsics");
			bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
			Polyfill poly = {};
			switch (get<SPIRType>(result_type).width)
			{
			case 16:
				poly = PolyfillNClamp16;
				break;

			case 32:
				poly = PolyfillNClamp32;
				break;

			case 64:
				poly = PolyfillNClamp64;
				break;

			default:
				SPIRV_CROSS_THROW("Invalid bit width for NClamp.");
			}

			require_polyfill(poly, relaxed);

			// Function return decorations are broken, so need to do double polyfill.
			if (relaxed)
				require_polyfill(poly, false);

			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], relaxed ? "spvNClampRelaxed" : "spvNClamp");
		}
		else
		{
			// Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
			// IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
			uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX];
			if (!max_id)
				max_id = ir.increase_bound_by(1);

			// Inherit precision qualifiers.
			ir.meta[max_id] = ir.meta[id];

			emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
			emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
		}
		break;
	}

	default:
		statement("// unimplemented GLSL op ", eop);
		break;
	}
}

void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
{
	// Need to emulate this call.
	uint32_t &ids = extra_sub_expressions[id];
	if (!ids)
	{
		ids = ir.increase_bound_by(5);
		auto btype = get<SPIRType>(result_type);
		btype.basetype = SPIRType::Boolean;
		set<SPIRType>(ids, btype);
	}

	uint32_t btype_id = ids + 0;
	uint32_t left_nan_id = ids + 1;
	uint32_t right_nan_id = ids + 2;
	uint32_t tmp_id = ids + 3;
	uint32_t mixed_first_id = ids + 4;

	// Inherit precision qualifiers.
	ir.meta[tmp_id] = ir.meta[id];
	ir.meta[mixed_first_id] = ir.meta[id];

	if (!is_legacy())
	{
		emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
		emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
	}
	else if (expression_type(op0).vecsize > 1)
	{
		// If the number doesn't equal itself, it must be NaN.
		emit_binary_func_op(btype_id, left_nan_id, op0, op0, "notEqual");
		emit_binary_func_op(btype_id, right_nan_id, op1, op1, "notEqual");
	}
	else
	{
		emit_binary_op(btype_id, left_nan_id, op0, op0, "!=");
		emit_binary_op(btype_id, right_nan_id, op1, op1, "!=");
	}
	emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
	emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
	emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
}
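// Worked example (added commentary): for scalar NMin on a non-legacy target the sequence above
// expands to roughly:
//   bool leftNan = isnan(a);
//   bool rightNan = isnan(b);
//   float tmp = min(a, b);
//   float result = rightNan ? a : (leftNan ? b : tmp);
// i.e. a NaN operand is discarded in favor of the other operand, matching NMin/NMax semantics.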

void CompilerGLSL::emit_emulated_ahyper_op(uint32_t result_type, uint32_t id, uint32_t op0, GLSLstd450 op)
{
	const char *one = backend.float_literal_suffix ? "1.0f" : "1.0";
	std::string expr;
	bool forward = should_forward(op0);

	switch (op)
	{
	case GLSLstd450Asinh:
		expr = join("log(", to_enclosed_expression(op0), " + sqrt(",
		            to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " + ", one, "))");
		break;

	case GLSLstd450Acosh:
		expr = join("log(", to_enclosed_expression(op0), " + sqrt(",
		            to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " - ", one, "))");
		break;

	case GLSLstd450Atanh:
		expr = join("log((", one, " + ", to_enclosed_expression(op0), ") / "
		            "(", one, " - ", to_enclosed_expression(op0), ")) * 0.5",
		            backend.float_literal_suffix ? "f" : "");
		break;

	default:
		SPIRV_CROSS_THROW("Invalid op.");
	}

	emit_op(result_type, id, expr, forward);
	inherit_expression_dependencies(id, op0);
}
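// The identities used above (added commentary): asinh(x) = log(x + sqrt(x^2 + 1)),
// acosh(x) = log(x + sqrt(x^2 - 1)), and atanh(x) = 0.5 * log((1 + x) / (1 - x)).
// These are exact rewrites, so the emulation holds wherever the functions are defined.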

void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
                                                 uint32_t)
{
	require_extension_internal("GL_AMD_shader_ballot");

	enum AMDShaderBallot
	{
		SwizzleInvocationsAMD = 1,
		SwizzleInvocationsMaskedAMD = 2,
		WriteInvocationAMD = 3,
		MbcntAMD = 4
	};

	auto op = static_cast<AMDShaderBallot>(eop);

	switch (op)
	{
	case SwizzleInvocationsAMD:
		emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
		register_control_dependent_expression(id);
		break;

	case SwizzleInvocationsMaskedAMD:
		emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
		register_control_dependent_expression(id);
		break;

	case WriteInvocationAMD:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
		register_control_dependent_expression(id);
		break;

	case MbcntAMD:
		emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
		register_control_dependent_expression(id);
		break;

	default:
		statement("// unimplemented SPV AMD shader ballot op ", eop);
		break;
	}
}

void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
                                                                    const uint32_t *args, uint32_t)
{
	require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");

	enum AMDShaderExplicitVertexParameter
	{
		InterpolateAtVertexAMD = 1
	};

	auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);

	switch (op)
	{
	case InterpolateAtVertexAMD:
		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
		break;

	default:
		statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
		break;
	}
}

void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
                                                         const uint32_t *args, uint32_t)
{
	require_extension_internal("GL_AMD_shader_trinary_minmax");

	enum AMDShaderTrinaryMinMax
	{
		FMin3AMD = 1,
		UMin3AMD = 2,
		SMin3AMD = 3,
		FMax3AMD = 4,
		UMax3AMD = 5,
		SMax3AMD = 6,
		FMid3AMD = 7,
		UMid3AMD = 8,
		SMid3AMD = 9
	};

	auto op = static_cast<AMDShaderTrinaryMinMax>(eop);

	switch (op)
	{
	case FMin3AMD:
	case UMin3AMD:
	case SMin3AMD:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
		break;

	case FMax3AMD:
	case UMax3AMD:
	case SMax3AMD:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
		break;

	case FMid3AMD:
	case UMid3AMD:
	case SMid3AMD:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
		break;

	default:
		statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
		break;
	}
}

void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
                                              uint32_t)
{
	require_extension_internal("GL_AMD_gcn_shader");

	enum AMDGCNShader
	{
		CubeFaceIndexAMD = 1,
		CubeFaceCoordAMD = 2,
		TimeAMD = 3
	};

	auto op = static_cast<AMDGCNShader>(eop);

	switch (op)
	{
	case CubeFaceIndexAMD:
		emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
		break;
	case CubeFaceCoordAMD:
		emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
		break;
	case TimeAMD:
	{
		string expr = "timeAMD()";
		emit_op(result_type, id, expr, true);
		register_control_dependent_expression(id);
		break;
	}

	default:
		statement("// unimplemented SPV AMD gcn shader op ", eop);
		break;
	}
}

void CompilerGLSL::emit_subgroup_op(const Instruction &i)
{
	const uint32_t *ops = stream(i);
	auto op = static_cast<Op>(i.op);

	if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op, ops))
		SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");

	// If we need to do implicit bitcasts, make sure we do it with the correct type.
	uint32_t integer_width = get_integer_width_for_instruction(i);
	auto int_type = to_signed_basetype(integer_width);
	auto uint_type = to_unsigned_basetype(integer_width);

	switch (op)
	{
	case OpGroupNonUniformElect:
		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect);
		break;

	case OpGroupNonUniformBallotBitCount:
	{
		const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
		if (operation == GroupOperationReduce)
			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
		else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
	}
	break;

	case OpGroupNonUniformBallotBitExtract:
		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
		break;

	case OpGroupNonUniformInverseBallot:
		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
		break;

	case OpGroupNonUniformBallot:
		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot);
		break;

	case OpGroupNonUniformBallotFindLSB:
	case OpGroupNonUniformBallotFindMSB:
		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
		break;

	case OpGroupNonUniformBroadcast:
	case OpGroupNonUniformBroadcastFirst:
		request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
		break;

	case OpGroupNonUniformShuffle:
	case OpGroupNonUniformShuffleXor:
		require_extension_internal("GL_KHR_shader_subgroup_shuffle");
		break;

	case OpGroupNonUniformShuffleUp:
	case OpGroupNonUniformShuffleDown:
		require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
		break;

	case OpGroupNonUniformAll:
	case OpGroupNonUniformAny:
	case OpGroupNonUniformAllEqual:
	{
		const SPIRType &type = expression_type(ops[3]);
		if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
		else
			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT);
	}
	break;

	// clang-format off
#define GLSL_GROUP_OP(OP)\
	case OpGroupNonUniform##OP:\
	{\
		auto operation = static_cast<GroupOperation>(ops[3]);\
		if (operation == GroupOperationClusteredReduce)\
			require_extension_internal("GL_KHR_shader_subgroup_clustered");\
		else if (operation == GroupOperationReduce)\
			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##Reduce);\
		else if (operation == GroupOperationExclusiveScan)\
			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##ExclusiveScan);\
		else if (operation == GroupOperationInclusiveScan)\
			request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##InclusiveScan);\
		else\
			SPIRV_CROSS_THROW("Invalid group operation.");\
		break;\
	}

	GLSL_GROUP_OP(IAdd)
	GLSL_GROUP_OP(FAdd)
	GLSL_GROUP_OP(IMul)
	GLSL_GROUP_OP(FMul)

#undef GLSL_GROUP_OP
	// clang-format on
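	// Illustrative expansion (added commentary): GLSL_GROUP_OP(IAdd) above generates the case
	// for OpGroupNonUniformIAdd, mapping Reduce/InclusiveScan/ExclusiveScan onto the matching
	// SubgroupArithmeticIAdd* feature requests and clustered reduction onto
	// GL_KHR_shader_subgroup_clustered.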

	case OpGroupNonUniformFMin:
	case OpGroupNonUniformFMax:
	case OpGroupNonUniformSMin:
	case OpGroupNonUniformSMax:
	case OpGroupNonUniformUMin:
	case OpGroupNonUniformUMax:
	case OpGroupNonUniformBitwiseAnd:
	case OpGroupNonUniformBitwiseOr:
	case OpGroupNonUniformBitwiseXor:
	case OpGroupNonUniformLogicalAnd:
	case OpGroupNonUniformLogicalOr:
	case OpGroupNonUniformLogicalXor:
	{
		auto operation = static_cast<GroupOperation>(ops[3]);
		if (operation == GroupOperationClusteredReduce)
		{
			require_extension_internal("GL_KHR_shader_subgroup_clustered");
		}
		else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
		         operation == GroupOperationReduce)
		{
			require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
		}
		else
			SPIRV_CROSS_THROW("Invalid group operation.");
		break;
	}

	case OpGroupNonUniformQuadSwap:
	case OpGroupNonUniformQuadBroadcast:
		require_extension_internal("GL_KHR_shader_subgroup_quad");
		break;

	default:
		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
	}

	uint32_t result_type = ops[0];
	uint32_t id = ops[1];

	auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
	if (scope != ScopeSubgroup)
		SPIRV_CROSS_THROW("Only subgroup scope is supported.");

	switch (op)
	{
	case OpGroupNonUniformElect:
		emit_op(result_type, id, "subgroupElect()", true);
		break;

	case OpGroupNonUniformBroadcast:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
		break;

	case OpGroupNonUniformBroadcastFirst:
		emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
		break;

	case OpGroupNonUniformBallot:
		emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
		break;

	case OpGroupNonUniformInverseBallot:
		emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
		break;

	case OpGroupNonUniformBallotBitExtract:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
		break;

	case OpGroupNonUniformBallotFindLSB:
		emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
		break;

	case OpGroupNonUniformBallotFindMSB:
		emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
		break;

	case OpGroupNonUniformBallotBitCount:
	{
		auto operation = static_cast<GroupOperation>(ops[3]);
		if (operation == GroupOperationReduce)
			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
		else if (operation == GroupOperationInclusiveScan)
			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
		else if (operation == GroupOperationExclusiveScan)
			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
		else
			SPIRV_CROSS_THROW("Invalid BitCount operation.");
		break;
	}

	case OpGroupNonUniformShuffle:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
		break;

	case OpGroupNonUniformShuffleXor:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
		break;

	case OpGroupNonUniformShuffleUp:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
		break;

	case OpGroupNonUniformShuffleDown:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
		break;

	case OpGroupNonUniformAll:
		emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
		break;

	case OpGroupNonUniformAny:
		emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
		break;

	case OpGroupNonUniformAllEqual:
		emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
		break;

	// clang-format off
#define GLSL_GROUP_OP(op, glsl_op) \
	case OpGroupNonUniform##op: \
	{ \
		auto operation = static_cast<GroupOperation>(ops[3]); \
		if (operation == GroupOperationReduce) \
			emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
		else if (operation == GroupOperationInclusiveScan) \
			emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
		else if (operation == GroupOperationExclusiveScan) \
			emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
		else if (operation == GroupOperationClusteredReduce) \
			emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
		else \
			SPIRV_CROSS_THROW("Invalid group operation."); \
		break; \
	}

#define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
	case OpGroupNonUniform##op: \
	{ \
		auto operation = static_cast<GroupOperation>(ops[3]); \
		if (operation == GroupOperationReduce) \
			emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
		else if (operation == GroupOperationInclusiveScan) \
			emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
		else if (operation == GroupOperationExclusiveScan) \
			emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
		else if (operation == GroupOperationClusteredReduce) \
			emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
		else \
			SPIRV_CROSS_THROW("Invalid group operation."); \
		break; \
	}
9552 | |
9553 | GLSL_GROUP_OP(FAdd, Add) |
9554 | GLSL_GROUP_OP(FMul, Mul) |
9555 | GLSL_GROUP_OP(FMin, Min) |
9556 | GLSL_GROUP_OP(FMax, Max) |
9557 | GLSL_GROUP_OP(IAdd, Add) |
9558 | GLSL_GROUP_OP(IMul, Mul) |
9559 | GLSL_GROUP_OP_CAST(SMin, Min, int_type) |
9560 | GLSL_GROUP_OP_CAST(SMax, Max, int_type) |
9561 | GLSL_GROUP_OP_CAST(UMin, Min, uint_type) |
9562 | GLSL_GROUP_OP_CAST(UMax, Max, uint_type) |
9563 | GLSL_GROUP_OP(BitwiseAnd, And) |
9564 | GLSL_GROUP_OP(BitwiseOr, Or) |
9565 | GLSL_GROUP_OP(BitwiseXor, Xor) |
9566 | GLSL_GROUP_OP(LogicalAnd, And) |
9567 | GLSL_GROUP_OP(LogicalOr, Or) |
9568 | GLSL_GROUP_OP(LogicalXor, Xor) |
9569 | #undef GLSL_GROUP_OP |
9570 | #undef GLSL_GROUP_OP_CAST |
9571 | // clang-format on |
9572 | |
9573 | case OpGroupNonUniformQuadSwap: |
9574 | { |
9575 | uint32_t direction = evaluate_constant_u32(id: ops[4]); |
9576 | if (direction == 0) |
9577 | emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupQuadSwapHorizontal"); |
9578 | else if (direction == 1) |
9579 | emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupQuadSwapVertical"); |
9580 | else if (direction == 2) |
9581 | emit_unary_func_op(result_type, result_id: id, op0: ops[3], op: "subgroupQuadSwapDiagonal"); |
9582 | else |
9583 | SPIRV_CROSS_THROW("Invalid quad swap direction."); |
9584 | break; |
9585 | } |
9586 | |
9587 | case OpGroupNonUniformQuadBroadcast: |
9588 | { |
9589 | emit_binary_func_op(result_type, result_id: id, op0: ops[3], op1: ops[4], op: "subgroupQuadBroadcast"); |
9590 | break; |
9591 | } |
9592 | |
9593 | default: |
9594 | SPIRV_CROSS_THROW("Invalid opcode for subgroup."); |
9595 | } |
9596 | |
9597 | register_control_dependent_expression(expr: id); |
9598 | } |
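
// Illustrative mapping for the subgroup path above (a sketch, not authoritative output;
// exact temporaries depend on expression forwarding). A SPIR-V instruction such as
//   %r = OpGroupNonUniformBallot %uvec4 Subgroup %v
// lowers to GLSL along the lines of
//   uvec4 r = subgroupBallot(v);
// with GL_KHR_shader_subgroup_ballot pulled in by the extension machinery.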

string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
{
    // OpBitcast can deal with pointers.
    if (out_type.pointer || in_type.pointer)
    {
        if (out_type.vecsize == 2 || in_type.vecsize == 2)
            require_extension_internal("GL_EXT_buffer_reference_uvec2");
        return type_to_glsl(out_type);
    }

    if (out_type.basetype == in_type.basetype)
        return "";

    assert(out_type.basetype != SPIRType::Boolean);
    assert(in_type.basetype != SPIRType::Boolean);

    bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
    bool same_size_cast = out_type.width == in_type.width;

    // Trivial bitcast case, casts between integers.
    if (integral_cast && same_size_cast)
        return type_to_glsl(out_type);

    // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
    if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
        return "unpack8";
    else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
        return "pack16";
    else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
        return "pack32";

    // Floating <-> Integer special casts. Just have to enumerate all cases. :(
    // 16-bit, 32-bit and 64-bit floats.
    if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
    {
        if (is_legacy_es())
            SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
        else if (!options.es && options.version < 330)
            require_extension_internal("GL_ARB_shader_bit_encoding");
        return "floatBitsToUint";
    }
    else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
    {
        if (is_legacy_es())
            SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
        else if (!options.es && options.version < 330)
            require_extension_internal("GL_ARB_shader_bit_encoding");
        return "floatBitsToInt";
    }
    else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
    {
        if (is_legacy_es())
            SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
        else if (!options.es && options.version < 330)
            require_extension_internal("GL_ARB_shader_bit_encoding");
        return "uintBitsToFloat";
    }
    else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
    {
        if (is_legacy_es())
            SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
        else if (!options.es && options.version < 330)
            require_extension_internal("GL_ARB_shader_bit_encoding");
        return "intBitsToFloat";
    }

    else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
        return "doubleBitsToInt64";
    else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
        return "doubleBitsToUint64";
    else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
        return "int64BitsToDouble";
    else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
        return "uint64BitsToDouble";
    else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
        return "float16BitsToInt16";
    else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
        return "float16BitsToUint16";
    else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
        return "int16BitsToFloat16";
    else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
        return "uint16BitsToFloat16";

    // And finally, some even more special purpose casts.
    if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
        return "packUint2x32";
    else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
        return "unpackUint2x32";
    else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
        return "unpackFloat2x16";
    else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
        return "packFloat2x16";
    else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
        return "packInt2x16";
    else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
        return "unpackInt2x16";
    else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
        return "packUint2x16";
    else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
        return "unpackUint2x16";
    else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
        return "packInt4x16";
    else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
        return "unpackInt4x16";
    else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
        return "packUint4x16";
    else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
        return "unpackUint4x16";

    return "";
}
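
// A few sketches of how the table above composes in emitted GLSL (identifiers hypothetical):
//   float -> uint  : floatBitsToUint(x)   // needs GL_ARB_shader_bit_encoding before GLSL 330
//   uvec2 -> uint64: packUint2x32(x)
//   int  <-> uint  : uint(x) / int(x)     // trivial same-width integral cast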

string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
{
    auto op = bitcast_glsl_op(result_type, expression_type(argument));
    if (op.empty())
        return to_enclosed_unpacked_expression(argument);
    else
        return join(op, "(", to_unpacked_expression(argument), ")");
}

std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
{
    auto expr = to_expression(arg);
    auto &src_type = expression_type(arg);
    if (src_type.basetype != target_type)
    {
        auto target = src_type;
        target.basetype = target_type;
        expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
    }

    return expr;
}
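
// Minimal usage sketch (hypothetical IDs): bitcast_expression(SPIRType::UInt, arg) turns an
// expression like "my_float" into "floatBitsToUint(my_float)", while a matching basetype
// passes the expression through unchanged.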

std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
                                             const std::string &expr)
{
    if (target_type.basetype == expr_type)
        return expr;

    auto src_type = target_type;
    src_type.basetype = expr_type;
    return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
}

string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
{
    switch (builtin)
    {
    case BuiltInPosition:
        return "gl_Position";
    case BuiltInPointSize:
        return "gl_PointSize";
    case BuiltInClipDistance:
    {
        if (options.es)
            require_extension_internal("GL_EXT_clip_cull_distance");
        return "gl_ClipDistance";
    }
    case BuiltInCullDistance:
    {
        if (options.es)
            require_extension_internal("GL_EXT_clip_cull_distance");
        return "gl_CullDistance";
    }
    case BuiltInVertexId:
        if (options.vulkan_semantics)
            SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
                              "with GL semantics.");
        return "gl_VertexID";
    case BuiltInInstanceId:
        if (options.vulkan_semantics)
        {
            auto model = get_entry_point().model;
            switch (model)
            {
            case spv::ExecutionModelIntersectionKHR:
            case spv::ExecutionModelAnyHitKHR:
            case spv::ExecutionModelClosestHitKHR:
                // gl_InstanceID is allowed in these shaders.
                break;

            default:
                SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
                                  "created with GL semantics.");
            }
        }
        if (!options.es && options.version < 140)
        {
            require_extension_internal("GL_ARB_draw_instanced");
        }
        return "gl_InstanceID";
    case BuiltInVertexIndex:
        if (options.vulkan_semantics)
            return "gl_VertexIndex";
        else
            return "gl_VertexID"; // gl_VertexID already has the base offset applied.
    case BuiltInInstanceIndex:
        if (options.vulkan_semantics)
            return "gl_InstanceIndex";

        if (!options.es && options.version < 140)
        {
            require_extension_internal("GL_ARB_draw_instanced");
        }

        if (options.vertex.support_nonzero_base_instance)
        {
            if (!options.vulkan_semantics)
            {
                // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
                require_extension_internal("GL_ARB_shader_draw_parameters");
            }
            return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
        }
        else
            return "gl_InstanceID";
    case BuiltInPrimitiveId:
        if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
            return "gl_PrimitiveIDIn";
        else
            return "gl_PrimitiveID";
    case BuiltInInvocationId:
        return "gl_InvocationID";
    case BuiltInLayer:
        return "gl_Layer";
    case BuiltInViewportIndex:
        return "gl_ViewportIndex";
    case BuiltInTessLevelOuter:
        return "gl_TessLevelOuter";
    case BuiltInTessLevelInner:
        return "gl_TessLevelInner";
    case BuiltInTessCoord:
        return "gl_TessCoord";
    case BuiltInPatchVertices:
        return "gl_PatchVerticesIn";
    case BuiltInFragCoord:
        return "gl_FragCoord";
    case BuiltInPointCoord:
        return "gl_PointCoord";
    case BuiltInFrontFacing:
        return "gl_FrontFacing";
    case BuiltInFragDepth:
        return "gl_FragDepth";
    case BuiltInNumWorkgroups:
        return "gl_NumWorkGroups";
    case BuiltInWorkgroupSize:
        return "gl_WorkGroupSize";
    case BuiltInWorkgroupId:
        return "gl_WorkGroupID";
    case BuiltInLocalInvocationId:
        return "gl_LocalInvocationID";
    case BuiltInGlobalInvocationId:
        return "gl_GlobalInvocationID";
    case BuiltInLocalInvocationIndex:
        return "gl_LocalInvocationIndex";
    case BuiltInHelperInvocation:
        return "gl_HelperInvocation";

    case BuiltInBaseVertex:
        if (options.es)
            SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");

        if (options.vulkan_semantics)
        {
            if (options.version < 460)
            {
                require_extension_internal("GL_ARB_shader_draw_parameters");
                return "gl_BaseVertexARB";
            }
            return "gl_BaseVertex";
        }
        // On regular GL, this is soft-enabled and we emit ifdefs in code.
        require_extension_internal("GL_ARB_shader_draw_parameters");
        return "SPIRV_Cross_BaseVertex";

    case BuiltInBaseInstance:
        if (options.es)
            SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");

        if (options.vulkan_semantics)
        {
            if (options.version < 460)
            {
                require_extension_internal("GL_ARB_shader_draw_parameters");
                return "gl_BaseInstanceARB";
            }
            return "gl_BaseInstance";
        }
        // On regular GL, this is soft-enabled and we emit ifdefs in code.
        require_extension_internal("GL_ARB_shader_draw_parameters");
        return "SPIRV_Cross_BaseInstance";

    case BuiltInDrawIndex:
        if (options.es)
            SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");

        if (options.vulkan_semantics)
        {
            if (options.version < 460)
            {
                require_extension_internal("GL_ARB_shader_draw_parameters");
                return "gl_DrawIDARB";
            }
            return "gl_DrawID";
        }
        // On regular GL, this is soft-enabled and we emit ifdefs in code.
        require_extension_internal("GL_ARB_shader_draw_parameters");
        return "gl_DrawIDARB";

    case BuiltInSampleId:
        if (is_legacy())
            SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
        else if (options.es && options.version < 320)
            require_extension_internal("GL_OES_sample_variables");
        else if (!options.es && options.version < 400)
            require_extension_internal("GL_ARB_sample_shading");
        return "gl_SampleID";

    case BuiltInSampleMask:
        if (is_legacy())
            SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
        else if (options.es && options.version < 320)
            require_extension_internal("GL_OES_sample_variables");
        else if (!options.es && options.version < 400)
            require_extension_internal("GL_ARB_sample_shading");

        if (storage == StorageClassInput)
            return "gl_SampleMaskIn";
        else
            return "gl_SampleMask";

    case BuiltInSamplePosition:
        if (is_legacy())
            SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
        else if (options.es && options.version < 320)
            require_extension_internal("GL_OES_sample_variables");
        else if (!options.es && options.version < 400)
            require_extension_internal("GL_ARB_sample_shading");
        return "gl_SamplePosition";

    case BuiltInViewIndex:
        if (options.vulkan_semantics)
            return "gl_ViewIndex";
        else
            return "gl_ViewID_OVR";

    case BuiltInNumSubgroups:
        request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups);
        return "gl_NumSubgroups";

    case BuiltInSubgroupId:
        request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID);
        return "gl_SubgroupID";

    case BuiltInSubgroupSize:
        request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize);
        return "gl_SubgroupSize";

    case BuiltInSubgroupLocalInvocationId:
        request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID);
        return "gl_SubgroupInvocationID";

    case BuiltInSubgroupEqMask:
        request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
        return "gl_SubgroupEqMask";

    case BuiltInSubgroupGeMask:
        request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
        return "gl_SubgroupGeMask";

    case BuiltInSubgroupGtMask:
        request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
        return "gl_SubgroupGtMask";

    case BuiltInSubgroupLeMask:
        request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
        return "gl_SubgroupLeMask";

    case BuiltInSubgroupLtMask:
        request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
        return "gl_SubgroupLtMask";

    case BuiltInLaunchIdKHR:
        return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
    case BuiltInLaunchSizeKHR:
        return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
    case BuiltInWorldRayOriginKHR:
        return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
    case BuiltInWorldRayDirectionKHR:
        return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
    case BuiltInObjectRayOriginKHR:
        return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
    case BuiltInObjectRayDirectionKHR:
        return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
    case BuiltInRayTminKHR:
        return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
    case BuiltInRayTmaxKHR:
        return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
    case BuiltInInstanceCustomIndexKHR:
        return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
    case BuiltInObjectToWorldKHR:
        return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
    case BuiltInWorldToObjectKHR:
        return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
    case BuiltInHitTNV:
        // gl_HitTEXT is an alias of RayTMax in KHR.
        return "gl_HitTNV";
    case BuiltInHitKindKHR:
        return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
    case BuiltInIncomingRayFlagsKHR:
        return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";

    case BuiltInBaryCoordKHR:
    {
        if (options.es && options.version < 320)
            SPIRV_CROSS_THROW("gl_BaryCoordEXT requires ESSL 320.");
        else if (!options.es && options.version < 450)
            SPIRV_CROSS_THROW("gl_BaryCoordEXT requires GLSL 450.");

        if (barycentric_is_nv)
        {
            require_extension_internal("GL_NV_fragment_shader_barycentric");
            return "gl_BaryCoordNV";
        }
        else
        {
            require_extension_internal("GL_EXT_fragment_shader_barycentric");
            return "gl_BaryCoordEXT";
        }
    }

    case BuiltInBaryCoordNoPerspNV:
    {
        if (options.es && options.version < 320)
            SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires ESSL 320.");
        else if (!options.es && options.version < 450)
            SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires GLSL 450.");

        if (barycentric_is_nv)
        {
            require_extension_internal("GL_NV_fragment_shader_barycentric");
            return "gl_BaryCoordNoPerspNV";
        }
        else
        {
            require_extension_internal("GL_EXT_fragment_shader_barycentric");
            return "gl_BaryCoordNoPerspEXT";
        }
    }

    case BuiltInFragStencilRefEXT:
    {
        if (!options.es)
        {
            require_extension_internal("GL_ARB_shader_stencil_export");
            return "gl_FragStencilRefARB";
        }
        else
            SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
    }

    case BuiltInPrimitiveShadingRateKHR:
    {
        if (!options.vulkan_semantics)
            SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL.");
        require_extension_internal("GL_EXT_fragment_shading_rate");
        return "gl_PrimitiveShadingRateEXT";
    }

    case BuiltInShadingRateKHR:
    {
        if (!options.vulkan_semantics)
            SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL.");
        require_extension_internal("GL_EXT_fragment_shading_rate");
        return "gl_ShadingRateEXT";
    }

    case BuiltInDeviceIndex:
        if (!options.vulkan_semantics)
            SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
        require_extension_internal("GL_EXT_device_group");
        return "gl_DeviceIndex";

    case BuiltInFullyCoveredEXT:
        if (!options.es)
            require_extension_internal("GL_NV_conservative_raster_underestimation");
        else
            SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation.");
        return "gl_FragFullyCoveredNV";

    case BuiltInPrimitiveTriangleIndicesEXT:
        return "gl_PrimitiveTriangleIndicesEXT";
    case BuiltInPrimitiveLineIndicesEXT:
        return "gl_PrimitiveLineIndicesEXT";
    case BuiltInPrimitivePointIndicesEXT:
        return "gl_PrimitivePointIndicesEXT";
    case BuiltInCullPrimitiveEXT:
        return "gl_CullPrimitiveEXT";

    default:
        return join("gl_BuiltIn_", convert_to_string(builtin));
    }
}
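
// Illustrative behavior (assuming default options): BuiltInVertexIndex maps to "gl_VertexIndex"
// under Vulkan semantics but to "gl_VertexID" on plain GL, and an unrecognized builtin falls
// back to a visible placeholder name like "gl_BuiltIn_4440" built from the raw enum value.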

const char *CompilerGLSL::index_to_swizzle(uint32_t index)
{
    switch (index)
    {
    case 0:
        return "x";
    case 1:
        return "y";
    case 2:
        return "z";
    case 3:
        return "w";
    default:
        return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in spec.
    }
}
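
// e.g. index_to_swizzle(2) yields "z", so indexing a vec4 "v" with literal 2 emits "v.z" rather than "v[2]".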

void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
                                                      AccessChainFlags flags, bool &access_chain_is_arrayed,
                                                      uint32_t index)
{
    bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
    bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
    bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;

    string idx_expr = index_is_literal ? convert_to_string(index) : to_unpacked_expression(index, register_expression_read);

    // For the case where the base of an OpPtrAccessChain already ends in [n],
    // we need to use the index as an offset to the existing index, otherwise,
    // we can just use the index directly.
    if (ptr_chain && access_chain_is_arrayed)
    {
        size_t split_pos = expr.find_last_of(']');
        size_t enclose_split = expr.find_last_of(')');

        // If we have already enclosed the expression, don't try to be clever, it will break.
        if (split_pos > enclose_split || enclose_split == string::npos)
        {
            string expr_front = expr.substr(0, split_pos);
            string expr_back = expr.substr(split_pos);
            expr = expr_front + " + " + enclose_expression(idx_expr) + expr_back;
            return;
        }
    }

    expr += "[";
    expr += idx_expr;
    expr += "]";
}
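
// Sketch of the rewrite above (hypothetical expressions): with expr == "buf[i]" and an
// OpPtrAccessChain element index "j", the result is "buf[i + j]" rather than the
// incorrect double subscript "buf[i][j]".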

bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
{
    return true;
}

string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
                                           AccessChainFlags flags, AccessChainMeta *meta)
{
    string expr;

    bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
    bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
    bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
    bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
    bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
    bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;

    if (!chain_only)
    {
        // We handle transpose explicitly, so don't resolve that here.
        auto *e = maybe_get<SPIRExpression>(base);
        bool old_transpose = e && e->need_transpose;
        if (e)
            e->need_transpose = false;
        expr = to_enclosed_expression(base, register_expression_read);
        if (e)
            e->need_transpose = old_transpose;
    }

    // Start traversing type hierarchy at the proper non-pointer types,
    // but keep type_id referencing the original pointer for use below.
    uint32_t type_id = expression_type_id(base);
    const auto *type = &get_pointee_type(type_id);

    if (!backend.native_pointers)
    {
        if (ptr_chain)
            SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");

        // Wrapped buffer reference pointer types will need to poke into the internal "value" member before
        // continuing the access chain.
        if (should_dereference(base))
            expr = dereference_expression(get<SPIRType>(type_id), expr);
    }
    else if (should_dereference(base) && type->basetype != SPIRType::Struct && !ptr_chain)
        expr = join("(", dereference_expression(*type, expr), ")");

    bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
    bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
    bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
    uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
    bool is_invariant = has_decoration(base, DecorationInvariant);
    bool relaxed_precision = has_decoration(base, DecorationRelaxedPrecision);
    bool pending_array_enclose = false;
    bool dimension_flatten = false;
    bool access_meshlet_position_y = false;

    if (auto *base_expr = maybe_get<SPIRExpression>(base))
    {
        access_meshlet_position_y = base_expr->access_meshlet_position_y;
    }

    // If we are translating access to a structured buffer, the first subscript '._m0' must be hidden.
    bool hide_first_subscript = count > 1 && is_user_type_structured(base);

    const auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) {
        AccessChainFlags mod_flags = flags;
        if (!is_literal)
            mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
        if (!is_ptr_chain)
            mod_flags &= ~ACCESS_CHAIN_PTR_CHAIN_BIT;
        access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
        check_physical_type_cast(expr, type, physical_type);
    };

    for (uint32_t i = 0; i < count; i++)
    {
        uint32_t index = indices[i];

        bool is_literal = index_is_literal;
        if (is_literal && msb_is_id && (index >> 31u) != 0u)
        {
            is_literal = false;
            index &= 0x7fffffffu;
        }

        bool ptr_chain_array_entry = ptr_chain && i == 0 && is_array(*type);

        if (ptr_chain_array_entry)
        {
            // This is highly unusual code, since normally we'd use plain AccessChain, but it's still allowed.
            // We are considered to have a pointer to array and one element shifts by one array at a time.
            // If we use normal array indexing, we'll first decay to pointer, and lose the array-ness,
            // so we have to take pointer to array explicitly.
            if (!should_dereference(base))
                expr = enclose_expression(address_of_expression(expr));
        }

        if (ptr_chain && i == 0)
        {
            // Pointer chains
            // If we are flattening multidimensional arrays, only create opening bracket on first
            // array index.
            if (options.flatten_multidimensional_arrays)
            {
                dimension_flatten = type->array.size() >= 1;
                pending_array_enclose = dimension_flatten;
                if (pending_array_enclose)
                    expr += "[";
            }

            if (options.flatten_multidimensional_arrays && dimension_flatten)
            {
                // If we are flattening multidimensional arrays, do manual stride computation.
                if (is_literal)
                    expr += convert_to_string(index);
                else
                    expr += to_enclosed_expression(index, register_expression_read);

                for (auto j = uint32_t(type->array.size()); j; j--)
                {
                    expr += " * ";
                    expr += enclose_expression(to_array_size(*type, j - 1));
                }

                if (type->array.empty())
                    pending_array_enclose = false;
                else
                    expr += " + ";

                if (!pending_array_enclose)
                    expr += "]";
            }
            else
            {
                append_index(index, is_literal, true);
            }

            if (type->basetype == SPIRType::ControlPointArray)
            {
                type_id = type->parent_type;
                type = &get<SPIRType>(type_id);
            }

            access_chain_is_arrayed = true;

            // Explicitly enclose the expression if this is one of the weird pointer-to-array cases.
            // We don't want any future indexing to add to this array dereference.
            // Enclosing the expression blocks that and avoids any shenanigans with operand priority.
            if (ptr_chain_array_entry)
                expr = join("(", expr, ")");
        }
        // Arrays
        else if (!type->array.empty())
        {
            // If we are flattening multidimensional arrays, only create opening bracket on first
            // array index.
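            // e.g. a float a[2][3] accessed as a[i][j] is emitted flattened as a[i * 3 + j],
            // since targets without true multidimensional arrays need a single linear subscript.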
            if (options.flatten_multidimensional_arrays && !pending_array_enclose)
            {
                dimension_flatten = type->array.size() > 1;
                pending_array_enclose = dimension_flatten;
                if (pending_array_enclose)
                    expr += "[";
            }

            assert(type->parent_type);

            auto *var = maybe_get<SPIRVariable>(base);
            if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
                !has_decoration(type->self, DecorationBlock))
            {
                // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
                // Normally, these variables live in blocks when compiled from GLSL,
                // but HLSL seems to just emit straight arrays here.
                // We must pretend this access goes through gl_in/gl_out arrays
                // to be able to access certain builtins as arrays.
                // Similar concerns apply for mesh shaders where we have to redirect to gl_MeshVerticesEXT or MeshPrimitivesEXT.
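                // e.g. an access to a bare gl_Position[3] input in a geometry shader is
                // rewritten here as gl_in[3].gl_Position, the only legal GLSL spelling.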
                auto builtin = ir.meta[base].decoration.builtin_type;
                bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT;

                switch (builtin)
                {
                case BuiltInCullDistance:
                case BuiltInClipDistance:
                    if (type->array.size() == 1) // Red herring. Only consider block IO for two-dimensional arrays here.
                    {
                        append_index(index, is_literal);
                        break;
                    }
                    // fallthrough
                case BuiltInPosition:
                case BuiltInPointSize:
                    if (mesh_shader)
                        expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr);
                    else if (var->storage == StorageClassInput)
                        expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
                    else if (var->storage == StorageClassOutput)
                        expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
                    else
                        append_index(index, is_literal);
                    break;

                case BuiltInPrimitiveId:
                case BuiltInLayer:
                case BuiltInViewportIndex:
                case BuiltInCullPrimitiveEXT:
                case BuiltInPrimitiveShadingRateKHR:
                    if (mesh_shader)
                        expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr);
                    else
                        append_index(index, is_literal);
                    break;

                default:
                    append_index(index, is_literal);
                    break;
                }
            }
            else if (backend.force_merged_mesh_block && i == 0 && var &&
                     !is_builtin_variable(*var) && var->storage == StorageClassOutput)
            {
                if (is_per_primitive_variable(*var))
                    expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr);
                else
                    expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr);
            }
            else if (options.flatten_multidimensional_arrays && dimension_flatten)
            {
                // If we are flattening multidimensional arrays, do manual stride computation.
                auto &parent_type = get<SPIRType>(type->parent_type);

                if (is_literal)
                    expr += convert_to_string(index);
                else
                    expr += to_enclosed_expression(index, register_expression_read);

                for (auto j = uint32_t(parent_type.array.size()); j; j--)
                {
                    expr += " * ";
                    expr += enclose_expression(to_array_size(parent_type, j - 1));
                }

                if (parent_type.array.empty())
                    pending_array_enclose = false;
                else
                    expr += " + ";

                if (!pending_array_enclose)
                    expr += "]";
            }
            else if (index_is_literal || !builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
            {
                // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
                // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
                // For literal indices we are working on composites, so we ignore this since we have already converted to proper array.
                append_index(index, is_literal);
            }

            if (var && has_decoration(var->self, DecorationBuiltIn) &&
                get_decoration(var->self, DecorationBuiltIn) == BuiltInPosition &&
                get_execution_model() == ExecutionModelMeshEXT)
            {
                access_meshlet_position_y = true;
            }

            type_id = type->parent_type;
            type = &get<SPIRType>(type_id);

            // If the physical type has an unnatural vecsize,
            // we must assume it's a faked struct where the .data member
            // is used for the real payload.
            if (physical_type && (is_vector(*type) || is_scalar(*type)))
            {
                auto &phys = get<SPIRType>(physical_type);
                if (phys.vecsize > 4)
                    expr += ".data";
            }

            access_chain_is_arrayed = true;
        }
        // For structs, the index refers to a constant, which indexes into the members, possibly through a redirection mapping.
        // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
        else if (type->basetype == SPIRType::Struct)
        {
            if (!is_literal)
                index = evaluate_constant_u32(index);

            if (index < uint32_t(type->member_type_index_redirection.size()))
                index = type->member_type_index_redirection[index];

            if (index >= type->member_types.size())
                SPIRV_CROSS_THROW("Member index is out of bounds!");

            if (hide_first_subscript)
            {
                // First "._m0" subscript has been hidden, subsequent fields must be emitted even for structured buffers.
                hide_first_subscript = false;
            }
            else
            {
                BuiltIn builtin = BuiltInMax;
                if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base))
                {
                    if (access_chain_is_arrayed)
                    {
                        expr += ".";
                        expr += builtin_to_glsl(builtin, type->storage);
                    }
                    else
                        expr = builtin_to_glsl(builtin, type->storage);

                    if (builtin == BuiltInPosition && get_execution_model() == ExecutionModelMeshEXT)
                    {
                        access_meshlet_position_y = true;
                    }
                }
                else
                {
                    // If the member has a qualified name, use it as the entire chain
                    string qual_mbr_name = get_member_qualified_name(type_id, index);
                    if (!qual_mbr_name.empty())
                        expr = qual_mbr_name;
                    else if (flatten_member_reference)
                        expr += join("_", to_member_name(*type, index));
                    else
                    {
                        // Any pointer dereferences for values are handled in the first access chain.
                        // For pointer chains, the pointer-ness is resolved through an array access.
                        // The only time this is not true is when accessing array of SSBO/UBO.
                        // This case is explicitly handled.
                        expr += to_member_reference(base, *type, index, ptr_chain || i != 0);
                    }
                }
            }

            if (has_member_decoration(type->self, index, DecorationInvariant))
                is_invariant = true;
            if (has_member_decoration(type->self, index, DecorationRelaxedPrecision))
                relaxed_precision = true;

            is_packed = member_is_packed_physical_type(*type, index);
            if (member_is_remapped_physical_type(*type, index))
                physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
            else
                physical_type = 0;

            row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
            type = &get<SPIRType>(type->member_types[index]);
        }
        // Matrix -> Vector
        else if (type->columns > 1)
        {
            // If we have a row-major matrix here, we need to defer any transpose in case this access chain
            // is used to store a column. We can resolve it right here and now if we access a scalar directly,
            // by flipping indexing order of the matrix.
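            // e.g. a single scalar of a row-major m accessed as m[c][r] in SPIR-V terms is
            // emitted as m[r][c], avoiding a transpose of the whole matrix; partial (column)
            // accesses instead defer the conversion via the meta flags below.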

            expr += "[";
            if (is_literal)
                expr += convert_to_string(index);
            else
                expr += to_unpacked_expression(index, register_expression_read);
            expr += "]";

            // If the physical type has an unnatural vecsize,
            // we must assume it's a faked struct where the .data member
            // is used for the real payload.
            if (physical_type)
            {
                auto &phys = get<SPIRType>(physical_type);
                if (phys.vecsize > 4 || phys.columns > 4)
                    expr += ".data";
            }

            type_id = type->parent_type;
            type = &get<SPIRType>(type_id);
        }
        // Vector -> Scalar
        else if (type->vecsize > 1)
        {
            string deferred_index;
            if (row_major_matrix_needs_conversion)
            {
                // Flip indexing order.
                auto column_index = expr.find_last_of('[');
                if (column_index != string::npos)
                {
                    deferred_index = expr.substr(column_index);

                    auto end_deferred_index = deferred_index.find_last_of(']');
                    if (end_deferred_index != string::npos && end_deferred_index + 1 != deferred_index.size())
                    {
                        // If we have any data member fixups, it must be transposed so that it refers to this index.
                        // E.g. [0].data followed by [1] would be shuffled to [1][0].data which is wrong,
                        // and needs to be [1].data[0] instead.
                        end_deferred_index++;
                        deferred_index = deferred_index.substr(end_deferred_index) +
                                         deferred_index.substr(0, end_deferred_index);
                    }

                    expr.resize(column_index);
                }
            }

            // Internally, access chain implementation can also be used on composites,
            // ignore scalar access workarounds in this case.
            StorageClass effective_storage = StorageClassGeneric;
            bool ignore_potential_sliced_writes = false;
            if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
            {
                if (expression_type(base).pointer)
                    effective_storage = get_expression_effective_storage_class(base);

                // Special consideration for control points.
                // Control points can only be written by InvocationID, so there is no need
                // to consider scalar access chains here.
                // Cleans up some cases where it's very painful to determine the accurate storage class
                // since blocks can be partially masked ...
                auto *var = maybe_get_backing_variable(base);
                if (var && var->storage == StorageClassOutput &&
                    get_execution_model() == ExecutionModelTessellationControl &&
                    !has_decoration(var->self, DecorationPatch))
                {
                    ignore_potential_sliced_writes = true;
                }
            }
            else
                ignore_potential_sliced_writes = true;

            if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
            {
                // On some backends, we might not be able to safely access individual scalars in a vector.
                // To work around this, we might have to cast the access chain reference to something which can,
                // like a pointer to scalar, which we can then index into.
                prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
                                                       is_packed);
            }

            if (is_literal)
            {
                bool out_of_bounds = (index >= type->vecsize);

                if (!is_packed && !row_major_matrix_needs_conversion)
                {
                    expr += ".";
                    expr += index_to_swizzle(out_of_bounds ? 0 : index);
                }
                else
                {
                    // For packed vectors, we can only access them as an array, not by swizzle.
                    expr += join("[", out_of_bounds ? 0 : index, "]");
                }
            }
            else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
            {
                auto &c = get<SPIRConstant>(index);
                bool out_of_bounds = (c.scalar() >= type->vecsize);

                if (c.specialization)
                {
                    // If the index is a spec constant, we cannot turn extract into a swizzle.
                    expr += join("[", out_of_bounds ? "0" : to_expression(index), "]");
                }
                else
                {
                    expr += ".";
                    expr += index_to_swizzle(out_of_bounds ? 0 : c.scalar());
                }
            }
            else
            {
                expr += "[";
                expr += to_unpacked_expression(index, register_expression_read);
                expr += "]";
            }

            if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
            {
                if (prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
                                                           is_packed))
                {
                    // We're in a pointer context now, so just remove any member dereference.
                    auto first_index = deferred_index.find_first_of('[');
                    if (first_index != string::npos && first_index != 0)
                        deferred_index = deferred_index.substr(first_index);
                }
            }

            if (access_meshlet_position_y)
            {
                if (is_literal)
                {
                    access_meshlet_position_y = index == 1;
                }
                else
                {
                    const auto *c = maybe_get<SPIRConstant>(index);
                    if (c)
                        access_meshlet_position_y = c->scalar() == 1;
                    else
                    {
                        // We don't know, but we have to assume no.
                        // Flip Y in mesh shaders is an opt-in horrible hack, so we'll have to assume shaders try to behave.
                        access_meshlet_position_y = false;
                    }
                }
            }

            expr += deferred_index;
            row_major_matrix_needs_conversion = false;

            is_packed = false;
            physical_type = 0;
            type_id = type->parent_type;
            type = &get<SPIRType>(type_id);
        }
        else if (!backend.allow_truncated_access_chain)
            SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
    }

    if (pending_array_enclose)
    {
        SPIRV_CROSS_THROW("Flattening of multidimensional arrays was enabled, "
                          "but the access chain was terminated in the middle of a multidimensional array. "
                          "This is not supported.");
    }

    if (meta)
    {
        meta->need_transpose = row_major_matrix_needs_conversion;
        meta->storage_is_packed = is_packed;
        meta->storage_is_invariant = is_invariant;
        meta->storage_physical_type = physical_type;
        meta->relaxed_precision = relaxed_precision;
        meta->access_meshlet_position_y = access_meshlet_position_y;
    }

    return expr;
}

void CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t)
{
}

bool CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
{
    return false;
}

string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
{
    auto ret = join(basename, "_", to_member_name(type, index));
    ParsedIR::sanitize_underscores(ret);
    return ret;
}
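
// e.g. basename "UBO" and a member named "model" flatten to "UBO_model"; sanitize_underscores
// then collapses any runs of underscores the join may have produced, since those are reserved in GLSL.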

string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
                                  AccessChainMeta *meta, bool ptr_chain)
{
    if (flattened_buffer_blocks.count(base))
    {
        uint32_t matrix_stride = 0;
        uint32_t array_stride = 0;
        bool need_transpose = false;
        flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
                                      &array_stride, ptr_chain);

        if (meta)
        {
            meta->need_transpose = target_type.columns > 1 && need_transpose;
            meta->storage_is_packed = false;
        }

        return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride,
                                      need_transpose);
    }
    else if (flattened_structs.count(base) && count > 0)
    {
        AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
        if (ptr_chain)
            flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;

        if (flattened_structs[base])
        {
            flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
            if (meta)
                meta->flattened_struct = target_type.basetype == SPIRType::Struct;
        }

        auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
        if (meta)
        {
            meta->need_transpose = false;
            meta->storage_is_packed = false;
        }

        auto basename = to_flattened_access_chain_expression(base);
        auto ret = join(basename, "_", chain);
        ParsedIR::sanitize_underscores(ret);
        return ret;
    }
    else
    {
        AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
        if (ptr_chain)
            flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
        return access_chain_internal(base, indices, count, flags, meta);
    }
}
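
// Sketch of the flattened-UBO path (hypothetical shader): when a block such as
//   uniform UBO { mat4 mvp; } ubo;
// has been flattened to a plain vec4 uniform array, an access to ubo.mvp is rebuilt from
// vec4 loads at the computed offsets rather than emitted as a member chain.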

string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
{
    auto expr = type_to_glsl_constructor(type);
    expr += '(';

    for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
    {
        if (i)
            expr += ", ";

        auto &member_type = get<SPIRType>(type.member_types[i]);
        if (member_type.basetype == SPIRType::Struct)
            expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type);
        else
            expr += to_flattened_struct_member(basename, type, i);
    }
    expr += ')';
    return expr;
}
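
// e.g. for struct Foo { vec3 a; float b; } flattened under basename "foo", this builds the
// constructor expression "Foo(foo_a, foo_b)", recursing for nested struct members.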

std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
{
    // Do not use to_expression as that will unflatten access chains.
    string basename;
    if (const auto *var = maybe_get<SPIRVariable>(id))
        basename = to_name(var->self);
    else if (const auto *expr = maybe_get<SPIRExpression>(id))
        basename = expr->expression;
    else
        basename = to_expression(id);

    return basename;
}

void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
                                          const SmallVector<uint32_t> &indices)
{
    SmallVector<uint32_t> sub_indices = indices;
    sub_indices.push_back(0);

    auto *member_type = &type;
    for (auto &index : indices)
        member_type = &get<SPIRType>(member_type->member_types[index]);

    for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
    {
        sub_indices.back() = i;
        auto lhs = join(basename, "_", to_member_name(*member_type, i));
        ParsedIR::sanitize_underscores(lhs);

        if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
        {
            store_flattened_struct(lhs, rhs_id, type, sub_indices);
        }
        else
        {
            auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices);
            statement(lhs, " = ", rhs, ";");
        }
    }
}
10826 | |
10827 | void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value) |
10828 | { |
10829 | auto &type = expression_type(id: lhs_id); |
10830 | auto basename = to_flattened_access_chain_expression(id: lhs_id); |
10831 | store_flattened_struct(basename, rhs_id: value, type, indices: {}); |
10832 | } |
10833 | |
10834 | std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, |
10835 | const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, |
10836 | uint32_t /* array_stride */, bool need_transpose) |
10837 | { |
10838 | if (!target_type.array.empty()) |
10839 | SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened"); |
10840 | else if (target_type.basetype == SPIRType::Struct) |
10841 | return flattened_access_chain_struct(base, indices, count, target_type, offset); |
10842 | else if (target_type.columns > 1) |
10843 | return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose); |
10844 | else |
10845 | return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose); |
10846 | } |
10847 | |
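// Emits a struct constructor (or a brace initializer on backends which cannot
// declare struct types inline) where each member is loaded from the flattened
// buffer at its declared offset.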
std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
                                                        const SPIRType &target_type, uint32_t offset)
{
	std::string expr;

	if (backend.can_declare_struct_inline)
	{
		expr += type_to_glsl_constructor(target_type);
		expr += "(";
	}
	else
		expr += "{";

	for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
	{
		if (i != 0)
			expr += ", ";

		const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
		uint32_t member_offset = type_struct_member_offset(target_type, i);

		// The access chain terminates at the struct, so we need to find matrix strides and row-major information
		// ahead of time.
		bool need_transpose = false;
		bool relaxed = false;
		uint32_t matrix_stride = 0;
		if (member_type.columns > 1)
		{
			auto decorations = combined_decoration_for_member(target_type, i);
			need_transpose = decorations.get(DecorationRowMajor);
			relaxed = decorations.get(DecorationRelaxedPrecision);
			matrix_stride = type_struct_member_matrix_stride(target_type, i);
		}

		auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
		                                  0 /* array_stride */, need_transpose);

		// Cannot forward transpositions, so resolve them here.
		if (need_transpose)
			expr += convert_row_major_matrix(tmp, member_type, 0, false, relaxed);
		else
			expr += tmp;
	}

	expr += backend.can_declare_struct_inline ? ")" : "}";

	return expr;
}

std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
                                                        const SPIRType &target_type, uint32_t offset,
                                                        uint32_t matrix_stride, bool need_transpose)
{
	assert(matrix_stride);
	SPIRType tmp_type = target_type;
	if (need_transpose)
		swap(tmp_type.vecsize, tmp_type.columns);

	std::string expr;

	expr += type_to_glsl_constructor(tmp_type);
	expr += "(";

	for (uint32_t i = 0; i < tmp_type.columns; i++)
	{
		if (i != 0)
			expr += ", ";

		expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride,
		                                      matrix_stride, /* need_transpose= */ false);
	}

	expr += ")";

	return expr;
}

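// The flattened buffer is exposed as an array of vec4s (16-byte words).
// A scalar or vector load therefore becomes buffer_name[word_index] plus a swizzle
// selecting the starting component; e.g. a float at byte offset 20 reads as UBO[1].y
// (the buffer name here is illustrative).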
std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
                                                        const SPIRType &target_type, uint32_t offset,
                                                        uint32_t matrix_stride, bool need_transpose)
{
	auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);

	auto buffer_name = to_name(expression_type(base).self);

	if (need_transpose)
	{
		std::string expr;

		if (target_type.vecsize > 1)
		{
			expr += type_to_glsl_constructor(target_type);
			expr += "(";
		}

		for (uint32_t i = 0; i < target_type.vecsize; ++i)
		{
			if (i != 0)
				expr += ", ";

			uint32_t component_offset = result.second + i * matrix_stride;

			assert(component_offset % (target_type.width / 8) == 0);
			uint32_t index = component_offset / (target_type.width / 8);

			expr += buffer_name;
			expr += "[";
			expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
			expr += convert_to_string(index / 4);
			expr += "]";

			expr += vector_swizzle(1, index % 4);
		}

		if (target_type.vecsize > 1)
		{
			expr += ")";
		}

		return expr;
	}
	else
	{
		assert(result.second % (target_type.width / 8) == 0);
		uint32_t index = result.second / (target_type.width / 8);

		std::string expr;

		expr += buffer_name;
		expr += "[";
		expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
		expr += convert_to_string(index / 4);
		expr += "]";

		expr += vector_swizzle(target_type.vecsize, index % 4);

		return expr;
	}
}

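// Walks the access chain and splits it into a constant byte offset plus a string
// holding any dynamic index arithmetic, scaled to word_stride units (16 bytes for
// the vec4-array case above). The returned pair is (dynamic-index expression,
// constant offset); the expression is either empty or ends with " + " so callers
// can append the constant word index directly.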
std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
    const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
    bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
{
	// Start traversing type hierarchy at the proper non-pointer types.
	const auto *type = &get_pointee_type(basetype);

	std::string expr;

	// Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
	bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
	uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
	uint32_t array_stride = out_array_stride ? *out_array_stride : 0;

	for (uint32_t i = 0; i < count; i++)
	{
		uint32_t index = indices[i];

		// Pointers
		if (ptr_chain && i == 0)
		{
			// Here, the pointer type will be decorated with an array stride.
			array_stride = get_decoration(basetype.self, DecorationArrayStride);
			if (!array_stride)
				SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");

			auto *constant = maybe_get<SPIRConstant>(index);
			if (constant)
			{
				// Constant array access.
				offset += constant->scalar() * array_stride;
			}
			else
			{
				// Dynamic array access.
				if (array_stride % word_stride)
				{
					SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
					                  "of a 4-component vector. "
					                  "Likely culprit here is a float or vec2 array inside a push "
					                  "constant block which is std430. "
					                  "This cannot be flattened. Try using std140 layout instead.");
				}

				expr += to_enclosed_expression(index);
				expr += " * ";
				expr += convert_to_string(array_stride / word_stride);
				expr += " + ";
			}
		}
		// Arrays
		else if (!type->array.empty())
		{
			auto *constant = maybe_get<SPIRConstant>(index);
			if (constant)
			{
				// Constant array access.
				offset += constant->scalar() * array_stride;
			}
			else
			{
				// Dynamic array access.
				if (array_stride % word_stride)
				{
					SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
					                  "of a 4-component vector. "
					                  "Likely culprit here is a float or vec2 array inside a push "
					                  "constant block which is std430. "
					                  "This cannot be flattened. Try using std140 layout instead.");
				}

				expr += to_enclosed_expression(index, false);
				expr += " * ";
				expr += convert_to_string(array_stride / word_stride);
				expr += " + ";
			}

			uint32_t parent_type = type->parent_type;
			type = &get<SPIRType>(parent_type);

			if (!type->array.empty())
				array_stride = get_decoration(parent_type, DecorationArrayStride);
		}
		// For structs, the index refers to a constant, which indexes into the members.
		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
		else if (type->basetype == SPIRType::Struct)
		{
			index = evaluate_constant_u32(index);

			if (index >= type->member_types.size())
				SPIRV_CROSS_THROW("Member index is out of bounds!");

			offset += type_struct_member_offset(*type, index);

			auto &struct_type = *type;
			type = &get<SPIRType>(type->member_types[index]);

			if (type->columns > 1)
			{
				matrix_stride = type_struct_member_matrix_stride(struct_type, index);
				row_major_matrix_needs_conversion =
				    combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
			}
			else
				row_major_matrix_needs_conversion = false;

			if (!type->array.empty())
				array_stride = type_struct_member_array_stride(struct_type, index);
		}
		// Matrix -> Vector
		else if (type->columns > 1)
		{
			auto *constant = maybe_get<SPIRConstant>(index);
			if (constant)
			{
				index = evaluate_constant_u32(index);
				offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
			}
			else
			{
				uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
				// Dynamic array access.
				if (indexing_stride % word_stride)
				{
					SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
					                  "4-component vector. "
					                  "Likely culprit here is a row-major matrix being accessed dynamically. "
					                  "This cannot be flattened. Try using std140 layout instead.");
				}

				expr += to_enclosed_expression(index, false);
				expr += " * ";
				expr += convert_to_string(indexing_stride / word_stride);
				expr += " + ";
			}

			type = &get<SPIRType>(type->parent_type);
		}
		// Vector -> Scalar
		else if (type->vecsize > 1)
		{
			auto *constant = maybe_get<SPIRConstant>(index);
			if (constant)
			{
				index = evaluate_constant_u32(index);
				offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
			}
			else
			{
				uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);

				// Dynamic array access.
				if (indexing_stride % word_stride)
				{
					SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
					                  "size of a 4-component vector. "
					                  "This cannot be flattened in legacy targets.");
				}

				expr += to_enclosed_expression(index, false);
				expr += " * ";
				expr += convert_to_string(indexing_stride / word_stride);
				expr += " + ";
			}

			type = &get<SPIRType>(type->parent_type);
		}
		else
			SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
	}

	if (need_transpose)
		*need_transpose = row_major_matrix_needs_conversion;
	if (out_matrix_stride)
		*out_matrix_stride = matrix_stride;
	if (out_array_stride)
		*out_array_stride = array_stride;

	return std::make_pair(expr, offset);
}

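// Determines whether a pointer expression must be explicitly dereferenced when used.
// Plain variables, handles and access chains already behave like values in the output
// language; only true variable pointers (e.g. phi variables, or forwarded copies of
// them) need the dereference.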
bool CompilerGLSL::should_dereference(uint32_t id)
{
	const auto &type = expression_type(id);
	// Non-pointer expressions don't need to be dereferenced.
	if (!type.pointer)
		return false;

	// Handles shouldn't be dereferenced either.
	if (!expression_is_lvalue(id))
		return false;

	// If id is a variable but not a phi variable, we should not dereference it.
	if (auto *var = maybe_get<SPIRVariable>(id))
		return var->phi_variable;

	if (auto *expr = maybe_get<SPIRExpression>(id))
	{
		// If id is an access chain, we should not dereference it.
		if (expr->access_chain)
			return false;

		// If id is a forwarded copy of a variable pointer, we should not dereference it.
		SPIRVariable *var = nullptr;
		while (expr->loaded_from && expression_is_forwarded(expr->self))
		{
			auto &src_type = expression_type(expr->loaded_from);
			// To be a copy, the pointer and its source expression must be the
			// same type. Can't check type.self, because for some reason that's
			// usually the base type with pointers stripped off. This check is
			// complex enough that I've hoisted it out of the while condition.
			if (src_type.pointer != type.pointer || src_type.pointer_depth != type.pointer_depth ||
			    src_type.parent_type != type.parent_type)
				break;
			if ((var = maybe_get<SPIRVariable>(expr->loaded_from)))
				break;
			if (!(expr = maybe_get<SPIRExpression>(expr->loaded_from)))
				break;
		}

		return !var || var->phi_variable;
	}

	// Otherwise, we should dereference this pointer expression.
	return true;
}

bool CompilerGLSL::should_forward(uint32_t id) const
{
	// If id is a variable, we try to forward it regardless of the force_temporary check below.
	// This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL.

	auto *var = maybe_get<SPIRVariable>(id);
	if (var)
	{
		// Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation.
		return !(has_decoration(id, DecorationBuiltIn) && has_decoration(id, DecorationVolatile));
	}

	// For debugging emit temporary variables for all expressions
	if (options.force_temporary)
		return false;

	// If an expression carries enough dependencies we need to stop forwarding at some point,
	// or we explode compilers. There are usually limits to how much we can nest expressions.
	auto *expr = maybe_get<SPIRExpression>(id);
	const uint32_t max_expression_dependencies = 64;
	if (expr && expr->expression_dependencies.size() >= max_expression_dependencies)
		return false;

	if (expr && expr->loaded_from
	    && has_decoration(expr->loaded_from, DecorationBuiltIn)
	    && has_decoration(expr->loaded_from, DecorationVolatile))
	{
		// Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation.
		return false;
	}

	// Immutable expressions can always be forwarded.
	if (is_immutable(id))
		return true;

	return false;
}

bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
{
	// Used only by opcodes which don't do any real "work", they just swizzle data in some fashion.
	return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
}

void CompilerGLSL::track_expression_read(uint32_t id)
{
	switch (ir.ids[id].get_type())
	{
	case TypeExpression:
	{
		auto &e = get<SPIRExpression>(id);
		for (auto implied_read : e.implied_read_expressions)
			track_expression_read(implied_read);
		break;
	}

	case TypeAccessChain:
	{
		auto &e = get<SPIRAccessChain>(id);
		for (auto implied_read : e.implied_read_expressions)
			track_expression_read(implied_read);
		break;
	}

	default:
		break;
	}

	// If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
	// In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
	if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
	{
		auto &v = expression_usage_counts[id];
		v++;

		// If we create an expression outside a loop,
		// but access it inside a loop, we're implicitly reading it multiple times.
		// If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
		// working inside the backend compiler.
		if (expression_read_implies_multiple_reads(id))
			v++;

		if (v >= 2)
		{
			//if (v == 2)
			//	fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);

			// Force a recompile after this pass to avoid forwarding this variable.
			force_temporary_and_recompile(id);
		}
	}
}

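// A call expression can only be forwarded if the result is not a forced temporary and
// every argument is forwardable; impure calls additionally require all globals and
// aliased variables to be forwardable.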
bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
{
	if (forced_temporaries.find(id) != end(forced_temporaries))
		return false;

	for (uint32_t i = 0; i < num_args; i++)
		if (!should_forward(args[i]))
			return false;

	// We need to forward globals as well.
	if (!pure)
	{
		for (auto global : global_variables)
			if (!should_forward(global))
				return false;
		for (auto aliased : aliased_variables)
			if (!should_forward(aliased))
				return false;
	}

	return true;
}

void CompilerGLSL::register_impure_function_call()
{
	// Impure functions can modify globals and aliased variables, so invalidate them as well.
	for (auto global : global_variables)
		flush_dependees(get<SPIRVariable>(global));
	for (auto aliased : aliased_variables)
		flush_dependees(get<SPIRVariable>(aliased));
}

void CompilerGLSL::register_call_out_argument(uint32_t id)
{
	register_write(id);

	auto *var = maybe_get<SPIRVariable>(id);
	if (var)
		flush_variable_declaration(var->self);
}

string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
{
	// These variables are always function local,
	// so make sure we emit the variable without storage qualifiers.
	// Some backends will inject custom variables locally in a function
	// with a storage qualifier which is not function-local.
	auto old_storage = var.storage;
	var.storage = StorageClassFunction;
	auto expr = variable_decl(var);
	var.storage = old_storage;
	return expr;
}

void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
{
	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
	if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self))
	{
		auto &type = get<SPIRType>(var.basetype);
		auto &flags = get_decoration_bitset(var.self);
		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
		flushed_phi_variables.insert(var.self);
	}
}

void CompilerGLSL::flush_variable_declaration(uint32_t id)
{
	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
	auto *var = maybe_get<SPIRVariable>(id);
	if (var && var->deferred_declaration)
	{
		string initializer;
		if (options.force_zero_initialized_variables &&
		    (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
		     var->storage == StorageClassPrivate) &&
		    !var->initializer && type_can_zero_initialize(get_variable_data_type(*var)))
		{
			initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var)));
		}

		statement(variable_decl_function_local(*var), initializer, ";");
		var->deferred_declaration = false;
	}
	if (var)
	{
		emit_variable_temporary_copies(*var);
	}
}

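// Collapses chained swizzles where the trailing swizzle is an identity-like prefix
// (.x, .xy, .xyz, ...), e.g. foobar.wyx.xy becomes foobar.wy.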
bool CompilerGLSL::remove_duplicate_swizzle(string &op)
{
	auto pos = op.find_last_of('.');
	if (pos == string::npos || pos == 0)
		return false;

	string final_swiz = op.substr(pos + 1, string::npos);

	if (backend.swizzle_is_function)
	{
		if (final_swiz.size() < 2)
			return false;

		if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
			final_swiz.erase(final_swiz.size() - 2, string::npos);
		else
			return false;
	}

	// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
	// If so, and previous swizzle is of same length,
	// we can drop the final swizzle altogether.
	for (uint32_t i = 0; i < final_swiz.size(); i++)
	{
		static const char expected[] = { 'x', 'y', 'z', 'w' };
		if (i >= 4 || final_swiz[i] != expected[i])
			return false;
	}

	auto prevpos = op.find_last_of('.', pos - 1);
	if (prevpos == string::npos)
		return false;

	prevpos++;

	// Make sure there are only swizzles here ...
	for (auto i = prevpos; i < pos; i++)
	{
		if (op[i] < 'w' || op[i] > 'z')
		{
			// If swizzles are foo.xyz() like in C++ backend for example, check for that.
			if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
				break;
			return false;
		}
	}

	// If original swizzle is large enough, just carve out the components we need.
	// E.g. foobar.wyx.xy will turn into foobar.wy.
	if (pos - prevpos >= final_swiz.size())
	{
		op.erase(prevpos + final_swiz.size(), string::npos);

		// Add back the function call ...
		if (backend.swizzle_is_function)
			op += "()";
	}
	return true;
}

// Optimizes away vector swizzles where we have something like
// vec3 foo;
// foo.xyz <-- swizzle expression does nothing.
// This is a very common pattern after OpCompositeConstruct.
bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
{
	auto pos = op.find_last_of('.');
	if (pos == string::npos || pos == 0)
		return false;

	string final_swiz = op.substr(pos + 1, string::npos);

	if (backend.swizzle_is_function)
	{
		if (final_swiz.size() < 2)
			return false;

		if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
			final_swiz.erase(final_swiz.size() - 2, string::npos);
		else
			return false;
	}

	// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
	// If so, and the swizzle covers the full vector, we can drop the swizzle altogether.
	for (uint32_t i = 0; i < final_swiz.size(); i++)
	{
		static const char expected[] = { 'x', 'y', 'z', 'w' };
		if (i >= 4 || final_swiz[i] != expected[i])
			return false;
	}

	auto &type = expression_type(base);

	// Sanity checking ...
	assert(type.columns == 1 && type.array.empty());

	if (type.vecsize == final_swiz.size())
		op.erase(pos, string::npos);
	return true;
}

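// Builds the argument list of a composite constructor. When consecutive elements are
// scalar extracts from the same base vector, their swizzles are merged so that e.g.
// foo.x, foo.y, foo.z can collapse back to foo.xyz (or just foo if that covers the
// whole vector), instead of emitting three separate expression reads.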
string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
{
	ID base = 0;
	string op;
	string subop;

	// Can only merge swizzles for vectors.
	auto &type = get<SPIRType>(return_type);
	bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
	bool swizzle_optimization = false;

	for (uint32_t i = 0; i < length; i++)
	{
		auto *e = maybe_get<SPIRExpression>(elems[i]);

		// If we're merging another scalar which belongs to the same base object,
		// just merge the swizzles so we avoid triggering more than one expression read where possible.
		if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
		{
			// Only supposed to be used for vector swizzle -> scalar.
			assert(!e->expression.empty() && e->expression.front() == '.');
			subop += e->expression.substr(1, string::npos);
			swizzle_optimization = true;
		}
		else
		{
			// We'll likely end up with duplicated swizzles, e.g.
			// foobar.xyz.xyz from patterns like
			// OpVectorShuffle
			// OpCompositeExtract x 3
			// OpCompositeConstruct 3x + other scalar.
			// Just modify op in-place.
			if (swizzle_optimization)
			{
				if (backend.swizzle_is_function)
					subop += "()";

				// Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
				// The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
				// We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
				// Essentially, we can only remove one set of swizzles, since that's what we have control over ...
				// Case 1:
				//  foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
				//               foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
				// Case 2:
				//  foo.xyz: Duplicate swizzle won't kick in.
				//           If foo is vec3, we can remove xyz, giving just foo.
				if (!remove_duplicate_swizzle(subop))
					remove_unity_swizzle(base, subop);

				// Strips away redundant parens if we created them during component extraction.
				strip_enclosed_expression(subop);
				swizzle_optimization = false;
				op += subop;
			}
			else
				op += subop;

			if (i)
				op += ", ";

			bool uses_buffer_offset =
			    type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset);
			subop = to_composite_constructor_expression(type, elems[i], uses_buffer_offset);
		}

		base = e ? e->base_expression : ID(0);
	}

	if (swizzle_optimization)
	{
		if (backend.swizzle_is_function)
			subop += "()";

		if (!remove_duplicate_swizzle(subop))
			remove_unity_swizzle(base, subop);
		// Strips away redundant parens if we created them during component extraction.
		strip_enclosed_expression(subop);
	}

	op += subop;
	return op;
}

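// Separate images and samplers are not real call arguments when they have been
// (or will be) fused into combined image samplers, so they are skipped when
// emitting the call.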
bool CompilerGLSL::skip_argument(uint32_t id) const
{
	if (!combined_image_samplers.empty() || !options.vulkan_semantics)
	{
		auto &type = expression_type(id);
		if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
			return true;
	}
	return false;
}

bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
{
	// Do this with strings because we have a very clear pattern we can check for and it avoids
	// adding lots of special cases to the code emission.
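	// E.g. "a = a + b;" can be emitted as "a += b;", and "a = a + 1;" as "a++;".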
	if (rhs.size() < lhs.size() + 3)
		return false;

	// Do not optimize matrices. They are a bit awkward to reason about in general
	// (in which order does the operation happen?), and it does not work on MSL anyway.
	if (type.vecsize > 1 && type.columns > 1)
		return false;

	auto index = rhs.find(lhs);
	if (index != 0)
		return false;

	// TODO: Shift operators, but it's not important for now.
	auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
	if (op != lhs.size() + 1)
		return false;

	// Check that the op is followed by space. This excludes && and ||.
	if (rhs[op + 1] != ' ')
		return false;

	char bop = rhs[op];
	auto expr = rhs.substr(lhs.size() + 3);

	// Avoids false positives where we get a = a * b + c.
	// Normally, these expressions are always enclosed, but unexpected code paths may end up hitting this.
	if (needs_enclose_expression(expr))
		return false;

	// Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code.
	// Find some common patterns which are equivalent.
	if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
		statement(lhs, bop, bop, ";");
	else
		statement(lhs, " ", bop, "= ", expr, ";");
	return true;
}

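// Forwarded expressions created inside a control-flow-dependent block cannot safely
// outlive that block; record them on the current block so they can be invalidated
// when the block ends.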
void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
{
	if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
		return;

	assert(current_emitting_block);
	current_emitting_block->invalidate_expressions.push_back(expr);
}

void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
{
	current_emitting_block = &block;

	if (backend.requires_relaxed_precision_analysis)
	{
		// If PHI variables are consumed in unexpected precision contexts, copy them here.
		for (size_t i = 0, n = block.phi_variables.size(); i < n; i++)
		{
			auto &phi = block.phi_variables[i];

			// Ensure we only copy once. We know a priori that this array will lay out
			// the same function variables together.
			if (i && block.phi_variables[i - 1].function_variable == phi.function_variable)
				continue;

			auto itr = temporary_to_mirror_precision_alias.find(phi.function_variable);
			if (itr != temporary_to_mirror_precision_alias.end())
			{
				// Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
				// so it helps to have handle_instruction_precision() on the outside of emit_instruction().
				EmbeddedInstruction inst;
				inst.op = OpCopyObject;
				inst.length = 3;
				inst.ops.push_back(expression_type_id(itr->first));
				inst.ops.push_back(itr->second);
				inst.ops.push_back(itr->first);
				emit_instruction(inst);
			}
		}
	}

	for (auto &op : block.ops)
	{
		auto temporary_copy = handle_instruction_precision(op);
		emit_instruction(op);
		if (temporary_copy.dst_id)
		{
			// Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
			// so it helps to have handle_instruction_precision() on the outside of emit_instruction().
			EmbeddedInstruction inst;
			inst.op = OpCopyObject;
			inst.length = 3;
			inst.ops.push_back(expression_type_id(temporary_copy.src_id));
			inst.ops.push_back(temporary_copy.dst_id);
			inst.ops.push_back(temporary_copy.src_id);

			// Never attempt to hoist mirrored temporaries.
			// They are hoisted in lock-step with their parents.
			block_temporary_hoisting = true;
			emit_instruction(inst);
			block_temporary_hoisting = false;
		}
	}

	current_emitting_block = nullptr;
}

void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
{
	// Allow trivially forwarded expressions like OpLoad or trivial shuffles,
	// these will be marked as having suppressed usage tracking.
	// Our only concern is to make sure arithmetic operations are done in similar ways.
	if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
	    forced_invariant_temporaries.count(expr.self) == 0)
	{
		force_temporary_and_recompile(expr.self);
		forced_invariant_temporaries.insert(expr.self);

		for (auto &dependent : expr.expression_dependencies)
			disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
	}
}

void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
{
	// Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
	// this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
	// in one translation unit, but not another, e.g. due to multiple use of an expression.
	// This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
	// expressions to be temporaries.
	// It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
	// for all reasonable uses of invariant.
	if (!has_decoration(store_id, DecorationInvariant))
		return;

	auto *expr = maybe_get<SPIRExpression>(value_id);
	if (!expr)
		return;

	disallow_forwarding_in_expression_chain(*expr);
}

void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
{
	auto rhs = to_pointer_expression(rhs_expression);

	// Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
	if (!rhs.empty())
	{
		handle_store_to_invariant_variable(lhs_expression, rhs_expression);

		if (!unroll_array_to_complex_store(lhs_expression, rhs_expression))
		{
			auto lhs = to_dereferenced_expression(lhs_expression);
			if (has_decoration(lhs_expression, DecorationNonUniform))
				convert_non_uniform_expression(lhs, lhs_expression);

			// We might need to cast in order to store to a builtin.
			cast_to_variable_store(lhs_expression, rhs, expression_type(rhs_expression));

			// Tries to optimize assignments like "<lhs> = <lhs> op expr".
			// While this is purely cosmetic, this is important for legacy ESSL where loop
			// variable increments must be in either i++ or i += const-expr.
			// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
			if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
				statement(lhs, " = ", rhs, ";");
		}
		register_write(lhs_expression);
	}
}

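// Narrow integer operations may need explicit bitcasts between signed and unsigned.
// Figure out which bit width the instruction actually operates on so that int_type /
// uint_type in emit_instruction() pick matching sized types.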
uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
{
	if (instr.length < 3)
		return 32;

	auto *ops = stream(instr);

	switch (instr.op)
	{
	case OpSConvert:
	case OpConvertSToF:
	case OpUConvert:
	case OpConvertUToF:
	case OpIEqual:
	case OpINotEqual:
	case OpSLessThan:
	case OpSLessThanEqual:
	case OpSGreaterThan:
	case OpSGreaterThanEqual:
	case OpULessThan:
	case OpULessThanEqual:
	case OpUGreaterThan:
	case OpUGreaterThanEqual:
		return expression_type(ops[2]).width;

	case OpSMulExtended:
	case OpUMulExtended:
		return get<SPIRType>(get<SPIRType>(ops[0]).member_types[0]).width;

	default:
	{
		// We can look at result type which is more robust.
		auto *type = maybe_get<SPIRType>(ops[0]);
		if (type && type_is_integral(*type))
			return type->width;
		else
			return 32;
	}
	}
}

uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
{
	if (length < 1)
		return 32;

	switch (op)
	{
	case GLSLstd450SAbs:
	case GLSLstd450SSign:
	case GLSLstd450UMin:
	case GLSLstd450SMin:
	case GLSLstd450UMax:
	case GLSLstd450SMax:
	case GLSLstd450UClamp:
	case GLSLstd450SClamp:
	case GLSLstd450FindSMsb:
	case GLSLstd450FindUMsb:
		return expression_type(ops[0]).width;

	default:
	{
		// We don't need to care about other opcodes, just return 32.
		return 32;
	}
	}
}

void CompilerGLSL::forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length)
{
	// Only GLSL supports RelaxedPrecision directly.
	// We cannot implement this in HLSL or MSL because it is tied to the type system.
	// In SPIR-V, everything must masquerade as 32-bit.
	if (!backend.requires_relaxed_precision_analysis)
		return;

	auto input_precision = analyze_expression_precision(args, length);

	// For expressions which are loaded or directly forwarded, we inherit mediump implicitly.
	// For dst_id to be analyzed properly, it must inherit any relaxed precision decoration from src_id.
	if (input_precision == Options::Mediump)
		set_decoration(dst_id, DecorationRelaxedPrecision);
}

CompilerGLSL::Options::Precision CompilerGLSL::analyze_expression_precision(const uint32_t *args, uint32_t length) const
{
	// Now, analyze the precision at which the arguments would run.
	// GLSL rules are such that the precision used to evaluate an expression is equal to the highest precision
	// for the inputs. Constants do not have inherent precision and do not contribute to this decision.
	// If all inputs are constants, they inherit precision from outer expressions, including an l-value.
	// In this case, we'll have to force a temporary for dst_id so that we can bind the constant expression with
	// correct precision.
	bool expression_has_highp = false;
	bool expression_has_mediump = false;

	for (uint32_t i = 0; i < length; i++)
	{
		uint32_t arg = args[i];

		auto handle_type = ir.ids[arg].get_type();
		if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef)
			continue;

		if (has_decoration(arg, DecorationRelaxedPrecision))
			expression_has_mediump = true;
		else
			expression_has_highp = true;
	}

	if (expression_has_highp)
		return Options::Highp;
	else if (expression_has_mediump)
		return Options::Mediump;
	else
		return Options::DontCare;
}

void CompilerGLSL::analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length)
{
	if (!backend.requires_relaxed_precision_analysis)
		return;

	auto &type = get<SPIRType>(type_id);

	// RelaxedPrecision only applies to 32-bit values.
	if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt)
		return;

	bool operation_is_highp = !has_decoration(dst_id, DecorationRelaxedPrecision);

	auto input_precision = analyze_expression_precision(args, length);
	if (input_precision == Options::DontCare)
	{
		consume_temporary_in_precision_context(type_id, dst_id, input_precision);
		return;
	}

	// In SPIR-V and GLSL, the semantics are flipped for how relaxed precision is determined.
	// In SPIR-V, the operation itself marks RelaxedPrecision, meaning that inputs can be truncated to 16-bit.
	// However, if the expression is not, inputs must be expanded to 32-bit first,
	// since the operation must run at high precision.
	// This is the awkward part, because if we have mediump inputs, or expressions which derived from mediump,
	// we might have to forcefully bind the source IDs to highp temporaries. This is done by clearing decorations
	// and forcing temporaries. Similarly for mediump operations. We bind highp expressions to mediump variables.
	if ((operation_is_highp && input_precision == Options::Mediump) ||
	    (!operation_is_highp && input_precision == Options::Highp))
	{
		auto precision = operation_is_highp ? Options::Highp : Options::Mediump;
		for (uint32_t i = 0; i < length; i++)
		{
			// Rewrites the opcode so that we consume an ID in the correct precision context.
			// This is pretty hacky, but it's the most straightforward way of implementing this without adding
			// lots of extra passes to rewrite all code blocks.
			args[i] = consume_temporary_in_precision_context(expression_type_id(args[i]), args[i], precision);
		}
	}
}

// This is probably not exhaustive ...
static bool opcode_is_precision_sensitive_operation(Op op)
{
	switch (op)
	{
	case OpFAdd:
	case OpFSub:
	case OpFMul:
	case OpFNegate:
	case OpIAdd:
	case OpISub:
	case OpIMul:
	case OpSNegate:
	case OpFMod:
	case OpFDiv:
	case OpFRem:
	case OpSMod:
	case OpSDiv:
	case OpSRem:
	case OpUMod:
	case OpUDiv:
	case OpVectorTimesMatrix:
	case OpMatrixTimesVector:
	case OpMatrixTimesMatrix:
	case OpDPdx:
	case OpDPdy:
	case OpDPdxCoarse:
	case OpDPdyCoarse:
	case OpDPdxFine:
	case OpDPdyFine:
	case OpFwidth:
	case OpFwidthCoarse:
	case OpFwidthFine:
	case OpVectorTimesScalar:
	case OpMatrixTimesScalar:
	case OpOuterProduct:
	case OpFConvert:
	case OpSConvert:
	case OpUConvert:
	case OpConvertSToF:
	case OpConvertUToF:
	case OpConvertFToU:
	case OpConvertFToS:
		return true;

	default:
		return false;
	}
}

// Instructions which just load data but don't do any arithmetic operation should just inherit the decoration.
// SPIR-V doesn't require this, but it's somewhat implied it has to work this way, relaxed precision is only
// relevant when operating on the IDs, not when shuffling things around.
static bool opcode_is_precision_forwarding_instruction(Op op, uint32_t &arg_count)
{
	switch (op)
	{
	case OpLoad:
	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpCompositeExtract:
	case OpVectorExtractDynamic:
	case OpSampledImage:
	case OpImage:
	case OpCopyObject:

	case OpImageRead:
	case OpImageFetch:
	case OpImageSampleImplicitLod:
	case OpImageSampleProjImplicitLod:
	case OpImageSampleDrefImplicitLod:
	case OpImageSampleProjDrefImplicitLod:
	case OpImageSampleExplicitLod:
	case OpImageSampleProjExplicitLod:
	case OpImageSampleDrefExplicitLod:
	case OpImageSampleProjDrefExplicitLod:
	case OpImageGather:
	case OpImageDrefGather:
	case OpImageSparseRead:
	case OpImageSparseFetch:
	case OpImageSparseSampleImplicitLod:
	case OpImageSparseSampleProjImplicitLod:
	case OpImageSparseSampleDrefImplicitLod:
	case OpImageSparseSampleProjDrefImplicitLod:
	case OpImageSparseSampleExplicitLod:
	case OpImageSparseSampleProjExplicitLod:
	case OpImageSparseSampleDrefExplicitLod:
	case OpImageSparseSampleProjDrefExplicitLod:
	case OpImageSparseGather:
	case OpImageSparseDrefGather:
		arg_count = 1;
		return true;

	case OpVectorShuffle:
		arg_count = 2;
		return true;

	case OpCompositeConstruct:
		return true;

	default:
		break;
	}

	return false;
}

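// Runs the relaxed-precision analysis for one instruction before it is emitted,
// possibly rewriting its arguments to consume IDs in the correct precision context.
// If the result has a mirror-precision alias, returns the {dst, src} pair for the
// CopyObject that emit_block_instructions() must emit right after the instruction.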
12034 | CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Instruction &instruction) |
12035 | { |
12036 | auto ops = stream_mutable(instr: instruction); |
12037 | auto opcode = static_cast<Op>(instruction.op); |
12038 | uint32_t length = instruction.length; |
12039 | |
12040 | if (backend.requires_relaxed_precision_analysis) |
12041 | { |
12042 | if (length > 2) |
12043 | { |
12044 | uint32_t forwarding_length = length - 2; |
12045 | |
12046 | if (opcode_is_precision_sensitive_operation(op: opcode)) |
12047 | analyze_precision_requirements(type_id: ops[0], dst_id: ops[1], args: &ops[2], length: forwarding_length); |
12048 | else if (opcode == OpExtInst && length >= 5 && get<SPIRExtension>(id: ops[2]).ext == SPIRExtension::GLSL) |
12049 | analyze_precision_requirements(type_id: ops[0], dst_id: ops[1], args: &ops[4], length: forwarding_length - 2); |
12050 | else if (opcode_is_precision_forwarding_instruction(op: opcode, arg_count&: forwarding_length)) |
12051 | forward_relaxed_precision(dst_id: ops[1], args: &ops[2], length: forwarding_length); |
12052 | } |
12053 | |
12054 | uint32_t result_type = 0, result_id = 0; |
12055 | if (instruction_to_result_type(result_type, result_id, op: opcode, args: ops, length)) |
12056 | { |
12057 | auto itr = temporary_to_mirror_precision_alias.find(x: ops[1]); |
12058 | if (itr != temporary_to_mirror_precision_alias.end()) |
12059 | return { .dst_id: itr->second, .src_id: itr->first }; |
12060 | } |
12061 | } |
12062 | |
12063 | return {}; |
12064 | } |
12065 | |
12066 | void CompilerGLSL::emit_instruction(const Instruction &instruction) |
12067 | { |
12068 | auto ops = stream(instr: instruction); |
12069 | auto opcode = static_cast<Op>(instruction.op); |
12070 | uint32_t length = instruction.length; |
12071 | |
12072 | #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) |
12073 | #define GLSL_BOP_CAST(op, type) \ |
12074 | emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, \ |
12075 | opcode_is_sign_invariant(opcode), implicit_integer_promotion) |
12076 | #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) |
12077 | #define GLSL_UOP_CAST(op) emit_unary_op_cast(ops[0], ops[1], ops[2], #op) |
12078 | #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) |
12079 | #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) |
12080 | #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) |
12081 | #define GLSL_BFOP_CAST(op, type) \ |
12082 | emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) |
12083 | #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) |
12084 | #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) |
12085 | |
12086 | // If we need to do implicit bitcasts, make sure we do it with the correct type. |
12087 | uint32_t integer_width = get_integer_width_for_instruction(instr: instruction); |
12088 | auto int_type = to_signed_basetype(width: integer_width); |
12089 | auto uint_type = to_unsigned_basetype(width: integer_width); |
12090 | |
12091 | // Handle C implicit integer promotion rules. |
12092 | // If we get implicit promotion to int, need to make sure we cast by value to intended return type, |
12093 | // otherwise, future sign-dependent operations and bitcasts will break. |
12094 | bool implicit_integer_promotion = integer_width < 32 && backend.implicit_c_integer_promotion_rules && |
12095 | opcode_can_promote_integer_implicitly(opcode) && |
12096 | get<SPIRType>(id: ops[0]).vecsize == 1; |
12097 | |
12098 | opcode = get_remapped_spirv_op(op: opcode); |
12099 | |
12100 | switch (opcode) |
12101 | { |
12102 | // Dealing with memory |
12103 | case OpLoad: |
12104 | { |
12105 | uint32_t result_type = ops[0]; |
12106 | uint32_t id = ops[1]; |
12107 | uint32_t ptr = ops[2]; |
12108 | |
12109 | flush_variable_declaration(id: ptr); |
12110 | |
12111 | // If we're loading from memory that cannot be changed by the shader, |
12112 | // just forward the expression directly to avoid needless temporaries. |
12113 | // If an expression is mutable and forwardable, we speculate that it is immutable. |
12114 | bool forward = should_forward(id: ptr) && forced_temporaries.find(x: id) == end(cont&: forced_temporaries); |
12115 | |
12116 | // If loading a non-native row-major matrix, mark the expression as need_transpose. |
12117 | bool need_transpose = false; |
12118 | bool old_need_transpose = false; |
12119 | |
12120 | auto *ptr_expression = maybe_get<SPIRExpression>(id: ptr); |
12121 | |
12122 | if (forward) |
12123 | { |
12124 | // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while |
12125 | // taking the expression. |
12126 | if (ptr_expression && ptr_expression->need_transpose) |
12127 | { |
12128 | old_need_transpose = true; |
12129 | ptr_expression->need_transpose = false; |
12130 | need_transpose = true; |
12131 | } |
12132 | else if (is_non_native_row_major_matrix(id: ptr)) |
12133 | need_transpose = true; |
12134 | } |
12135 | |
12136 | // If we are forwarding this load, |
12137 | // don't register the read to access chain here, defer that to when we actually use the expression, |
12138 | // using the add_implied_read_expression mechanism. |
12139 | string expr; |
12140 | |
12141 | bool is_packed = has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
12142 | bool is_remapped = has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID); |
12143 | if (forward || (!is_packed && !is_remapped)) |
12144 | { |
12145 | // For the simple case, we do not need to deal with repacking. |
12146 | expr = to_dereferenced_expression(id: ptr, register_expression_read: false); |
12147 | } |
12148 | else |
12149 | { |
12150 | // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before |
12151 | // storing the expression to a temporary. |
12152 | expr = to_unpacked_expression(id: ptr); |
12153 | } |
12154 | |
12155 | auto &type = get<SPIRType>(id: result_type); |
12156 | auto &expr_type = expression_type(id: ptr); |
12157 | |
12158 | // If the expression has more vector components than the result type, insert |
12159 | // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might |
12160 | // happen with e.g. the MSL backend replacing the type of an input variable. |
12161 | if (expr_type.vecsize > type.vecsize) |
12162 | expr = enclose_expression(expr: expr + vector_swizzle(vecsize: type.vecsize, index: 0)); |
12163 | |
12164 | if (forward && ptr_expression) |
12165 | ptr_expression->need_transpose = old_need_transpose; |
12166 | |
12167 | // We might need to cast in order to load from a builtin. |
12168 | cast_from_variable_load(source_id: ptr, expr, expr_type: type); |
12169 | |
12170 | if (forward && ptr_expression) |
12171 | ptr_expression->need_transpose = false; |
12172 | |
12173 | // We might be trying to load a gl_Position[N], where we should be |
12174 | // doing float4[](gl_in[i].gl_Position, ...) instead. |
12175 | // Similar workarounds are required for input arrays in tessellation. |
12176 | // Also, loading from gl_SampleMask array needs special unroll. |
12177 | unroll_array_from_complex_load(target_id: id, source_id: ptr, expr); |
12178 | |
12179 | if (!type_is_opaque_value(type) && has_decoration(id: ptr, decoration: DecorationNonUniform)) |
12180 | { |
12181 | // If we're loading something non-opaque, we need to handle non-uniform descriptor access. |
12182 | convert_non_uniform_expression(expr, ptr_id: ptr); |
12183 | } |
12184 | |
12185 | if (forward && ptr_expression) |
12186 | ptr_expression->need_transpose = old_need_transpose; |
12187 | |
12188 | bool flattened = ptr_expression && flattened_buffer_blocks.count(x: ptr_expression->loaded_from) != 0; |
12189 | |
12190 | if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(id: ptr) && !flattened) |
12191 | rewrite_load_for_wrapped_row_major(expr, loaded_type: result_type, ptr); |
12192 | |
12193 | // By default, suppress usage tracking since using same expression multiple times does not imply any extra work. |
12194 | // However, if we try to load a complex, composite object from a flattened buffer, |
12195 | // we should avoid emitting the same code over and over, and instead lower the result to a temporary. |
12196 | bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1)); |
12197 | |
12198 | SPIRExpression *e = nullptr; |
12199 | if (!forward && expression_is_non_value_type_array(ptr)) |
12200 | { |
12201 | // Complicated load case where we need to make a copy of ptr, but we cannot, because |
12202 | // it is an array, and our backend does not support arrays as value types. |
12203 | // Emit the temporary, and copy it explicitly. |
12204 | e = &emit_uninitialized_temporary_expression(type: result_type, id); |
12205 | emit_array_copy(expr: nullptr, lhs_id: id, rhs_id: ptr, lhs_storage: StorageClassFunction, rhs_storage: get_expression_effective_storage_class(ptr)); |
12206 | } |
12207 | else |
12208 | e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: forward, suppress_usage_tracking: !usage_tracking); |
12209 | |
12210 | e->need_transpose = need_transpose; |
12211 | register_read(expr: id, chain: ptr, forwarded: forward); |
12212 | |
12213 | if (forward) |
12214 | { |
12215 | // Pass through whether the result is of a packed type and the physical type ID. |
12216 | if (has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypePacked)) |
12217 | set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
12218 | if (has_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID)) |
12219 | { |
12220 | set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID, |
12221 | value: get_extended_decoration(id: ptr, decoration: SPIRVCrossDecorationPhysicalTypeID)); |
12222 | } |
12223 | } |
12224 | else |
12225 | { |
12226 | // This might have been set on an earlier compilation iteration, force it to be unset. |
12227 | unset_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
12228 | unset_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID); |
12229 | } |
12230 | |
12231 | inherit_expression_dependencies(dst: id, source: ptr); |
12232 | if (forward) |
12233 | add_implied_read_expression(e&: *e, source: ptr); |
12234 | break; |
12235 | } |
12236 | |
12237 | case OpInBoundsAccessChain: |
12238 | case OpAccessChain: |
12239 | case OpPtrAccessChain: |
12240 | { |
12241 | auto *var = maybe_get<SPIRVariable>(id: ops[2]); |
12242 | if (var) |
12243 | flush_variable_declaration(id: var->self); |
12244 | |
12245 | // If the base is immutable, the access chain pointer must also be. |
12246 | // If an expression is mutable and forwardable, we speculate that it is immutable. |
12247 | AccessChainMeta meta; |
12248 | bool ptr_chain = opcode == OpPtrAccessChain; |
12249 | auto &target_type = get<SPIRType>(id: ops[0]); |
12250 | auto e = access_chain(base: ops[2], indices: &ops[3], count: length - 3, target_type, meta: &meta, ptr_chain); |
12251 | |
12252 | // If the base is a flattened UBO of struct type, the expression has to be a composite. |
12253 | // In that case, backends which do not support inline syntax need it to be bound to a temporary. |
12254 | // Otherwise, invalid expressions like ({UBO[0].xyz, UBO[0].w, UBO[1]}).member are emitted. |
12255 | bool requires_temporary = false; |
12256 | if (flattened_buffer_blocks.count(x: ops[2]) && target_type.basetype == SPIRType::Struct) |
12257 | requires_temporary = !backend.can_declare_struct_inline; |
12258 | |
12259 | auto &expr = requires_temporary ? |
12260 | emit_op(result_type: ops[0], result_id: ops[1], rhs: std::move(e), forwarding: false) : |
12261 | set<SPIRExpression>(id: ops[1], args: std::move(e), args: ops[0], args: should_forward(id: ops[2])); |
12262 | |
12263 | auto *backing_variable = maybe_get_backing_variable(chain: ops[2]); |
12264 | expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]); |
12265 | expr.need_transpose = meta.need_transpose; |
12266 | expr.access_chain = true; |
12267 | expr.access_meshlet_position_y = meta.access_meshlet_position_y; |
12268 | |
12269 | // Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones. |
12270 | if (meta.storage_is_packed) |
12271 | set_extended_decoration(id: ops[1], decoration: SPIRVCrossDecorationPhysicalTypePacked); |
12272 | if (meta.storage_physical_type != 0) |
12273 | set_extended_decoration(id: ops[1], decoration: SPIRVCrossDecorationPhysicalTypeID, value: meta.storage_physical_type); |
12274 | if (meta.storage_is_invariant) |
12275 | set_decoration(id: ops[1], decoration: DecorationInvariant); |
12276 | if (meta.flattened_struct) |
12277 | flattened_structs[ops[1]] = true; |
12278 | if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis) |
12279 | set_decoration(id: ops[1], decoration: DecorationRelaxedPrecision); |
12280 | |
12281 | // If we have some expression dependencies in our access chain, this access chain is technically a forwarded |
12282 | // temporary which could be subject to invalidation. |
12283 | // Need to assume we're forwarded while calling inherit_expression_dependencies. |
12284 | forwarded_temporaries.insert(x: ops[1]); |
12285 | // The access chain itself is never forced to a temporary, but its dependencies might be. |
12286 | suppressed_usage_tracking.insert(x: ops[1]); |
12287 | |
12288 | for (uint32_t i = 2; i < length; i++) |
12289 | { |
12290 | inherit_expression_dependencies(dst: ops[1], source: ops[i]); |
12291 | add_implied_read_expression(e&: expr, source: ops[i]); |
12292 | } |
12293 | |
12294 | // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries, |
12295 | // we're not forwarded after all. |
12296 | if (expr.expression_dependencies.empty()) |
12297 | forwarded_temporaries.erase(x: ops[1]); |
12298 | |
12299 | break; |
12300 | } |
12301 | |
12302 | case OpStore: |
12303 | { |
12304 | auto *var = maybe_get<SPIRVariable>(id: ops[0]); |
12305 | |
12306 | if (var && var->statically_assigned) |
12307 | var->static_expression = ops[1]; |
12308 | else if (var && var->loop_variable && !var->loop_variable_enable) |
12309 | var->static_expression = ops[1]; |
12310 | else if (var && var->remapped_variable && var->static_expression) |
12311 | { |
12312 | // Skip the write. |
12313 | } |
12314 | else if (flattened_structs.count(x: ops[0])) |
12315 | { |
12316 | store_flattened_struct(lhs_id: ops[0], value: ops[1]); |
12317 | register_write(chain: ops[0]); |
12318 | } |
12319 | else |
12320 | { |
12321 | emit_store_statement(lhs_expression: ops[0], rhs_expression: ops[1]); |
12322 | } |
12323 | |
12324 | // Storing a pointer results in a variable pointer, so we must conservatively assume |
12325 | // we can write through it. |
12326 | if (expression_type(id: ops[1]).pointer) |
12327 | register_write(chain: ops[1]); |
12328 | break; |
12329 | } |
12330 | |
12331 | case OpArrayLength: |
12332 | { |
12333 | uint32_t result_type = ops[0]; |
12334 | uint32_t id = ops[1]; |
12335 | auto e = access_chain_internal(base: ops[2], indices: &ops[3], count: length - 3, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr); |
12336 | if (has_decoration(id: ops[2], decoration: DecorationNonUniform)) |
12337 | convert_non_uniform_expression(expr&: e, ptr_id: ops[2]); |
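| // The emitted form is e.g. "uint(ssbo.data.length())" for an unsized array member ("data" is illustrative). |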
12338 | set<SPIRExpression>(id, args: join(ts: type_to_glsl(type: get<SPIRType>(id: result_type)), ts: "(", ts&: e, ts: ".length())"), args&: result_type, |
12339 | args: true); |
12340 | break; |
12341 | } |
12342 | |
12343 | // Function calls |
12344 | case OpFunctionCall: |
12345 | { |
12346 | uint32_t result_type = ops[0]; |
12347 | uint32_t id = ops[1]; |
12348 | uint32_t func = ops[2]; |
12349 | const auto *arg = &ops[3]; |
12350 | length -= 3; |
12351 | |
12352 | auto &callee = get<SPIRFunction>(id: func); |
12353 | auto &return_type = get<SPIRType>(id: callee.return_type); |
12354 | bool pure = function_is_pure(func: callee); |
12355 | bool control_dependent = function_is_control_dependent(func: callee); |
12356 | |
12357 | bool callee_has_out_variables = false; |
12358 | bool emit_return_value_as_argument = false; |
12359 | |
12360 | // Invalidate out variables passed to functions since they can be OpStore'd to. |
12361 | for (uint32_t i = 0; i < length; i++) |
12362 | { |
12363 | if (callee.arguments[i].write_count) |
12364 | { |
12365 | register_call_out_argument(id: arg[i]); |
12366 | callee_has_out_variables = true; |
12367 | } |
12368 | |
12369 | flush_variable_declaration(id: arg[i]); |
12370 | } |
12371 | |
12372 | if (!return_type.array.empty() && !backend.can_return_array) |
12373 | { |
12374 | callee_has_out_variables = true; |
12375 | emit_return_value_as_argument = true; |
12376 | } |
12377 | |
12378 | if (!pure) |
12379 | register_impure_function_call(); |
12380 | |
12381 | string funexpr; |
12382 | SmallVector<string> arglist; |
12383 | funexpr += to_name(id: func) + "("; |
12384 | |
12385 | if (emit_return_value_as_argument) |
12386 | { |
12387 | statement(ts: type_to_glsl(type: return_type), ts: " ", ts: to_name(id), ts: type_to_array_glsl(type: return_type, variable_id: 0), ts: ";"); |
12388 | arglist.push_back(t: to_name(id)); |
12389 | } |
12390 | |
12391 | for (uint32_t i = 0; i < length; i++) |
12392 | { |
12393 | // Do not pass in separate images or samplers if we're remapping |
12394 | // to combined image samplers. |
12395 | if (skip_argument(id: arg[i])) |
12396 | continue; |
12397 | |
12398 | arglist.push_back(t: to_func_call_arg(callee.arguments[i], id: arg[i])); |
12399 | } |
12400 | |
12401 | for (auto &combined : callee.combined_parameters) |
12402 | { |
12403 | auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]); |
12404 | auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]); |
12405 | arglist.push_back(t: to_combined_image_sampler(image_id, samp_id: sampler_id)); |
12406 | } |
12407 | |
12408 | append_global_func_args(func: callee, index: length, arglist); |
12409 | |
12410 | funexpr += merge(list: arglist); |
12411 | funexpr += ")"; |
12412 | |
12413 | // Check for function call constraints. |
12414 | check_function_call_constraints(args: arg, length); |
12415 | |
12416 | if (return_type.basetype != SPIRType::Void) |
12417 | { |
12418 | // If the function actually writes to an out variable, |
12419 | // take the conservative route and do not forward. |
12420 | // The problem is that we might not read the function |
12421 | // result (and emit the function) before an out variable |
12422 | // is read (common case when return value is ignored! |
12423 | // In order to avoid start tracking invalid variables, |
12424 | // just avoid the forwarding problem altogether. |
12425 | bool forward = args_will_forward(id, args: arg, num_args: length, pure) && !callee_has_out_variables && pure && |
12426 | (forced_temporaries.find(x: id) == end(cont&: forced_temporaries)); |
12427 | |
12428 | if (emit_return_value_as_argument) |
12429 | { |
12430 | statement(ts&: funexpr, ts: ";"); |
12431 | set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true); |
12432 | } |
12433 | else |
12434 | emit_op(result_type, result_id: id, rhs: funexpr, forwarding: forward); |
12435 | |
12436 | // Function calls are implicit loads from all variables in question. |
12437 | // Set dependencies for them. |
12438 | for (uint32_t i = 0; i < length; i++) |
12439 | register_read(expr: id, chain: arg[i], forwarded: forward); |
12440 | |
12441 | // If we're going to forward the temporary result, |
12442 | // put dependencies on every variable that must not change. |
12443 | if (forward) |
12444 | register_global_read_dependencies(func: callee, id); |
12445 | } |
12446 | else |
12447 | statement(ts&: funexpr, ts: ";"); |
12448 | |
12449 | if (control_dependent) |
12450 | register_control_dependent_expression(expr: id); |
12451 | |
12452 | break; |
12453 | } |
12454 | |
12455 | // Composite munging |
12456 | case OpCompositeConstruct: |
12457 | { |
12458 | uint32_t result_type = ops[0]; |
12459 | uint32_t id = ops[1]; |
12460 | const auto *const elems = &ops[2]; |
12461 | length -= 2; |
12462 | |
12463 | bool forward = true; |
12464 | for (uint32_t i = 0; i < length; i++) |
12465 | forward = forward && should_forward(id: elems[i]); |
12466 | |
12467 | auto &out_type = get<SPIRType>(id: result_type); |
12468 | auto *in_type = length > 0 ? &expression_type(id: elems[0]) : nullptr; |
12469 | |
12470 | // Only splat if we have vector constructors. |
12471 | // Arrays and structs must be initialized properly in full. |
12472 | bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct; |
12473 | |
12474 | bool splat = false; |
12475 | bool swizzle_splat = false; |
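| // A splat broadcasts one scalar through a constructor, e.g. vec4(s); a swizzle splat |
| // broadcasts via swizzle instead, roughly s.xxxx (only on backends that can swizzle scalars). |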
12476 | |
12477 | if (in_type) |
12478 | { |
12479 | splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting; |
12480 | swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar; |
12481 | |
12482 | if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(type: *in_type)) |
12483 | { |
12484 | // Cannot swizzle literal integers as a special case. |
12485 | swizzle_splat = false; |
12486 | } |
12487 | } |
12488 | |
12489 | if (splat || swizzle_splat) |
12490 | { |
12491 | uint32_t input = elems[0]; |
12492 | for (uint32_t i = 0; i < length; i++) |
12493 | { |
12494 | if (input != elems[i]) |
12495 | { |
12496 | splat = false; |
12497 | swizzle_splat = false; |
12498 | } |
12499 | } |
12500 | } |
12501 | |
12502 | if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) |
12503 | forward = false; |
12504 | if (!out_type.array.empty() && !backend.can_declare_arrays_inline) |
12505 | forward = false; |
12506 | if (type_is_empty(type: out_type) && !backend.supports_empty_struct) |
12507 | forward = false; |
12508 | |
12509 | string constructor_op; |
12510 | if (backend.use_initializer_list && composite) |
12511 | { |
12512 | bool needs_trailing_bracket = false; |
12513 | // Only use this path if we are building composites. |
12514 | // This path cannot be used for arithmetic. |
12515 | if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty()) |
12516 | constructor_op += type_to_glsl_constructor(type: get<SPIRType>(id: result_type)); |
12517 | else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty()) |
12518 | { |
12519 | // MSL path. Array constructor is baked into type here, do not use _constructor variant. |
12520 | constructor_op += type_to_glsl_constructor(type: get<SPIRType>(id: result_type)) + "("; |
12521 | needs_trailing_bracket = true; |
12522 | } |
12523 | constructor_op += "{ "; |
12524 | |
12525 | if (type_is_empty(type: out_type) && !backend.supports_empty_struct) |
12526 | constructor_op += "0"; |
12527 | else if (splat) |
12528 | constructor_op += to_unpacked_expression(id: elems[0]); |
12529 | else |
12530 | constructor_op += build_composite_combiner(return_type: result_type, elems, length); |
12531 | constructor_op += " }"; |
12532 | if (needs_trailing_bracket) |
12533 | constructor_op += ")"; |
12534 | } |
12535 | else if (swizzle_splat && !composite) |
12536 | { |
12537 | constructor_op = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 1, expr: to_unpacked_expression(id: elems[0])); |
12538 | } |
12539 | else |
12540 | { |
12541 | constructor_op = type_to_glsl_constructor(type: get<SPIRType>(id: result_type)) + "("; |
12542 | if (type_is_empty(type: out_type) && !backend.supports_empty_struct) |
12543 | constructor_op += "0"; |
12544 | else if (splat) |
12545 | constructor_op += to_unpacked_expression(id: elems[0]); |
12546 | else |
12547 | constructor_op += build_composite_combiner(return_type: result_type, elems, length); |
12548 | constructor_op += ")"; |
12549 | } |
12550 | |
12551 | if (!constructor_op.empty()) |
12552 | { |
12553 | emit_op(result_type, result_id: id, rhs: constructor_op, forwarding: forward); |
12554 | for (uint32_t i = 0; i < length; i++) |
12555 | inherit_expression_dependencies(dst: id, source: elems[i]); |
12556 | } |
12557 | break; |
12558 | } |
12559 | |
12560 | case OpVectorInsertDynamic: |
12561 | { |
12562 | uint32_t result_type = ops[0]; |
12563 | uint32_t id = ops[1]; |
12564 | uint32_t vec = ops[2]; |
12565 | uint32_t comp = ops[3]; |
12566 | uint32_t index = ops[4]; |
12567 | |
12568 | flush_variable_declaration(id: vec); |
12569 | |
12570 | // Make a copy, then use an access chain to store the inserted component. |
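| // e.g. "vec4 _tmp = v; _tmp[i] = comp;" with _tmp becoming the result expression (name is illustrative). |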
12571 | statement(ts: declare_temporary(result_type, result_id: id), ts: to_expression(id: vec), ts: ";"); |
12572 | set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true); |
12573 | auto chain = access_chain_internal(base: id, indices: &index, count: 1, flags: 0, meta: nullptr); |
12574 | statement(ts&: chain, ts: " = ", ts: to_unpacked_expression(id: comp), ts: ";"); |
12575 | break; |
12576 | } |
12577 | |
12578 | case OpVectorExtractDynamic: |
12579 | { |
12580 | uint32_t result_type = ops[0]; |
12581 | uint32_t id = ops[1]; |
12582 | |
12583 | auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: 1, flags: 0, meta: nullptr); |
12584 | emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: ops[2])); |
12585 | inherit_expression_dependencies(dst: id, source: ops[2]); |
12586 | inherit_expression_dependencies(dst: id, source: ops[3]); |
12587 | break; |
12588 | } |
12589 | |
12590 | case OpCompositeExtract: |
12591 | { |
12592 | uint32_t result_type = ops[0]; |
12593 | uint32_t id = ops[1]; |
12594 | length -= 3; |
12595 | |
12596 | auto &type = get<SPIRType>(id: result_type); |
12597 | |
12598 | // We can only split the expression here if our expression is forwarded as a temporary. |
12599 | bool allow_base_expression = forced_temporaries.find(x: id) == end(cont&: forced_temporaries); |
12600 | |
12601 | // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case. |
12602 | auto &composite_type = expression_type(id: ops[2]); |
12603 | bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty(); |
12604 | if (composite_type_is_complex) |
12605 | allow_base_expression = false; |
12606 | |
12607 | // Packed expressions or physical ID mapped expressions cannot be split up. |
12608 | if (has_extended_decoration(id: ops[2], decoration: SPIRVCrossDecorationPhysicalTypePacked) || |
12609 | has_extended_decoration(id: ops[2], decoration: SPIRVCrossDecorationPhysicalTypeID)) |
12610 | allow_base_expression = false; |
12611 | |
12612 | // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern |
12613 | // into the base expression. |
12614 | if (is_non_native_row_major_matrix(id: ops[2])) |
12615 | allow_base_expression = false; |
12616 | |
12617 | AccessChainMeta meta; |
12618 | SPIRExpression *e = nullptr; |
12619 | auto *c = maybe_get<SPIRConstant>(id: ops[2]); |
12620 | |
12621 | if (c && !c->specialization && !composite_type_is_complex) |
12622 | { |
12623 | auto expr = to_extract_constant_composite_expression(result_type, c: *c, chain: ops + 3, length); |
12624 | e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: true, suppress_usage_tracking: true); |
12625 | } |
12626 | else if (allow_base_expression && should_forward(id: ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) |
12627 | { |
12628 | // Only apply this optimization if result is scalar. |
12629 | |
12630 | // We want to split the access chain from the base. |
12631 | // This is so we can later combine different CompositeExtract results |
12632 | // with CompositeConstruct without emitting code like |
12633 | // |
12634 | // vec3 temp = texture(...).xyz |
12635 | // vec4(temp.x, temp.y, temp.z, 1.0). |
12636 | // |
12637 | // when we actually wanted to emit this |
12638 | // vec4(texture(...).xyz, 1.0). |
12639 | // |
12640 | // Including the base will prevent this and would trigger multiple reads |
12641 | // from expression causing it to be forced to an actual temporary in GLSL. |
12642 | auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: length, |
12643 | flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT | |
12644 | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, meta: &meta); |
12645 | e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: true, suppress_usage_tracking: should_suppress_usage_tracking(id: ops[2])); |
12646 | inherit_expression_dependencies(dst: id, source: ops[2]); |
12647 | e->base_expression = ops[2]; |
12648 | |
12649 | if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis) |
12650 | set_decoration(id: ops[1], decoration: DecorationRelaxedPrecision); |
12651 | } |
12652 | else |
12653 | { |
12654 | auto expr = access_chain_internal(base: ops[2], indices: &ops[3], count: length, |
12655 | flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, meta: &meta); |
12656 | e = &emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: ops[2]), suppress_usage_tracking: should_suppress_usage_tracking(id: ops[2])); |
12657 | inherit_expression_dependencies(dst: id, source: ops[2]); |
12658 | } |
12659 | |
12660 | // Pass through some meta information to the loaded expression. |
12661 | // We can still end up loading a buffer type to a variable, then CompositeExtract from it |
12662 | // instead of loading everything through an access chain. |
12663 | e->need_transpose = meta.need_transpose; |
12664 | if (meta.storage_is_packed) |
12665 | set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
12666 | if (meta.storage_physical_type != 0) |
12667 | set_extended_decoration(id, decoration: SPIRVCrossDecorationPhysicalTypeID, value: meta.storage_physical_type); |
12668 | if (meta.storage_is_invariant) |
12669 | set_decoration(id, decoration: DecorationInvariant); |
12670 | |
12671 | break; |
12672 | } |
12673 | |
12674 | case OpCompositeInsert: |
12675 | { |
12676 | uint32_t result_type = ops[0]; |
12677 | uint32_t id = ops[1]; |
12678 | uint32_t obj = ops[2]; |
12679 | uint32_t composite = ops[3]; |
12680 | const auto *elems = &ops[4]; |
12681 | length -= 4; |
12682 | |
12683 | flush_variable_declaration(id: composite); |
12684 | |
12685 | // CompositeInsert requires a copy + modification, but this is very awkward code in high-level languages. |
12686 | // Speculate that the input composite is no longer used, and we can modify it in-place. |
12687 | // There are various scenarios where this is not possible to satisfy. |
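| // In-place: emit "composite.member = obj;" and alias the result to the existing temporary; |
| // otherwise fall back to "T _copy = composite; _copy.member = obj;" ("member"/"_copy" are illustrative). |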
12688 | bool can_modify_in_place = true; |
12689 | forced_temporaries.insert(x: id); |
12690 | |
12691 | // Cannot safely RMW PHI variables since they have no way to be invalidated; |
12692 | // forcing temporaries is not going to help. |
12693 | // This is similar for Constant and Undef inputs. |
12694 | // The only safe thing to RMW is SPIRExpression. |
12695 | // If the expression has already been used (i.e. used in a continue block), we have to keep using |
12696 | // that loop variable, since we won't be able to override the expression after the fact. |
12697 | // If the composite is hoisted, we might never be able to properly invalidate any usage |
12698 | // of that composite in a subsequent loop iteration. |
12699 | if (invalid_expressions.count(x: composite) || |
12700 | block_composite_insert_overwrite.count(x: composite) || |
12701 | hoisted_temporaries.count(x: id) || hoisted_temporaries.count(x: composite) || |
12702 | maybe_get<SPIRExpression>(id: composite) == nullptr) |
12703 | { |
12704 | can_modify_in_place = false; |
12705 | } |
12706 | else if (backend.requires_relaxed_precision_analysis && |
12707 | has_decoration(id: composite, decoration: DecorationRelaxedPrecision) != |
12708 | has_decoration(id, decoration: DecorationRelaxedPrecision) && |
12709 | get<SPIRType>(id: result_type).basetype != SPIRType::Struct) |
12710 | { |
12711 | // Similarly, if precision does not match for input and output, |
12712 | // we cannot alias them. If we write a composite into a relaxed precision |
12713 | // ID, we might get a false truncation. |
12714 | can_modify_in_place = false; |
12715 | } |
12716 | |
12717 | if (can_modify_in_place) |
12718 | { |
12719 | // Have to make sure the modified SSA value is bound to a temporary so we can modify it in-place. |
12720 | if (!forced_temporaries.count(x: composite)) |
12721 | force_temporary_and_recompile(id: composite); |
12722 | |
12723 | auto chain = access_chain_internal(base: composite, indices: elems, count: length, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr); |
12724 | statement(ts&: chain, ts: " = ", ts: to_unpacked_expression(id: obj), ts: ";"); |
12725 | set<SPIRExpression>(id, args: to_expression(id: composite), args&: result_type, args: true); |
12726 | invalid_expressions.insert(x: composite); |
12727 | composite_insert_overwritten.insert(x: composite); |
12728 | } |
12729 | else |
12730 | { |
12731 | if (maybe_get<SPIRUndef>(id: composite) != nullptr) |
12732 | { |
12733 | emit_uninitialized_temporary_expression(type: result_type, id); |
12734 | } |
12735 | else |
12736 | { |
12737 | // Make a copy, then use access chain to store the variable. |
12738 | statement(ts: declare_temporary(result_type, result_id: id), ts: to_expression(id: composite), ts: ";"); |
12739 | set<SPIRExpression>(id, args: to_name(id), args&: result_type, args: true); |
12740 | } |
12741 | |
12742 | auto chain = access_chain_internal(base: id, indices: elems, count: length, flags: ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, meta: nullptr); |
12743 | statement(ts&: chain, ts: " = ", ts: to_unpacked_expression(id: obj), ts: ";"); |
12744 | } |
12745 | |
12746 | break; |
12747 | } |
12748 | |
12749 | case OpCopyMemory: |
12750 | { |
12751 | uint32_t lhs = ops[0]; |
12752 | uint32_t rhs = ops[1]; |
12753 | if (lhs != rhs) |
12754 | { |
12755 | uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET]; |
12756 | if (!tmp_id) |
12757 | tmp_id = ir.increase_bound_by(count: 1); |
12758 | uint32_t tmp_type_id = expression_type(id: rhs).parent_type; |
12759 | |
12760 | EmbeddedInstruction fake_load, fake_store; |
12761 | fake_load.op = OpLoad; |
12762 | fake_load.length = 3; |
12763 | fake_load.ops.push_back(t: tmp_type_id); |
12764 | fake_load.ops.push_back(t: tmp_id); |
12765 | fake_load.ops.push_back(t: rhs); |
12766 | |
12767 | fake_store.op = OpStore; |
12768 | fake_store.length = 2; |
12769 | fake_store.ops.push_back(t: lhs); |
12770 | fake_store.ops.push_back(t: tmp_id); |
12771 | |
12772 | // Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible. |
12773 | // Synthesize a fake Load and Store pair for CopyMemory. |
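| // In effect: OpCopyMemory dst, src becomes "%tmp = OpLoad %type %src; OpStore %dst %tmp". |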
12774 | emit_instruction(instruction: fake_load); |
12775 | emit_instruction(instruction: fake_store); |
12776 | } |
12777 | break; |
12778 | } |
12779 | |
12780 | case OpCopyLogical: |
12781 | { |
12782 | // This is used for copying objects of different but logically matching types, such as arrays and structs. |
12783 | // We need to unroll the copy, element-by-element. |
12784 | uint32_t result_type = ops[0]; |
12785 | uint32_t id = ops[1]; |
12786 | uint32_t rhs = ops[2]; |
12787 | |
12788 | emit_uninitialized_temporary_expression(type: result_type, id); |
12789 | emit_copy_logical_type(lhs_id: id, lhs_type_id: result_type, rhs_id: rhs, rhs_type_id: expression_type_id(id: rhs), chain: {}); |
12790 | break; |
12791 | } |
12792 | |
12793 | case OpCopyObject: |
12794 | { |
12795 | uint32_t result_type = ops[0]; |
12796 | uint32_t id = ops[1]; |
12797 | uint32_t rhs = ops[2]; |
12798 | bool pointer = get<SPIRType>(id: result_type).pointer; |
12799 | |
12800 | auto *chain = maybe_get<SPIRAccessChain>(id: rhs); |
12801 | auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(id: rhs); |
12802 | if (chain) |
12803 | { |
12804 | // Cannot lower to a SPIRExpression, just copy the object. |
12805 | auto &e = set<SPIRAccessChain>(id, args&: *chain); |
12806 | e.self = id; |
12807 | } |
12808 | else if (imgsamp) |
12809 | { |
12810 | // Cannot lower to a SPIRExpression, just copy the object. |
12811 | // GLSL does not currently use this type and will never get here, but MSL does. |
12812 | // Handled here instead of CompilerMSL for better integration and general handling, |
12813 | // and in case GLSL or other subclasses require it in the future. |
12814 | auto &e = set<SPIRCombinedImageSampler>(id, args&: *imgsamp); |
12815 | e.self = id; |
12816 | } |
12817 | else if (expression_is_lvalue(id: rhs) && !pointer) |
12818 | { |
12819 | // Need a copy. |
12820 | // For pointer types, we copy the pointer itself. |
12821 | emit_op(result_type, result_id: id, rhs: to_unpacked_expression(id: rhs), forwarding: false); |
12822 | } |
12823 | else |
12824 | { |
12825 | // RHS expression is immutable, so just forward it. |
12826 | // Copying these things really make no sense, but |
12827 | // seems to be allowed anyways. |
12828 | auto &e = emit_op(result_type, result_id: id, rhs: to_expression(id: rhs), forwarding: true, suppress_usage_tracking: true); |
12829 | if (pointer) |
12830 | { |
12831 | auto *var = maybe_get_backing_variable(chain: rhs); |
12832 | e.loaded_from = var ? var->self : ID(0); |
12833 | } |
12834 | |
12835 | // If we're copying an access chain, need to inherit the read expressions. |
12836 | auto *rhs_expr = maybe_get<SPIRExpression>(id: rhs); |
12837 | if (rhs_expr) |
12838 | { |
12839 | e.implied_read_expressions = rhs_expr->implied_read_expressions; |
12840 | e.expression_dependencies = rhs_expr->expression_dependencies; |
12841 | } |
12842 | } |
12843 | break; |
12844 | } |
12845 | |
12846 | case OpVectorShuffle: |
12847 | { |
12848 | uint32_t result_type = ops[0]; |
12849 | uint32_t id = ops[1]; |
12850 | uint32_t vec0 = ops[2]; |
12851 | uint32_t vec1 = ops[3]; |
12852 | const auto *elems = &ops[4]; |
12853 | length -= 4; |
12854 | |
12855 | auto &type0 = expression_type(id: vec0); |
12856 | |
12857 | // If we have the undefined swizzle index -1, we need to swizzle in undefined data, |
12858 | // or in our case, T(0). |
12859 | bool shuffle = false; |
12860 | for (uint32_t i = 0; i < length; i++) |
12861 | if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu) |
12862 | shuffle = true; |
12863 | |
12864 | // Cannot use swizzles with packed expressions, force shuffle path. |
12865 | if (!shuffle && has_extended_decoration(id: vec0, decoration: SPIRVCrossDecorationPhysicalTypePacked)) |
12866 | shuffle = true; |
12867 | |
12868 | string expr; |
12869 | bool should_fwd, trivial_forward; |
12870 | |
12871 | if (shuffle) |
12872 | { |
12873 | should_fwd = should_forward(id: vec0) && should_forward(id: vec1); |
12874 | trivial_forward = should_suppress_usage_tracking(id: vec0) && should_suppress_usage_tracking(id: vec1); |
12875 | |
12876 | // Constructor style and shuffling from two different vectors. |
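| // e.g. vec4(a.y, a.z, b.x, 0.0) when sourcing from both inputs. |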
12877 | SmallVector<string> args; |
12878 | for (uint32_t i = 0; i < length; i++) |
12879 | { |
12880 | if (elems[i] == 0xffffffffu) |
12881 | { |
12882 | // Use a constant 0 here. |
12883 | // We could use the first component or similar, but then we risk propagating |
12884 | // a value we might not need, bogging down codegen. |
12885 | SPIRConstant c; |
12886 | c.constant_type = type0.parent_type; |
12887 | assert(type0.parent_type != ID(0)); |
12888 | args.push_back(t: constant_expression(c)); |
12889 | } |
12890 | else if (elems[i] >= type0.vecsize) |
12891 | args.push_back(t: to_extract_component_expression(id: vec1, index: elems[i] - type0.vecsize)); |
12892 | else |
12893 | args.push_back(t: to_extract_component_expression(id: vec0, index: elems[i])); |
12894 | } |
12895 | expr += join(ts: type_to_glsl_constructor(type: get<SPIRType>(id: result_type)), ts: "(", ts: merge(list: args), ts: ")"); |
12896 | } |
12897 | else |
12898 | { |
12899 | should_fwd = should_forward(id: vec0); |
12900 | trivial_forward = should_suppress_usage_tracking(id: vec0); |
12901 | |
12902 | // We only source from the first vector, so we can use a swizzle. |
12903 | // If the vector is packed, unpack it before applying the swizzle (needed for MSL). |
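| // e.g. "v.yzx", or "v.yzx()" on backends where swizzles are functions. |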
12904 | expr += to_enclosed_unpacked_expression(id: vec0); |
12905 | expr += "."; |
12906 | for (uint32_t i = 0; i < length; i++) |
12907 | { |
12908 | assert(elems[i] != 0xffffffffu); |
12909 | expr += index_to_swizzle(index: elems[i]); |
12910 | } |
12911 | |
12912 | if (backend.swizzle_is_function && length > 1) |
12913 | expr += "()"; |
12914 | } |
12915 | |
12916 | // A shuffle is trivial in that it doesn't actually *do* anything. |
12917 | // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed. |
12918 | |
12919 | emit_op(result_type, result_id: id, rhs: expr, forwarding: should_fwd, suppress_usage_tracking: trivial_forward); |
12920 | |
12921 | inherit_expression_dependencies(dst: id, source: vec0); |
12922 | if (vec0 != vec1) |
12923 | inherit_expression_dependencies(dst: id, source: vec1); |
12924 | break; |
12925 | } |
12926 | |
12927 | // ALU |
12928 | case OpIsNan: |
12929 | if (!is_legacy()) |
12930 | GLSL_UFOP(isnan); |
12931 | else |
12932 | { |
12933 | // NaN is the only value that does not compare equal to itself, so test x != x. |
12934 | auto &type = get<SPIRType>(id: ops[0]); |
12935 | if (type.vecsize > 1) |
12936 | emit_binary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[2], op: "notEqual"); |
12937 | else |
12938 | emit_binary_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[2], op: "!="); |
12939 | } |
12940 | break; |
12941 | |
12942 | case OpIsInf: |
12943 | if (!is_legacy()) |
12944 | GLSL_UFOP(isinf); |
12945 | else |
12946 | { |
12947 | // inf * 2 == inf by IEEE 754 rules; note that this identity also holds for 0.0, hence the explicit != 0.0 guard. |
12948 | // This is more reliable than checking whether the product with zero is NaN. |
12949 | uint32_t result_type = ops[0]; |
12950 | uint32_t result_id = ops[1]; |
12951 | uint32_t operand = ops[2]; |
12952 | |
12953 | auto &type = get<SPIRType>(id: result_type); |
12954 | std::string expr; |
12955 | if (type.vecsize > 1) |
12956 | { |
12957 | expr = type_to_glsl_constructor(type); |
12958 | expr += '('; |
12959 | for (uint32_t i = 0; i < type.vecsize; i++) |
12960 | { |
12961 | auto comp = to_extract_component_expression(id: operand, index: i); |
12962 | expr += join(ts&: comp, ts: " != 0.0 && 2.0 * ", ts&: comp, ts: " == ", ts&: comp); |
12963 | |
12964 | if (i + 1 < type.vecsize) |
12965 | expr += ", "; |
12966 | } |
12967 | expr += ')'; |
12968 | } |
12969 | else |
12970 | { |
12971 | // Register an extra read to force writing out a temporary |
12972 | auto oper = to_enclosed_expression(id: operand); |
12973 | track_expression_read(id: operand); |
12974 | expr += join(ts&: oper, ts: " != 0.0 && 2.0 * ", ts&: oper, ts: " == ", ts&: oper); |
12975 | } |
12976 | emit_op(result_type, result_id, rhs: expr, forwarding: should_forward(id: operand)); |
12977 | |
12978 | inherit_expression_dependencies(dst: result_id, source: operand); |
12979 | } |
12980 | break; |
12981 | |
12982 | case OpSNegate: |
12983 | if (implicit_integer_promotion || expression_type_id(id: ops[2]) != ops[0]) |
12984 | GLSL_UOP_CAST(-); |
12985 | else |
12986 | GLSL_UOP(-); |
12987 | break; |
12988 | |
12989 | case OpFNegate: |
12990 | GLSL_UOP(-); |
12991 | break; |
12992 | |
12993 | case OpIAdd: |
12994 | { |
12995 | // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts. |
12996 | auto type = get<SPIRType>(id: ops[0]).basetype; |
12997 | GLSL_BOP_CAST(+, type); |
12998 | break; |
12999 | } |
13000 | |
13001 | case OpFAdd: |
13002 | GLSL_BOP(+); |
13003 | break; |
13004 | |
13005 | case OpISub: |
13006 | { |
13007 | auto type = get<SPIRType>(id: ops[0]).basetype; |
13008 | GLSL_BOP_CAST(-, type); |
13009 | break; |
13010 | } |
13011 | |
13012 | case OpFSub: |
13013 | GLSL_BOP(-); |
13014 | break; |
13015 | |
13016 | case OpIMul: |
13017 | { |
13018 | auto type = get<SPIRType>(id: ops[0]).basetype; |
13019 | GLSL_BOP_CAST(*, type); |
13020 | break; |
13021 | } |
13022 | |
13023 | case OpVectorTimesMatrix: |
13024 | case OpMatrixTimesVector: |
13025 | { |
13026 | // If the matrix needs transpose, just flip the multiply order. |
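| // Uses the identity transpose(M) * v == v * M (and vice versa), avoiding an explicit transpose(). |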
13027 | auto *e = maybe_get<SPIRExpression>(id: ops[opcode == OpMatrixTimesVector ? 2 : 3]); |
13028 | if (e && e->need_transpose) |
13029 | { |
13030 | e->need_transpose = false; |
13031 | string expr; |
13032 | |
13033 | if (opcode == OpMatrixTimesVector) |
13034 | expr = join(ts: to_enclosed_unpacked_expression(id: ops[3]), ts: " * ", |
13035 | ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[2]))); |
13036 | else |
13037 | expr = join(ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[3])), ts: " * ", |
13038 | ts: to_enclosed_unpacked_expression(id: ops[2])); |
13039 | |
13040 | bool forward = should_forward(id: ops[2]) && should_forward(id: ops[3]); |
13041 | emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: forward); |
13042 | e->need_transpose = true; |
13043 | inherit_expression_dependencies(dst: ops[1], source: ops[2]); |
13044 | inherit_expression_dependencies(dst: ops[1], source: ops[3]); |
13045 | } |
13046 | else |
13047 | GLSL_BOP(*); |
13048 | break; |
13049 | } |
13050 | |
13051 | case OpMatrixTimesMatrix: |
13052 | { |
13053 | auto *a = maybe_get<SPIRExpression>(id: ops[2]); |
13054 | auto *b = maybe_get<SPIRExpression>(id: ops[3]); |
13055 | |
13056 | // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed. |
13057 | // a^T * b^T = (b * a)^T. |
13058 | if (a && b && a->need_transpose && b->need_transpose) |
13059 | { |
13060 | a->need_transpose = false; |
13061 | b->need_transpose = false; |
13062 | auto expr = join(ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[3])), ts: " * ", |
13063 | ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[2]))); |
13064 | bool forward = should_forward(id: ops[2]) && should_forward(id: ops[3]); |
13065 | auto &e = emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: forward); |
13066 | e.need_transpose = true; |
13067 | a->need_transpose = true; |
13068 | b->need_transpose = true; |
13069 | inherit_expression_dependencies(dst: ops[1], source: ops[2]); |
13070 | inherit_expression_dependencies(dst: ops[1], source: ops[3]); |
13071 | } |
13072 | else |
13073 | GLSL_BOP(*); |
13074 | |
13075 | break; |
13076 | } |
13077 | |
13078 | case OpMatrixTimesScalar: |
13079 | { |
13080 | auto *a = maybe_get<SPIRExpression>(id: ops[2]); |
13081 | |
13082 | // If the matrix need transpose, just mark the result as needing so. |
13083 | if (a && a->need_transpose) |
13084 | { |
13085 | a->need_transpose = false; |
13086 | auto expr = join(ts: enclose_expression(expr: to_unpacked_row_major_matrix_expression(id: ops[2])), ts: " * ", |
13087 | ts: to_enclosed_unpacked_expression(id: ops[3])); |
13088 | bool forward = should_forward(id: ops[2]) && should_forward(id: ops[3]); |
13089 | auto &e = emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: forward); |
13090 | e.need_transpose = true; |
13091 | a->need_transpose = true; |
13092 | inherit_expression_dependencies(dst: ops[1], source: ops[2]); |
13093 | inherit_expression_dependencies(dst: ops[1], source: ops[3]); |
13094 | } |
13095 | else |
13096 | GLSL_BOP(*); |
13097 | break; |
13098 | } |
13099 | |
13100 | case OpFMul: |
13101 | case OpVectorTimesScalar: |
13102 | GLSL_BOP(*); |
13103 | break; |
13104 | |
13105 | case OpOuterProduct: |
13106 | if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00 |
13107 | { |
13108 | uint32_t result_type = ops[0]; |
13109 | uint32_t id = ops[1]; |
13110 | uint32_t a = ops[2]; |
13111 | uint32_t b = ops[3]; |
13112 | |
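| // outerProduct(a, b): column j of the result is a * b[j], so build the matrix constructor column by column. |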
13113 | auto &type = get<SPIRType>(id: result_type); |
13114 | string expr = type_to_glsl_constructor(type); |
13115 | expr += "("; |
13116 | for (uint32_t col = 0; col < type.columns; col++) |
13117 | { |
13118 | expr += to_enclosed_expression(id: a); |
13119 | expr += " * "; |
13120 | expr += to_extract_component_expression(id: b, index: col); |
13121 | if (col + 1 < type.columns) |
13122 | expr += ", "; |
13123 | } |
13124 | expr += ")"; |
13125 | emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: a) && should_forward(id: b)); |
13126 | inherit_expression_dependencies(dst: id, source: a); |
13127 | inherit_expression_dependencies(dst: id, source: b); |
13128 | } |
13129 | else |
13130 | GLSL_BFOP(outerProduct); |
13131 | break; |
13132 | |
13133 | case OpDot: |
13134 | GLSL_BFOP(dot); |
13135 | break; |
13136 | |
13137 | case OpTranspose: |
13138 | if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00 |
13139 | { |
13140 | // transpose() is not available, so instead, flip need_transpose, |
13141 | // which can later be turned into an emulated transpose op by |
13142 | // convert_row_major_matrix(), if necessary. |
13143 | uint32_t result_type = ops[0]; |
13144 | uint32_t result_id = ops[1]; |
13145 | uint32_t input = ops[2]; |
13146 | |
13147 | // Force need_transpose to false temporarily to prevent |
13148 | // to_expression() from doing the transpose. |
13149 | bool need_transpose = false; |
13150 | auto *input_e = maybe_get<SPIRExpression>(id: input); |
13151 | if (input_e) |
13152 | swap(a&: need_transpose, b&: input_e->need_transpose); |
13153 | |
13154 | bool forward = should_forward(id: input); |
13155 | auto &e = emit_op(result_type, result_id, rhs: to_expression(id: input), forwarding: forward); |
13156 | e.need_transpose = !need_transpose; |
13157 | |
13158 | // Restore the old need_transpose flag. |
13159 | if (input_e) |
13160 | input_e->need_transpose = need_transpose; |
13161 | } |
13162 | else |
13163 | GLSL_UFOP(transpose); |
13164 | break; |
13165 | |
13166 | case OpSRem: |
13167 | { |
13168 | uint32_t result_type = ops[0]; |
13169 | uint32_t result_id = ops[1]; |
13170 | uint32_t op0 = ops[2]; |
13171 | uint32_t op1 = ops[3]; |
13172 | |
13173 | // Needs special handling. |
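| // GLSL's % is not reliable for negative operands (historically undefined), and SRem must follow |
| // the sign of op0, so compute a - b * (a / b) explicitly; integer division truncates toward zero. |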
13174 | bool forward = should_forward(id: op0) && should_forward(id: op1); |
13175 | auto expr = join(ts: to_enclosed_expression(id: op0), ts: " - ", ts: to_enclosed_expression(id: op1), ts: " * ", ts: "(", |
13176 | ts: to_enclosed_expression(id: op0), ts: " / ", ts: to_enclosed_expression(id: op1), ts: ")"); |
13177 | |
13178 | if (implicit_integer_promotion) |
13179 | expr = join(ts: type_to_glsl(type: get<SPIRType>(id: result_type)), ts: '(', ts&: expr, ts: ')'); |
13180 | |
13181 | emit_op(result_type, result_id, rhs: expr, forwarding: forward); |
13182 | inherit_expression_dependencies(dst: result_id, source: op0); |
13183 | inherit_expression_dependencies(dst: result_id, source: op1); |
13184 | break; |
13185 | } |
13186 | |
13187 | case OpSDiv: |
13188 | GLSL_BOP_CAST(/, int_type); |
13189 | break; |
13190 | |
13191 | case OpUDiv: |
13192 | GLSL_BOP_CAST(/, uint_type); |
13193 | break; |
13194 | |
13195 | case OpIAddCarry: |
13196 | case OpISubBorrow: |
13197 | { |
13198 | if (options.es && options.version < 310) |
13199 | SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); |
13200 | else if (!options.es && options.version < 400) |
13201 | SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400."); |
13202 | |
13203 | uint32_t result_type = ops[0]; |
13204 | uint32_t result_id = ops[1]; |
13205 | uint32_t op0 = ops[2]; |
13206 | uint32_t op1 = ops[3]; |
13207 | auto &type = get<SPIRType>(id: result_type); |
13208 | emit_uninitialized_temporary_expression(type: result_type, id: result_id); |
13209 | const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow"; |
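| // Member 0 of the result struct receives the sum/difference; member 1 receives the carry/borrow, matching SPIR-V's result layout. |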
13210 | |
13211 | statement(ts: to_expression(id: result_id), ts: ".", ts: to_member_name(type, index: 0), ts: " = ", ts&: op, ts: "(", ts: to_expression(id: op0), ts: ", ", |
13212 | ts: to_expression(id: op1), ts: ", ", ts: to_expression(id: result_id), ts: ".", ts: to_member_name(type, index: 1), ts: ");"); |
13213 | break; |
13214 | } |
13215 | |
13216 | case OpUMulExtended: |
13217 | case OpSMulExtended: |
13218 | { |
13219 | if (options.es && options.version < 310) |
13220 | SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); |
13221 | else if (!options.es && options.version < 400) |
13222 | SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400."); |
13223 | |
13224 | uint32_t result_type = ops[0]; |
13225 | uint32_t result_id = ops[1]; |
13226 | uint32_t op0 = ops[2]; |
13227 | uint32_t op1 = ops[3]; |
13228 | auto &type = get<SPIRType>(id: result_type); |
13229 | emit_uninitialized_temporary_expression(type: result_type, id: result_id); |
13230 | const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended"; |
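| // GLSL's umulExtended/imulExtended write (msb, lsb) out parameters, while the SPIR-V result struct is { lsb, msb }; hence the swapped member order below. |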
13231 | |
13232 | statement(ts&: op, ts: "(", ts: to_expression(id: op0), ts: ", ", ts: to_expression(id: op1), ts: ", ", ts: to_expression(id: result_id), ts: ".", |
13233 | ts: to_member_name(type, index: 1), ts: ", ", ts: to_expression(id: result_id), ts: ".", ts: to_member_name(type, index: 0), ts: ");"); |
13234 | break; |
13235 | } |
13236 | |
13237 | case OpFDiv: |
13238 | GLSL_BOP(/); |
13239 | break; |
13240 | |
13241 | case OpShiftRightLogical: |
13242 | GLSL_BOP_CAST(>>, uint_type); |
13243 | break; |
13244 | |
13245 | case OpShiftRightArithmetic: |
13246 | GLSL_BOP_CAST(>>, int_type); |
13247 | break; |
13248 | |
13249 | case OpShiftLeftLogical: |
13250 | { |
13251 | auto type = get<SPIRType>(id: ops[0]).basetype; |
13252 | GLSL_BOP_CAST(<<, type); |
13253 | break; |
13254 | } |
13255 | |
13256 | case OpBitwiseOr: |
13257 | { |
13258 | auto type = get<SPIRType>(id: ops[0]).basetype; |
13259 | GLSL_BOP_CAST(|, type); |
13260 | break; |
13261 | } |
13262 | |
13263 | case OpBitwiseXor: |
13264 | { |
13265 | auto type = get<SPIRType>(id: ops[0]).basetype; |
13266 | GLSL_BOP_CAST(^, type); |
13267 | break; |
13268 | } |
13269 | |
13270 | case OpBitwiseAnd: |
13271 | { |
13272 | auto type = get<SPIRType>(id: ops[0]).basetype; |
13273 | GLSL_BOP_CAST(&, type); |
13274 | break; |
13275 | } |
13276 | |
13277 | case OpNot: |
13278 | if (implicit_integer_promotion || expression_type_id(id: ops[2]) != ops[0]) |
13279 | GLSL_UOP_CAST(~); |
13280 | else |
13281 | GLSL_UOP(~); |
13282 | break; |
13283 | |
13284 | case OpUMod: |
13285 | GLSL_BOP_CAST(%, uint_type); |
13286 | break; |
13287 | |
13288 | case OpSMod: |
13289 | GLSL_BOP_CAST(%, int_type); |
13290 | break; |
13291 | |
13292 | case OpFMod: |
13293 | GLSL_BFOP(mod); |
13294 | break; |
13295 | |
13296 | case OpFRem: |
13297 | { |
13298 | uint32_t result_type = ops[0]; |
13299 | uint32_t result_id = ops[1]; |
13300 | uint32_t op0 = ops[2]; |
13301 | uint32_t op1 = ops[3]; |
13302 | |
13303 | // Needs special handling. |
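| // FRem takes the sign of op0, whereas GLSL's mod() follows op1, so emit a - b * trunc(a / b) instead. |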
13304 | bool forward = should_forward(id: op0) && should_forward(id: op1); |
13305 | std::string expr; |
13306 | if (!is_legacy()) |
13307 | { |
13308 | expr = join(ts: to_enclosed_expression(id: op0), ts: " - ", ts: to_enclosed_expression(id: op1), ts: " * ", ts: "trunc(", |
13309 | ts: to_enclosed_expression(id: op0), ts: " / ", ts: to_enclosed_expression(id: op1), ts: ")"); |
13310 | } |
13311 | else |
13312 | { |
13313 | // Legacy GLSL has no trunc(); emulate it by casting to int and back. |
13314 | auto &op0_type = expression_type(id: op0); |
13315 | auto via_type = op0_type; |
13316 | via_type.basetype = SPIRType::Int; |
13317 | expr = join(ts: to_enclosed_expression(id: op0), ts: " - ", ts: to_enclosed_expression(id: op1), ts: " * ", |
13318 | ts: type_to_glsl(type: op0_type), ts: "(", ts: type_to_glsl(type: via_type), ts: "(", |
13319 | ts: to_enclosed_expression(id: op0), ts: " / ", ts: to_enclosed_expression(id: op1), ts: "))"); |
13320 | } |
13321 | |
13322 | emit_op(result_type, result_id, rhs: expr, forwarding: forward); |
13323 | inherit_expression_dependencies(dst: result_id, source: op0); |
13324 | inherit_expression_dependencies(dst: result_id, source: op1); |
13325 | break; |
13326 | } |
13327 | |
13328 | // Relational |
13329 | case OpAny: |
13330 | GLSL_UFOP(any); |
13331 | break; |
13332 | |
13333 | case OpAll: |
13334 | GLSL_UFOP(all); |
13335 | break; |
13336 | |
13337 | case OpSelect: |
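| // OpSelect maps onto mix(false_value, true_value, condition); note the reversed operand order below. |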
13338 | emit_mix_op(result_type: ops[0], id: ops[1], left: ops[4], right: ops[3], lerp: ops[2]); |
13339 | break; |
13340 | |
13341 | case OpLogicalOr: |
13342 | { |
13343 | // No vector variant in GLSL for logical OR. |
13344 | auto result_type = ops[0]; |
13345 | auto id = ops[1]; |
13346 | auto &type = get<SPIRType>(id: result_type); |
13347 | |
13348 | if (type.vecsize > 1) |
13349 | emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "||", negate: false, expected_type: SPIRType::Unknown); |
13350 | else |
13351 | GLSL_BOP(||); |
13352 | break; |
13353 | } |
13354 | |
13355 | case OpLogicalAnd: |
13356 | { |
13357 | // No vector variant in GLSL for logical AND. |
13358 | auto result_type = ops[0]; |
13359 | auto id = ops[1]; |
13360 | auto &type = get<SPIRType>(id: result_type); |
13361 | |
13362 | if (type.vecsize > 1) |
13363 | emit_unrolled_binary_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "&&", negate: false, expected_type: SPIRType::Unknown); |
13364 | else |
13365 | GLSL_BOP(&&); |
13366 | break; |
13367 | } |
13368 | |
13369 | case OpLogicalNot: |
13370 | { |
13371 | auto &type = get<SPIRType>(id: ops[0]); |
13372 | if (type.vecsize > 1) |
13373 | GLSL_UFOP(not ); |
13374 | else |
13375 | GLSL_UOP(!); |
13376 | break; |
13377 | } |
13378 | |
13379 | case OpIEqual: |
13380 | { |
13381 | if (expression_type(id: ops[2]).vecsize > 1) |
13382 | GLSL_BFOP_CAST(equal, int_type); |
13383 | else |
13384 | GLSL_BOP_CAST(==, int_type); |
13385 | break; |
13386 | } |
13387 | |
13388 | case OpLogicalEqual: |
13389 | case OpFOrdEqual: |
13390 | { |
13391 | if (expression_type(id: ops[2]).vecsize > 1) |
13392 | GLSL_BFOP(equal); |
13393 | else |
13394 | GLSL_BOP(==); |
13395 | break; |
13396 | } |
13397 | |
13398 | case OpINotEqual: |
13399 | { |
13400 | if (expression_type(id: ops[2]).vecsize > 1) |
13401 | GLSL_BFOP_CAST(notEqual, int_type); |
13402 | else |
13403 | GLSL_BOP_CAST(!=, int_type); |
13404 | break; |
13405 | } |
13406 | |
13407 | case OpLogicalNotEqual: |
13408 | case OpFOrdNotEqual: |
13409 | case OpFUnordNotEqual: |
13410 | { |
13411 | // GLSL is fuzzy on what to do with ordered vs unordered not equal. |
13412 | // glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE, |
13413 | // but this means we have no easy way of implementing ordered not equal. |
13414 | if (expression_type(id: ops[2]).vecsize > 1) |
13415 | GLSL_BFOP(notEqual); |
13416 | else |
13417 | GLSL_BOP(!=); |
13418 | break; |
13419 | } |
13420 | |
13421 | case OpUGreaterThan: |
13422 | case OpSGreaterThan: |
13423 | { |
13424 | auto type = opcode == OpUGreaterThan ? uint_type : int_type; |
13425 | if (expression_type(id: ops[2]).vecsize > 1) |
13426 | GLSL_BFOP_CAST(greaterThan, type); |
13427 | else |
13428 | GLSL_BOP_CAST(>, type); |
13429 | break; |
13430 | } |
13431 | |
13432 | case OpFOrdGreaterThan: |
13433 | { |
13434 | if (expression_type(id: ops[2]).vecsize > 1) |
13435 | GLSL_BFOP(greaterThan); |
13436 | else |
13437 | GLSL_BOP(>); |
13438 | break; |
13439 | } |
13440 | |
13441 | case OpUGreaterThanEqual: |
13442 | case OpSGreaterThanEqual: |
13443 | { |
13444 | auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type; |
13445 | if (expression_type(id: ops[2]).vecsize > 1) |
13446 | GLSL_BFOP_CAST(greaterThanEqual, type); |
13447 | else |
13448 | GLSL_BOP_CAST(>=, type); |
13449 | break; |
13450 | } |
13451 | |
13452 | case OpFOrdGreaterThanEqual: |
13453 | { |
13454 | if (expression_type(id: ops[2]).vecsize > 1) |
13455 | GLSL_BFOP(greaterThanEqual); |
13456 | else |
13457 | GLSL_BOP(>=); |
13458 | break; |
13459 | } |
13460 | |
13461 | case OpULessThan: |
13462 | case OpSLessThan: |
13463 | { |
13464 | auto type = opcode == OpULessThan ? uint_type : int_type; |
13465 | if (expression_type(id: ops[2]).vecsize > 1) |
13466 | GLSL_BFOP_CAST(lessThan, type); |
13467 | else |
13468 | GLSL_BOP_CAST(<, type); |
13469 | break; |
13470 | } |
13471 | |
13472 | case OpFOrdLessThan: |
13473 | { |
13474 | if (expression_type(id: ops[2]).vecsize > 1) |
13475 | GLSL_BFOP(lessThan); |
13476 | else |
13477 | GLSL_BOP(<); |
13478 | break; |
13479 | } |
13480 | |
13481 | case OpULessThanEqual: |
13482 | case OpSLessThanEqual: |
13483 | { |
13484 | auto type = opcode == OpULessThanEqual ? uint_type : int_type; |
13485 | if (expression_type(id: ops[2]).vecsize > 1) |
13486 | GLSL_BFOP_CAST(lessThanEqual, type); |
13487 | else |
13488 | GLSL_BOP_CAST(<=, type); |
13489 | break; |
13490 | } |
13491 | |
13492 | case OpFOrdLessThanEqual: |
13493 | { |
13494 | if (expression_type(id: ops[2]).vecsize > 1) |
13495 | GLSL_BFOP(lessThanEqual); |
13496 | else |
13497 | GLSL_BOP(<=); |
13498 | break; |
13499 | } |
13500 | |
13501 | // Conversion |
13502 | case OpSConvert: |
13503 | case OpConvertSToF: |
13504 | case OpUConvert: |
13505 | case OpConvertUToF: |
13506 | { |
13507 | auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type; |
13508 | uint32_t result_type = ops[0]; |
13509 | uint32_t id = ops[1]; |
13510 | |
13511 | auto &type = get<SPIRType>(id: result_type); |
13512 | auto &arg_type = expression_type(id: ops[2]); |
13513 | auto func = type_to_glsl_constructor(type); |
13514 | |
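| // Sign/zero-extension and int-to-float conversion depend on the operand's signedness, so those paths |
| // first bitcast the operand to the matching signed/unsigned type; narrowing just truncates and needs no cast. |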
13515 | if (arg_type.width < type.width || type_is_floating_point(type)) |
13516 | emit_unary_func_op_cast(result_type, result_id: id, op0: ops[2], op: func.c_str(), input_type, expected_result_type: type.basetype); |
13517 | else |
13518 | emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: func.c_str()); |
13519 | break; |
13520 | } |
13521 | |
13522 | case OpConvertFToU: |
13523 | case OpConvertFToS: |
13524 | { |
13525 | // Cast to expected arithmetic type, then potentially bitcast away to desired signedness. |
13526 | uint32_t result_type = ops[0]; |
13527 | uint32_t id = ops[1]; |
13528 | auto &type = get<SPIRType>(id: result_type); |
13529 | auto expected_type = type; |
13530 | auto &float_type = expression_type(id: ops[2]); |
13531 | expected_type.basetype = |
13532 | opcode == OpConvertFToS ? to_signed_basetype(width: type.width) : to_unsigned_basetype(width: type.width); |
13533 | |
13534 | auto func = type_to_glsl_constructor(type: expected_type); |
13535 | emit_unary_func_op_cast(result_type, result_id: id, op0: ops[2], op: func.c_str(), input_type: float_type.basetype, expected_result_type: expected_type.basetype); |
13536 | break; |
13537 | } |
13538 | |
13539 | case OpFConvert: |
13540 | { |
13541 | uint32_t result_type = ops[0]; |
13542 | uint32_t id = ops[1]; |
13543 | |
13544 | auto func = type_to_glsl_constructor(type: get<SPIRType>(id: result_type)); |
13545 | emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: func.c_str()); |
13546 | break; |
13547 | } |
13548 | |
13549 | case OpBitcast: |
13550 | { |
13551 | uint32_t result_type = ops[0]; |
13552 | uint32_t id = ops[1]; |
13553 | uint32_t arg = ops[2]; |
13554 | |
13555 | if (!emit_complex_bitcast(result_type, id, op0: arg)) |
13556 | { |
13557 | auto op = bitcast_glsl_op(out_type: get<SPIRType>(id: result_type), in_type: expression_type(id: arg)); |
13558 | emit_unary_func_op(result_type, result_id: id, op0: arg, op: op.c_str()); |
13559 | } |
13560 | break; |
13561 | } |
13562 | |
13563 | case OpQuantizeToF16: |
13564 | { |
13565 | uint32_t result_type = ops[0]; |
13566 | uint32_t id = ops[1]; |
13567 | uint32_t arg = ops[2]; |
13568 | |
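| // Quantize by round-tripping through FP16: packHalf2x16 narrows to half precision and unpackHalf2x16 widens back. |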
13569 | string op; |
13570 | auto &type = get<SPIRType>(id: result_type); |
13571 | |
13572 | switch (type.vecsize) |
13573 | { |
13574 | case 1: |
13575 | op = join(ts: "unpackHalf2x16(packHalf2x16(vec2(", ts: to_expression(id: arg), ts: "))).x"); |
13576 | break; |
13577 | case 2: |
13578 | op = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: "))"); |
13579 | break; |
13580 | case 3: |
13581 | { |
13582 | auto op0 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".xy))"); |
13583 | auto op1 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".zz)).x"); |
13584 | op = join(ts: "vec3(", ts&: op0, ts: ", ", ts&: op1, ts: ")"); |
13585 | break; |
13586 | } |
13587 | case 4: |
13588 | { |
13589 | auto op0 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".xy))"); |
13590 | auto op1 = join(ts: "unpackHalf2x16(packHalf2x16(", ts: to_expression(id: arg), ts: ".zw))"); |
13591 | op = join(ts: "vec4(", ts&: op0, ts: ", ", ts&: op1, ts: ")"); |
13592 | break; |
13593 | } |
13594 | default: |
13595 | SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); |
13596 | } |
13597 | |
13598 | emit_op(result_type, result_id: id, rhs: op, forwarding: should_forward(id: arg)); |
13599 | inherit_expression_dependencies(dst: id, source: arg); |
13600 | break; |
13601 | } |
13602 | |
13603 | // Derivatives |
13604 | case OpDPdx: |
13605 | GLSL_UFOP(dFdx); |
13606 | if (is_legacy_es()) |
13607 | require_extension_internal(ext: "GL_OES_standard_derivatives"); |
13608 | register_control_dependent_expression(expr: ops[1]); |
13609 | break; |
13610 | |
13611 | case OpDPdy: |
13612 | GLSL_UFOP(dFdy); |
13613 | if (is_legacy_es()) |
13614 | require_extension_internal(ext: "GL_OES_standard_derivatives"); |
13615 | register_control_dependent_expression(expr: ops[1]); |
13616 | break; |
13617 | |
13618 | case OpDPdxFine: |
13619 | GLSL_UFOP(dFdxFine); |
13620 | if (options.es) |
13621 | { |
13622 | SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); |
13623 | } |
13624 | if (options.version < 450) |
13625 | require_extension_internal(ext: "GL_ARB_derivative_control"); |
13626 | register_control_dependent_expression(expr: ops[1]); |
13627 | break; |
13628 | |
13629 | case OpDPdyFine: |
13630 | GLSL_UFOP(dFdyFine); |
13631 | if (options.es) |
13632 | { |
13633 | SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); |
13634 | } |
13635 | if (options.version < 450) |
13636 | require_extension_internal(ext: "GL_ARB_derivative_control"); |
13637 | register_control_dependent_expression(expr: ops[1]); |
13638 | break; |
13639 | |
13640 | case OpDPdxCoarse: |
13641 | if (options.es) |
13642 | { |
13643 | SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); |
13644 | } |
13645 | GLSL_UFOP(dFdxCoarse); |
13646 | if (options.version < 450) |
13647 | require_extension_internal(ext: "GL_ARB_derivative_control"); |
13648 | register_control_dependent_expression(expr: ops[1]); |
13649 | break; |
13650 | |
13651 | case OpDPdyCoarse: |
13652 | GLSL_UFOP(dFdyCoarse); |
13653 | if (options.es) |
13654 | { |
13655 | SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); |
13656 | } |
13657 | if (options.version < 450) |
13658 | require_extension_internal(ext: "GL_ARB_derivative_control"); |
13659 | register_control_dependent_expression(expr: ops[1]); |
13660 | break; |
13661 | |
13662 | case OpFwidth: |
13663 | GLSL_UFOP(fwidth); |
13664 | if (is_legacy_es()) |
13665 | require_extension_internal(ext: "GL_OES_standard_derivatives"); |
13666 | register_control_dependent_expression(expr: ops[1]); |
13667 | break; |
13668 | |
13669 | case OpFwidthCoarse: |
13670 | GLSL_UFOP(fwidthCoarse); |
13671 | if (options.es) |
13672 | { |
13673 | SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); |
13674 | } |
13675 | if (options.version < 450) |
13676 | require_extension_internal(ext: "GL_ARB_derivative_control"); |
13677 | register_control_dependent_expression(expr: ops[1]); |
13678 | break; |
13679 | |
13680 | case OpFwidthFine: |
13681 | GLSL_UFOP(fwidthFine); |
13682 | if (options.es) |
13683 | { |
13684 | SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); |
13685 | } |
13686 | if (options.version < 450) |
13687 | require_extension_internal(ext: "GL_ARB_derivative_control"); |
13688 | register_control_dependent_expression(expr: ops[1]); |
13689 | break; |
13690 | |
13691 | // Bitfield |
13692 | case OpBitFieldInsert: |
13693 | { |
13694 | emit_bitfield_insert_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op3: ops[5], op: "bitfieldInsert", offset_count_type: SPIRType::Int); |
13695 | break; |
13696 | } |
13697 | |
13698 | case OpBitFieldSExtract: |
13699 | { |
13700 | emit_trinary_func_op_bitextract(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op: "bitfieldExtract", expected_result_type: int_type, input_type0: int_type, |
13701 | input_type1: SPIRType::Int, input_type2: SPIRType::Int); |
13702 | break; |
13703 | } |
13704 | |
13705 | case OpBitFieldUExtract: |
13706 | { |
13707 | emit_trinary_func_op_bitextract(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[3], op2: ops[4], op: "bitfieldExtract", expected_result_type: uint_type, input_type0: uint_type, |
13708 | input_type1: SPIRType::Int, input_type2: SPIRType::Int); |
13709 | break; |
13710 | } |
13711 | |
13712 | case OpBitReverse: |
13713 | // BitReverse does not have issues with sign since result type must match input type. |
13714 | GLSL_UFOP(bitfieldReverse); |
13715 | break; |
13716 | |
13717 | case OpBitCount: |
13718 | { |
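// bitCount() always returns a signed int (vector) in GLSL regardless of the input's
// signedness, so bitcast the result if the SPIR-V result type is unsigned.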
13719 | auto basetype = expression_type(id: ops[2]).basetype; |
13720 | emit_unary_func_op_cast(result_type: ops[0], result_id: ops[1], op0: ops[2], op: "bitCount", input_type: basetype, expected_result_type: int_type); |
13721 | break; |
13722 | } |
13723 | |
13724 | // Atomics |
13725 | case OpAtomicExchange: |
13726 | { |
13727 | uint32_t result_type = ops[0]; |
13728 | uint32_t id = ops[1]; |
13729 | uint32_t ptr = ops[2]; |
13730 | // Ignore semantics for now, probably only relevant to CL. |
13731 | uint32_t val = ops[5]; |
const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
13733 | |
13734 | emit_atomic_func_op(result_type, result_id: id, op0: ptr, op1: val, op); |
13735 | break; |
13736 | } |
13737 | |
13738 | case OpAtomicCompareExchange: |
13739 | { |
13740 | uint32_t result_type = ops[0]; |
13741 | uint32_t id = ops[1]; |
13742 | uint32_t ptr = ops[2]; |
13743 | uint32_t val = ops[6]; |
13744 | uint32_t comp = ops[7]; |
const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
13746 | |
13747 | emit_atomic_func_op(result_type, result_id: id, op0: ptr, op1: comp, op2: val, op); |
13748 | break; |
13749 | } |
13750 | |
13751 | case OpAtomicLoad: |
13752 | { |
// In plain GLSL, we have no atomic loads, so emulate this by fetch-adding 0 and hoping the compiler figures it out.
13754 | // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. |
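// For an unsigned counter this emits something like ('counter' and '_tmp' are illustrative names):
//   uint _tmp = atomicAdd(counter, 0u);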
13755 | auto &type = expression_type(id: ops[2]); |
13756 | forced_temporaries.insert(x: ops[1]); |
13757 | bool atomic_image = check_atomic_image(id: ops[2]); |
13758 | bool unsigned_type = (type.basetype == SPIRType::UInt) || |
13759 | (atomic_image && get<SPIRType>(id: type.image.type).basetype == SPIRType::UInt); |
const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
const char *increment = unsigned_type ? "0u" : "0";
13762 | emit_op(result_type: ops[0], result_id: ops[1], |
13763 | rhs: join(ts&: op, ts: "(", |
13764 | ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: increment, ts: ")"), forwarding: false); |
13765 | flush_all_atomic_capable_variables(); |
13766 | break; |
13767 | } |
13768 | |
13769 | case OpAtomicStore: |
13770 | { |
13771 | // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result. |
13772 | // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. |
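// For example, storing 'val' to a buffer member would emit something like:
//   atomicExchange(buf.member, val);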
13773 | uint32_t ptr = ops[0]; |
13774 | // Ignore semantics for now, probably only relevant to CL. |
13775 | uint32_t val = ops[3]; |
const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
13777 | statement(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: ptr), ts: ", ", ts: to_expression(id: val), ts: ");"); |
13778 | flush_all_atomic_capable_variables(); |
13779 | break; |
13780 | } |
13781 | |
13782 | case OpAtomicIIncrement: |
13783 | case OpAtomicIDecrement: |
13784 | { |
13785 | forced_temporaries.insert(x: ops[1]); |
13786 | auto &type = expression_type(id: ops[2]); |
13787 | if (type.storage == StorageClassAtomicCounter) |
13788 | { |
13789 | // Legacy GLSL stuff, not sure if this is relevant to support. |
13790 | if (opcode == OpAtomicIIncrement) |
13791 | GLSL_UFOP(atomicCounterIncrement); |
13792 | else |
13793 | GLSL_UFOP(atomicCounterDecrement); |
13794 | } |
13795 | else |
13796 | { |
13797 | bool atomic_image = check_atomic_image(id: ops[2]); |
13798 | bool unsigned_type = (type.basetype == SPIRType::UInt) || |
13799 | (atomic_image && get<SPIRType>(id: type.image.type).basetype == SPIRType::UInt); |
const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
13801 | |
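// Emulate increment/decrement by adding +1 or -1 through atomicAdd().
// For unsigned decrement we add uint(-1) (0xffffffffu), which wraps around to subtracting one.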
13802 | const char *increment = nullptr; |
13803 | if (opcode == OpAtomicIIncrement && unsigned_type) |
13804 | increment = "1u"; |
13805 | else if (opcode == OpAtomicIIncrement) |
13806 | increment = "1"; |
13807 | else if (unsigned_type) |
13808 | increment = "uint(-1)"; |
13809 | else |
13810 | increment = "-1"; |
13811 | |
13812 | emit_op(result_type: ops[0], result_id: ops[1], |
13813 | rhs: join(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: increment, ts: ")"), forwarding: false); |
13814 | } |
13815 | |
13816 | flush_all_atomic_capable_variables(); |
13817 | break; |
13818 | } |
13819 | |
13820 | case OpAtomicIAdd: |
13821 | case OpAtomicFAddEXT: |
13822 | { |
const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
13824 | emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op); |
13825 | break; |
13826 | } |
13827 | |
13828 | case OpAtomicISub: |
13829 | { |
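// GLSL has no atomicSub(); negate the operand instead, emitting e.g. atomicAdd(counter, -val).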
const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
13831 | forced_temporaries.insert(x: ops[1]); |
13832 | auto expr = join(ts&: op, ts: "(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", -", ts: to_enclosed_expression(id: ops[5]), ts: ")"); |
13833 | emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: should_forward(id: ops[2]) && should_forward(id: ops[5])); |
13834 | flush_all_atomic_capable_variables(); |
13835 | break; |
13836 | } |
13837 | |
13838 | case OpAtomicSMin: |
13839 | case OpAtomicUMin: |
13840 | { |
const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
13842 | emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op); |
13843 | break; |
13844 | } |
13845 | |
13846 | case OpAtomicSMax: |
13847 | case OpAtomicUMax: |
13848 | { |
const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
13850 | emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op); |
13851 | break; |
13852 | } |
13853 | |
13854 | case OpAtomicAnd: |
13855 | { |
const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
13857 | emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op); |
13858 | break; |
13859 | } |
13860 | |
13861 | case OpAtomicOr: |
13862 | { |
const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
13864 | emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op); |
13865 | break; |
13866 | } |
13867 | |
13868 | case OpAtomicXor: |
13869 | { |
const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
13871 | emit_atomic_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op1: ops[5], op); |
13872 | break; |
13873 | } |
13874 | |
13875 | // Geometry shaders |
13876 | case OpEmitVertex: |
13877 | statement(ts: "EmitVertex();"); |
13878 | break; |
13879 | |
13880 | case OpEndPrimitive: |
13881 | statement(ts: "EndPrimitive();"); |
13882 | break; |
13883 | |
13884 | case OpEmitStreamVertex: |
13885 | { |
13886 | if (options.es) |
13887 | SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES."); |
13888 | else if (!options.es && options.version < 400) |
SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400 and up.");
13890 | |
13891 | auto stream_expr = to_expression(id: ops[0]); |
13892 | if (expression_type(id: ops[0]).basetype != SPIRType::Int) |
13893 | stream_expr = join(ts: "int(", ts&: stream_expr, ts: ")"); |
13894 | statement(ts: "EmitStreamVertex(", ts&: stream_expr, ts: ");"); |
13895 | break; |
13896 | } |
13897 | |
13898 | case OpEndStreamPrimitive: |
13899 | { |
13900 | if (options.es) |
13901 | SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES."); |
13902 | else if (!options.es && options.version < 400) |
SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400 and up.");
13904 | |
13905 | auto stream_expr = to_expression(id: ops[0]); |
13906 | if (expression_type(id: ops[0]).basetype != SPIRType::Int) |
13907 | stream_expr = join(ts: "int(", ts&: stream_expr, ts: ")"); |
13908 | statement(ts: "EndStreamPrimitive(", ts&: stream_expr, ts: ");"); |
13909 | break; |
13910 | } |
13911 | |
13912 | // Textures |
13913 | case OpImageSampleExplicitLod: |
13914 | case OpImageSampleProjExplicitLod: |
13915 | case OpImageSampleDrefExplicitLod: |
13916 | case OpImageSampleProjDrefExplicitLod: |
13917 | case OpImageSampleImplicitLod: |
13918 | case OpImageSampleProjImplicitLod: |
13919 | case OpImageSampleDrefImplicitLod: |
13920 | case OpImageSampleProjDrefImplicitLod: |
13921 | case OpImageFetch: |
13922 | case OpImageGather: |
13923 | case OpImageDrefGather: |
// Gets a bit hairy, so handle this in a separate function.
13925 | emit_texture_op(i: instruction, sparse: false); |
13926 | break; |
13927 | |
13928 | case OpImageSparseSampleExplicitLod: |
13929 | case OpImageSparseSampleProjExplicitLod: |
13930 | case OpImageSparseSampleDrefExplicitLod: |
13931 | case OpImageSparseSampleProjDrefExplicitLod: |
13932 | case OpImageSparseSampleImplicitLod: |
13933 | case OpImageSparseSampleProjImplicitLod: |
13934 | case OpImageSparseSampleDrefImplicitLod: |
13935 | case OpImageSparseSampleProjDrefImplicitLod: |
13936 | case OpImageSparseFetch: |
13937 | case OpImageSparseGather: |
13938 | case OpImageSparseDrefGather: |
// Gets a bit hairy, so handle this in a separate function.
13940 | emit_texture_op(i: instruction, sparse: true); |
13941 | break; |
13942 | |
13943 | case OpImageSparseTexelsResident: |
13944 | if (options.es) |
SPIRV_CROSS_THROW("Sparse feedback is not supported in OpenGL ES.");
13946 | require_extension_internal(ext: "GL_ARB_sparse_texture2"); |
13947 | emit_unary_func_op_cast(result_type: ops[0], result_id: ops[1], op0: ops[2], op: "sparseTexelsResidentARB", input_type: int_type, expected_result_type: SPIRType::Boolean); |
13948 | break; |
13949 | |
13950 | case OpImage: |
13951 | { |
13952 | uint32_t result_type = ops[0]; |
13953 | uint32_t id = ops[1]; |
13954 | |
13955 | // Suppress usage tracking. |
13956 | auto &e = emit_op(result_type, result_id: id, rhs: to_expression(id: ops[2]), forwarding: true, suppress_usage_tracking: true); |
13957 | |
13958 | // When using the image, we need to know which variable it is actually loaded from. |
13959 | auto *var = maybe_get_backing_variable(chain: ops[2]); |
13960 | e.loaded_from = var ? var->self : ID(0); |
13961 | break; |
13962 | } |
13963 | |
13964 | case OpImageQueryLod: |
13965 | { |
13966 | const char *op = nullptr; |
13967 | if (!options.es && options.version < 400) |
13968 | { |
13969 | require_extension_internal(ext: "GL_ARB_texture_query_lod"); |
13970 | // For some reason, the ARB spec is all-caps. |
13971 | op = "textureQueryLOD"; |
13972 | } |
13973 | else if (options.es) |
13974 | { |
13975 | if (options.version < 300) |
13976 | SPIRV_CROSS_THROW("textureQueryLod not supported in legacy ES"); |
13977 | require_extension_internal(ext: "GL_EXT_texture_query_lod"); |
13978 | op = "textureQueryLOD"; |
13979 | } |
13980 | else |
13981 | op = "textureQueryLod"; |
13982 | |
13983 | auto sampler_expr = to_expression(id: ops[2]); |
13984 | if (has_decoration(id: ops[2], decoration: DecorationNonUniform)) |
13985 | { |
13986 | if (maybe_get_backing_variable(chain: ops[2])) |
13987 | convert_non_uniform_expression(expr&: sampler_expr, ptr_id: ops[2]); |
13988 | else if (*backend.nonuniform_qualifier != '\0') |
13989 | sampler_expr = join(ts&: backend.nonuniform_qualifier, ts: "(", ts&: sampler_expr, ts: ")"); |
13990 | } |
13991 | |
13992 | bool forward = should_forward(id: ops[3]); |
13993 | emit_op(result_type: ops[0], result_id: ops[1], |
13994 | rhs: join(ts&: op, ts: "(", ts&: sampler_expr, ts: ", ", ts: to_unpacked_expression(id: ops[3]), ts: ")"), |
13995 | forwarding: forward); |
13996 | inherit_expression_dependencies(dst: ops[1], source: ops[2]); |
13997 | inherit_expression_dependencies(dst: ops[1], source: ops[3]); |
13998 | register_control_dependent_expression(expr: ops[1]); |
13999 | break; |
14000 | } |
14001 | |
14002 | case OpImageQueryLevels: |
14003 | { |
14004 | uint32_t result_type = ops[0]; |
14005 | uint32_t id = ops[1]; |
14006 | |
14007 | if (!options.es && options.version < 430) |
14008 | require_extension_internal(ext: "GL_ARB_texture_query_levels"); |
14009 | if (options.es) |
14010 | SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile."); |
14011 | |
14012 | auto expr = join(ts: "textureQueryLevels(", ts: convert_separate_image_to_expression(id: ops[2]), ts: ")"); |
14013 | auto &restype = get<SPIRType>(id: ops[0]); |
14014 | expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr); |
14015 | emit_op(result_type, result_id: id, rhs: expr, forwarding: true); |
14016 | break; |
14017 | } |
14018 | |
14019 | case OpImageQuerySamples: |
14020 | { |
14021 | auto &type = expression_type(id: ops[2]); |
14022 | uint32_t result_type = ops[0]; |
14023 | uint32_t id = ops[1]; |
14024 | |
14025 | if (options.es) |
14026 | SPIRV_CROSS_THROW("textureSamples and imageSamples not supported in ES profile."); |
14027 | else if (options.version < 450) |
14028 | require_extension_internal(ext: "GL_ARB_texture_query_samples"); |
14029 | |
14030 | string expr; |
14031 | if (type.image.sampled == 2) |
14032 | expr = join(ts: "imageSamples(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ")"); |
14033 | else |
14034 | expr = join(ts: "textureSamples(", ts: convert_separate_image_to_expression(id: ops[2]), ts: ")"); |
14035 | |
14036 | auto &restype = get<SPIRType>(id: ops[0]); |
14037 | expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr); |
14038 | emit_op(result_type, result_id: id, rhs: expr, forwarding: true); |
14039 | break; |
14040 | } |
14041 | |
14042 | case OpSampledImage: |
14043 | { |
14044 | uint32_t result_type = ops[0]; |
14045 | uint32_t id = ops[1]; |
14046 | emit_sampled_image_op(result_type, result_id: id, image_id: ops[2], samp_id: ops[3]); |
14047 | inherit_expression_dependencies(dst: id, source: ops[2]); |
14048 | inherit_expression_dependencies(dst: id, source: ops[3]); |
14049 | break; |
14050 | } |
14051 | |
14052 | case OpImageQuerySizeLod: |
14053 | { |
14054 | uint32_t result_type = ops[0]; |
14055 | uint32_t id = ops[1]; |
14056 | uint32_t img = ops[2]; |
14057 | auto &type = expression_type(id: img); |
14058 | auto &imgtype = get<SPIRType>(id: type.self); |
14059 | |
14060 | std::string fname = "textureSize"; |
14061 | if (is_legacy_desktop()) |
14062 | { |
14063 | fname = legacy_tex_op(op: fname, imgtype, tex: img); |
14064 | } |
14065 | else if (is_legacy_es()) |
14066 | SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100."); |
14067 | |
14068 | auto expr = join(ts&: fname, ts: "(", ts: convert_separate_image_to_expression(id: img), ts: ", ", |
14069 | ts: bitcast_expression(target_type: SPIRType::Int, arg: ops[3]), ts: ")"); |
14070 | |
14071 | // ES needs to emulate 1D images as 2D. |
14072 | if (type.image.dim == Dim1D && options.es) |
14073 | expr = join(ts&: expr, ts: ".x"); |
14074 | |
14075 | auto &restype = get<SPIRType>(id: ops[0]); |
14076 | expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr); |
14077 | emit_op(result_type, result_id: id, rhs: expr, forwarding: true); |
14078 | break; |
14079 | } |
14080 | |
14081 | // Image load/store |
14082 | case OpImageRead: |
14083 | case OpImageSparseRead: |
14084 | { |
14085 | // We added Nonreadable speculatively to the OpImage variable due to glslangValidator |
14086 | // not adding the proper qualifiers. |
14087 | // If it turns out we need to read the image after all, remove the qualifier and recompile. |
14088 | auto *var = maybe_get_backing_variable(chain: ops[2]); |
14089 | if (var) |
14090 | { |
14091 | auto &flags = get_decoration_bitset(id: var->self); |
14092 | if (flags.get(bit: DecorationNonReadable)) |
14093 | { |
14094 | unset_decoration(id: var->self, decoration: DecorationNonReadable); |
14095 | force_recompile(); |
14096 | } |
14097 | } |
14098 | |
14099 | uint32_t result_type = ops[0]; |
14100 | uint32_t id = ops[1]; |
14101 | |
14102 | bool pure; |
14103 | string imgexpr; |
14104 | auto &type = expression_type(id: ops[2]); |
14105 | |
14106 | if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code |
14107 | { |
14108 | if (type.image.ms) |
14109 | SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible."); |
14110 | |
14111 | auto itr = |
14112 | find_if(first: begin(cont&: pls_inputs), last: end(cont&: pls_inputs), pred: [var](const PlsRemap &pls) { return pls.id == var->self; }); |
14113 | |
14114 | if (itr == end(cont&: pls_inputs)) |
14115 | { |
14116 | // For non-PLS inputs, we rely on subpass type remapping information to get it right |
14117 | // since ImageRead always returns 4-component vectors and the backing type is opaque. |
14118 | if (!var->remapped_components) |
14119 | SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly."); |
14120 | imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: var->remapped_components, expr: to_expression(id: ops[2])); |
14121 | } |
14122 | else |
14123 | { |
// A PLS input can have a different number of components than the SPIR-V expects,
// so swizzle to the appropriate vector size.
14126 | uint32_t components = pls_format_to_components(format: itr->format); |
14127 | imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: components, expr: to_expression(id: ops[2])); |
14128 | } |
14129 | pure = true; |
14130 | } |
14131 | else if (type.image.dim == DimSubpassData) |
14132 | { |
14133 | if (var && subpass_input_is_framebuffer_fetch(id: var->self)) |
14134 | { |
14135 | imgexpr = to_expression(id: var->self); |
14136 | } |
14137 | else if (options.vulkan_semantics) |
14138 | { |
14139 | // With Vulkan semantics, use the proper Vulkan GLSL construct. |
14140 | if (type.image.ms) |
14141 | { |
14142 | uint32_t operands = ops[4]; |
14143 | if (operands != ImageOperandsSampleMask || length != 6) |
14144 | SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " |
14145 | "operand mask was used."); |
14146 | |
14147 | uint32_t samples = ops[5]; |
14148 | imgexpr = join(ts: "subpassLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts: to_expression(id: samples), ts: ")"); |
14149 | } |
14150 | else |
14151 | imgexpr = join(ts: "subpassLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ")"); |
14152 | } |
14153 | else |
14154 | { |
14155 | if (type.image.ms) |
14156 | { |
14157 | uint32_t operands = ops[4]; |
14158 | if (operands != ImageOperandsSampleMask || length != 6) |
14159 | SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " |
14160 | "operand mask was used."); |
14161 | |
14162 | uint32_t samples = ops[5]; |
14163 | imgexpr = join(ts: "texelFetch(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ivec2(gl_FragCoord.xy), ", |
14164 | ts: to_expression(id: samples), ts: ")"); |
14165 | } |
14166 | else |
14167 | { |
14168 | // Implement subpass loads via texture barrier style sampling. |
14169 | imgexpr = join(ts: "texelFetch(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ivec2(gl_FragCoord.xy), 0)"); |
14170 | } |
14171 | } |
14172 | imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 4, expr: imgexpr); |
14173 | pure = true; |
14174 | } |
14175 | else |
14176 | { |
14177 | bool sparse = opcode == OpImageSparseRead; |
14178 | uint32_t sparse_code_id = 0; |
14179 | uint32_t sparse_texel_id = 0; |
14180 | if (sparse) |
14181 | emit_sparse_feedback_temporaries(result_type_id: ops[0], id: ops[1], feedback_id&: sparse_code_id, texel_id&: sparse_texel_id); |
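// Sparse reads return both a residency code and the texel. Emit separate temporaries
// for them here, then construct the SPIR-V result struct below, roughly:
//   ResultType(_residency_code, _texel)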
14182 | |
14183 | // imageLoad only accepts int coords, not uint. |
14184 | auto coord_expr = to_expression(id: ops[3]); |
14185 | auto target_coord_type = expression_type(id: ops[3]); |
14186 | target_coord_type.basetype = SPIRType::Int; |
14187 | coord_expr = bitcast_expression(target_type: target_coord_type, expr_type: expression_type(id: ops[3]).basetype, expr: coord_expr); |
14188 | |
14189 | // ES needs to emulate 1D images as 2D. |
14190 | if (type.image.dim == Dim1D && options.es) |
14191 | coord_expr = join(ts: "ivec2(", ts&: coord_expr, ts: ", 0)"); |
14192 | |
14193 | // Plain image load/store. |
14194 | if (sparse) |
14195 | { |
14196 | if (type.image.ms) |
14197 | { |
14198 | uint32_t operands = ops[4]; |
14199 | if (operands != ImageOperandsSampleMask || length != 6) |
14200 | SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " |
14201 | "operand mask was used."); |
14202 | |
14203 | uint32_t samples = ops[5]; |
14204 | statement(ts: to_expression(id: sparse_code_id), ts: " = sparseImageLoadARB(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", |
14205 | ts&: coord_expr, ts: ", ", ts: to_expression(id: samples), ts: ", ", ts: to_expression(id: sparse_texel_id), ts: ");"); |
14206 | } |
14207 | else |
14208 | { |
14209 | statement(ts: to_expression(id: sparse_code_id), ts: " = sparseImageLoadARB(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", |
14210 | ts&: coord_expr, ts: ", ", ts: to_expression(id: sparse_texel_id), ts: ");"); |
14211 | } |
14212 | imgexpr = join(ts: type_to_glsl(type: get<SPIRType>(id: result_type)), ts: "(", ts: to_expression(id: sparse_code_id), ts: ", ", |
14213 | ts: to_expression(id: sparse_texel_id), ts: ")"); |
14214 | } |
14215 | else |
14216 | { |
14217 | if (type.image.ms) |
14218 | { |
14219 | uint32_t operands = ops[4]; |
14220 | if (operands != ImageOperandsSampleMask || length != 6) |
14221 | SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " |
14222 | "operand mask was used."); |
14223 | |
14224 | uint32_t samples = ops[5]; |
14225 | imgexpr = |
14226 | join(ts: "imageLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: coord_expr, ts: ", ", ts: to_expression(id: samples), ts: ")"); |
14227 | } |
14228 | else |
14229 | imgexpr = join(ts: "imageLoad(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ", ", ts&: coord_expr, ts: ")"); |
14230 | } |
14231 | |
14232 | if (!sparse) |
14233 | imgexpr = remap_swizzle(out_type: get<SPIRType>(id: result_type), input_components: 4, expr: imgexpr); |
14234 | pure = false; |
14235 | } |
14236 | |
14237 | if (var) |
14238 | { |
14239 | bool forward = forced_temporaries.find(x: id) == end(cont&: forced_temporaries); |
14240 | auto &e = emit_op(result_type, result_id: id, rhs: imgexpr, forwarding: forward); |
14241 | |
14242 | // We only need to track dependencies if we're reading from image load/store. |
14243 | if (!pure) |
14244 | { |
14245 | e.loaded_from = var->self; |
14246 | if (forward) |
14247 | var->dependees.push_back(t: id); |
14248 | } |
14249 | } |
14250 | else |
14251 | emit_op(result_type, result_id: id, rhs: imgexpr, forwarding: false); |
14252 | |
14253 | inherit_expression_dependencies(dst: id, source: ops[2]); |
14254 | if (type.image.ms) |
14255 | inherit_expression_dependencies(dst: id, source: ops[5]); |
14256 | break; |
14257 | } |
14258 | |
14259 | case OpImageTexelPointer: |
14260 | { |
14261 | uint32_t result_type = ops[0]; |
14262 | uint32_t id = ops[1]; |
14263 | |
14264 | auto coord_expr = to_expression(id: ops[3]); |
14265 | auto target_coord_type = expression_type(id: ops[3]); |
14266 | target_coord_type.basetype = SPIRType::Int; |
14267 | coord_expr = bitcast_expression(target_type: target_coord_type, expr_type: expression_type(id: ops[3]).basetype, expr: coord_expr); |
14268 | |
14269 | auto expr = join(ts: to_expression(id: ops[2]), ts: ", ", ts&: coord_expr); |
14270 | auto &e = set<SPIRExpression>(id, args&: expr, args&: result_type, args: true); |
14271 | |
14272 | // When using the pointer, we need to know which variable it is actually loaded from. |
14273 | auto *var = maybe_get_backing_variable(chain: ops[2]); |
14274 | e.loaded_from = var ? var->self : ID(0); |
14275 | inherit_expression_dependencies(dst: id, source: ops[3]); |
14276 | break; |
14277 | } |
14278 | |
14279 | case OpImageWrite: |
14280 | { |
14281 | // We added Nonwritable speculatively to the OpImage variable due to glslangValidator |
14282 | // not adding the proper qualifiers. |
14283 | // If it turns out we need to write to the image after all, remove the qualifier and recompile. |
14284 | auto *var = maybe_get_backing_variable(chain: ops[0]); |
14285 | if (var) |
14286 | { |
14287 | if (has_decoration(id: var->self, decoration: DecorationNonWritable)) |
14288 | { |
14289 | unset_decoration(id: var->self, decoration: DecorationNonWritable); |
14290 | force_recompile(); |
14291 | } |
14292 | } |
14293 | |
14294 | auto &type = expression_type(id: ops[0]); |
14295 | auto &value_type = expression_type(id: ops[2]); |
14296 | auto store_type = value_type; |
14297 | store_type.vecsize = 4; |
14298 | |
14299 | // imageStore only accepts int coords, not uint. |
14300 | auto coord_expr = to_expression(id: ops[1]); |
14301 | auto target_coord_type = expression_type(id: ops[1]); |
14302 | target_coord_type.basetype = SPIRType::Int; |
14303 | coord_expr = bitcast_expression(target_type: target_coord_type, expr_type: expression_type(id: ops[1]).basetype, expr: coord_expr); |
14304 | |
14305 | // ES needs to emulate 1D images as 2D. |
14306 | if (type.image.dim == Dim1D && options.es) |
14307 | coord_expr = join(ts: "ivec2(", ts&: coord_expr, ts: ", 0)"); |
14308 | |
14309 | if (type.image.ms) |
14310 | { |
14311 | uint32_t operands = ops[3]; |
14312 | if (operands != ImageOperandsSampleMask || length != 5) |
14313 | SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); |
14314 | uint32_t samples = ops[4]; |
14315 | statement(ts: "imageStore(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts&: coord_expr, ts: ", ", ts: to_expression(id: samples), ts: ", ", |
14316 | ts: remap_swizzle(out_type: store_type, input_components: value_type.vecsize, expr: to_expression(id: ops[2])), ts: ");"); |
14317 | } |
14318 | else |
14319 | statement(ts: "imageStore(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts&: coord_expr, ts: ", ", |
14320 | ts: remap_swizzle(out_type: store_type, input_components: value_type.vecsize, expr: to_expression(id: ops[2])), ts: ");"); |
14321 | |
14322 | if (var && variable_storage_is_aliased(var: *var)) |
14323 | flush_all_aliased_variables(); |
14324 | break; |
14325 | } |
14326 | |
14327 | case OpImageQuerySize: |
14328 | { |
14329 | auto &type = expression_type(id: ops[2]); |
14330 | uint32_t result_type = ops[0]; |
14331 | uint32_t id = ops[1]; |
14332 | |
14333 | if (type.basetype == SPIRType::Image) |
14334 | { |
14335 | string expr; |
14336 | if (type.image.sampled == 2) |
14337 | { |
14338 | if (!options.es && options.version < 430) |
14339 | require_extension_internal(ext: "GL_ARB_shader_image_size"); |
14340 | else if (options.es && options.version < 310) |
14341 | SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize."); |
14342 | |
14343 | // The size of an image is always constant. |
14344 | expr = join(ts: "imageSize(", ts: to_non_uniform_aware_expression(id: ops[2]), ts: ")"); |
14345 | } |
14346 | else |
14347 | { |
14348 | // This path is hit for samplerBuffers and multisampled images which do not have LOD. |
14349 | std::string fname = "textureSize"; |
14350 | if (is_legacy()) |
14351 | { |
14352 | auto &imgtype = get<SPIRType>(id: type.self); |
14353 | fname = legacy_tex_op(op: fname, imgtype, tex: ops[2]); |
14354 | } |
14355 | expr = join(ts&: fname, ts: "(", ts: convert_separate_image_to_expression(id: ops[2]), ts: ")"); |
14356 | } |
14357 | |
14358 | auto &restype = get<SPIRType>(id: ops[0]); |
14359 | expr = bitcast_expression(target_type: restype, expr_type: SPIRType::Int, expr); |
14360 | emit_op(result_type, result_id: id, rhs: expr, forwarding: true); |
14361 | } |
14362 | else |
14363 | SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); |
14364 | break; |
14365 | } |
14366 | |
14367 | case OpImageSampleWeightedQCOM: |
14368 | case OpImageBoxFilterQCOM: |
14369 | case OpImageBlockMatchSSDQCOM: |
14370 | case OpImageBlockMatchSADQCOM: |
14371 | { |
14372 | require_extension_internal(ext: "GL_QCOM_image_processing"); |
14373 | uint32_t result_type_id = ops[0]; |
14374 | uint32_t id = ops[1]; |
14375 | string expr; |
14376 | switch (opcode) |
14377 | { |
14378 | case OpImageSampleWeightedQCOM: |
14379 | expr = "textureWeightedQCOM"; |
14380 | break; |
14381 | case OpImageBoxFilterQCOM: |
14382 | expr = "textureBoxFilterQCOM"; |
14383 | break; |
14384 | case OpImageBlockMatchSSDQCOM: |
14385 | expr = "textureBlockMatchSSDQCOM"; |
14386 | break; |
14387 | case OpImageBlockMatchSADQCOM: |
14388 | expr = "textureBlockMatchSADQCOM"; |
14389 | break; |
14390 | default: |
14391 | SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing."); |
14392 | } |
14393 | expr += "("; |
14394 | |
14395 | bool forward = false; |
14396 | expr += to_expression(id: ops[2]); |
14397 | expr += ", "+ to_expression(id: ops[3]); |
14398 | |
14399 | switch (opcode) |
14400 | { |
14401 | case OpImageSampleWeightedQCOM: |
14402 | expr += ", "+ to_non_uniform_aware_expression(id: ops[4]); |
14403 | break; |
14404 | case OpImageBoxFilterQCOM: |
14405 | expr += ", "+ to_expression(id: ops[4]); |
14406 | break; |
14407 | case OpImageBlockMatchSSDQCOM: |
14408 | case OpImageBlockMatchSADQCOM: |
14409 | expr += ", "+ to_non_uniform_aware_expression(id: ops[4]); |
14410 | expr += ", "+ to_expression(id: ops[5]); |
14411 | expr += ", "+ to_expression(id: ops[6]); |
14412 | break; |
14413 | default: |
14414 | SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing."); |
14415 | } |
14416 | |
14417 | expr += ")"; |
14418 | emit_op(result_type: result_type_id, result_id: id, rhs: expr, forwarding: forward); |
14419 | |
14420 | inherit_expression_dependencies(dst: id, source: ops[3]); |
14421 | if (opcode == OpImageBlockMatchSSDQCOM || opcode == OpImageBlockMatchSADQCOM) |
14422 | inherit_expression_dependencies(dst: id, source: ops[5]); |
14423 | |
14424 | break; |
14425 | } |
14426 | |
14427 | // Compute |
14428 | case OpControlBarrier: |
14429 | case OpMemoryBarrier: |
14430 | { |
14431 | uint32_t execution_scope = 0; |
14432 | uint32_t memory; |
14433 | uint32_t semantics; |
14434 | |
14435 | if (opcode == OpMemoryBarrier) |
14436 | { |
14437 | memory = evaluate_constant_u32(id: ops[0]); |
14438 | semantics = evaluate_constant_u32(id: ops[1]); |
14439 | } |
14440 | else |
14441 | { |
14442 | execution_scope = evaluate_constant_u32(id: ops[0]); |
14443 | memory = evaluate_constant_u32(id: ops[1]); |
14444 | semantics = evaluate_constant_u32(id: ops[2]); |
14445 | } |
14446 | |
14447 | if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup) |
14448 | { |
14449 | // OpControlBarrier with ScopeSubgroup is subgroupBarrier() |
14450 | if (opcode != OpControlBarrier) |
14451 | { |
14452 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupMemBarrier); |
14453 | } |
14454 | else |
14455 | { |
14456 | request_subgroup_feature(feature: ShaderSubgroupSupportHelper::SubgroupBarrier); |
14457 | } |
14458 | } |
14459 | |
14460 | if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl) |
14461 | { |
// Tessellation control shaders only have barrier(), and it already implies memory barriers.
14463 | if (opcode == OpControlBarrier) |
14464 | statement(ts: "barrier();"); |
14465 | break; |
14466 | } |
14467 | |
14468 | // We only care about these flags, acquire/release and friends are not relevant to GLSL. |
14469 | semantics = mask_relevant_memory_semantics(semantics); |
14470 | |
14471 | if (opcode == OpMemoryBarrier) |
14472 | { |
// If we are a memory barrier, and the next instruction is a control barrier, check if the
// control barrier's memory semantics cover ours, so we can avoid a redundant memory barrier.
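// E.g. memoryBarrierShared() immediately followed by barrier() at workgroup scope is
// redundant, since the control barrier already implies shared memory ordering there.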
14475 | const Instruction *next = get_next_instruction_in_block(instr: instruction); |
14476 | if (next && next->op == OpControlBarrier) |
14477 | { |
14478 | auto *next_ops = stream(instr: *next); |
14479 | uint32_t next_memory = evaluate_constant_u32(id: next_ops[1]); |
14480 | uint32_t next_semantics = evaluate_constant_u32(id: next_ops[2]); |
14481 | next_semantics = mask_relevant_memory_semantics(semantics: next_semantics); |
14482 | |
14483 | bool memory_scope_covered = false; |
14484 | if (next_memory == memory) |
14485 | memory_scope_covered = true; |
14486 | else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) |
14487 | { |
14488 | // If we only care about workgroup memory, either Device or Workgroup scope is fine, |
14489 | // scope does not have to match. |
14490 | if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && |
14491 | (memory == ScopeDevice || memory == ScopeWorkgroup)) |
14492 | { |
14493 | memory_scope_covered = true; |
14494 | } |
14495 | } |
14496 | else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) |
14497 | { |
14498 | // The control barrier has device scope, but the memory barrier just has workgroup scope. |
14499 | memory_scope_covered = true; |
14500 | } |
14501 | |
14502 | // If we have the same memory scope, and all memory types are covered, we're good. |
14503 | if (memory_scope_covered && (semantics & next_semantics) == semantics) |
14504 | break; |
14505 | } |
14506 | } |
14507 | |
14508 | // We are synchronizing some memory or syncing execution, |
14509 | // so we cannot forward any loads beyond the memory barrier. |
14510 | if (semantics || opcode == OpControlBarrier) |
14511 | { |
14512 | assert(current_emitting_block); |
14513 | flush_control_dependent_expressions(block: current_emitting_block->self); |
14514 | flush_all_active_variables(); |
14515 | } |
14516 | |
14517 | if (memory == ScopeWorkgroup) // Only need to consider memory within a group |
14518 | { |
14519 | if (semantics == MemorySemanticsWorkgroupMemoryMask) |
14520 | { |
14521 | // OpControlBarrier implies a memory barrier for shared memory as well. |
14522 | bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup; |
14523 | if (!implies_shared_barrier) |
14524 | statement(ts: "memoryBarrierShared();"); |
14525 | } |
14526 | else if (semantics != 0) |
14527 | statement(ts: "groupMemoryBarrier();"); |
14528 | } |
14529 | else if (memory == ScopeSubgroup) |
14530 | { |
14531 | const uint32_t all_barriers = |
14532 | MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; |
14533 | |
14534 | if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) |
14535 | { |
14536 | // These are not relevant for GLSL, but assume it means memoryBarrier(). |
14537 | // memoryBarrier() does everything, so no need to test anything else. |
14538 | statement(ts: "subgroupMemoryBarrier();"); |
14539 | } |
14540 | else if ((semantics & all_barriers) == all_barriers) |
14541 | { |
14542 | // Short-hand instead of emitting 3 barriers. |
14543 | statement(ts: "subgroupMemoryBarrier();"); |
14544 | } |
14545 | else |
14546 | { |
14547 | // Pick out individual barriers. |
14548 | if (semantics & MemorySemanticsWorkgroupMemoryMask) |
14549 | statement(ts: "subgroupMemoryBarrierShared();"); |
14550 | if (semantics & MemorySemanticsUniformMemoryMask) |
14551 | statement(ts: "subgroupMemoryBarrierBuffer();"); |
14552 | if (semantics & MemorySemanticsImageMemoryMask) |
14553 | statement(ts: "subgroupMemoryBarrierImage();"); |
14554 | } |
14555 | } |
14556 | else |
14557 | { |
14558 | const uint32_t all_barriers = |
14559 | MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; |
14560 | |
14561 | if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) |
14562 | { |
14563 | // These are not relevant for GLSL, but assume it means memoryBarrier(). |
14564 | // memoryBarrier() does everything, so no need to test anything else. |
14565 | statement(ts: "memoryBarrier();"); |
14566 | } |
14567 | else if ((semantics & all_barriers) == all_barriers) |
14568 | { |
// Short-hand instead of emitting 3 barriers.
14570 | statement(ts: "memoryBarrier();"); |
14571 | } |
14572 | else |
14573 | { |
14574 | // Pick out individual barriers. |
14575 | if (semantics & MemorySemanticsWorkgroupMemoryMask) |
14576 | statement(ts: "memoryBarrierShared();"); |
14577 | if (semantics & MemorySemanticsUniformMemoryMask) |
14578 | statement(ts: "memoryBarrierBuffer();"); |
14579 | if (semantics & MemorySemanticsImageMemoryMask) |
14580 | statement(ts: "memoryBarrierImage();"); |
14581 | } |
14582 | } |
14583 | |
14584 | if (opcode == OpControlBarrier) |
14585 | { |
14586 | if (execution_scope == ScopeSubgroup) |
14587 | statement(ts: "subgroupBarrier();"); |
14588 | else |
14589 | statement(ts: "barrier();"); |
14590 | } |
14591 | break; |
14592 | } |
14593 | |
14594 | case OpExtInst: |
14595 | { |
14596 | uint32_t extension_set = ops[2]; |
14597 | auto ext = get<SPIRExtension>(id: extension_set).ext; |
14598 | |
14599 | if (ext == SPIRExtension::GLSL) |
14600 | { |
14601 | emit_glsl_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length: length - 4); |
14602 | } |
14603 | else if (ext == SPIRExtension::SPV_AMD_shader_ballot) |
14604 | { |
14605 | emit_spv_amd_shader_ballot_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4); |
14606 | } |
14607 | else if (ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) |
14608 | { |
14609 | emit_spv_amd_shader_explicit_vertex_parameter_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4); |
14610 | } |
14611 | else if (ext == SPIRExtension::SPV_AMD_shader_trinary_minmax) |
14612 | { |
14613 | emit_spv_amd_shader_trinary_minmax_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4); |
14614 | } |
14615 | else if (ext == SPIRExtension::SPV_AMD_gcn_shader) |
14616 | { |
14617 | emit_spv_amd_gcn_shader_op(result_type: ops[0], id: ops[1], eop: ops[3], args: &ops[4], length - 4); |
14618 | } |
14619 | else if (ext == SPIRExtension::SPV_debug_info || |
14620 | ext == SPIRExtension::NonSemanticShaderDebugInfo || |
14621 | ext == SPIRExtension::NonSemanticGeneric) |
14622 | { |
14623 | break; // Ignore SPIR-V debug information extended instructions. |
14624 | } |
14625 | else if (ext == SPIRExtension::NonSemanticDebugPrintf) |
14626 | { |
14627 | // Operation 1 is printf. |
14628 | if (ops[3] == 1) |
14629 | { |
14630 | if (!options.vulkan_semantics) |
SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.");
14632 | require_extension_internal(ext: "GL_EXT_debug_printf"); |
14633 | auto &format_string = get<SPIRString>(id: ops[4]).str; |
14634 | string expr = join(ts: "debugPrintfEXT(\"", ts&: format_string, ts: "\""); |
14635 | for (uint32_t i = 5; i < length; i++) |
14636 | { |
14637 | expr += ", "; |
14638 | expr += to_expression(id: ops[i]); |
14639 | } |
14640 | statement(ts&: expr, ts: ");"); |
14641 | } |
14642 | } |
14643 | else |
14644 | { |
14645 | statement(ts: "// unimplemented ext op ", ts: instruction.op); |
14646 | break; |
14647 | } |
14648 | |
14649 | break; |
14650 | } |
14651 | |
14652 | // Legacy sub-group stuff ... |
14653 | case OpSubgroupBallotKHR: |
14654 | { |
14655 | uint32_t result_type = ops[0]; |
14656 | uint32_t id = ops[1]; |
14657 | string expr; |
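// ballotARB() returns a 64-bit mask; unpackUint2x32() splits it into a uvec2,
// which is then zero-padded up to the uvec4 ballot result SPIR-V expects.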
expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
14659 | emit_op(result_type, result_id: id, rhs: expr, forwarding: should_forward(id: ops[2])); |
14660 | |
14661 | require_extension_internal(ext: "GL_ARB_shader_ballot"); |
14662 | inherit_expression_dependencies(dst: id, source: ops[2]); |
14663 | register_control_dependent_expression(expr: ops[1]); |
14664 | break; |
14665 | } |
14666 | |
14667 | case OpSubgroupFirstInvocationKHR: |
14668 | { |
14669 | uint32_t result_type = ops[0]; |
14670 | uint32_t id = ops[1]; |
14671 | emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "readFirstInvocationARB"); |
14672 | |
14673 | require_extension_internal(ext: "GL_ARB_shader_ballot"); |
14674 | register_control_dependent_expression(expr: ops[1]); |
14675 | break; |
14676 | } |
14677 | |
14678 | case OpSubgroupReadInvocationKHR: |
14679 | { |
14680 | uint32_t result_type = ops[0]; |
14681 | uint32_t id = ops[1]; |
14682 | emit_binary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "readInvocationARB"); |
14683 | |
14684 | require_extension_internal(ext: "GL_ARB_shader_ballot"); |
14685 | register_control_dependent_expression(expr: ops[1]); |
14686 | break; |
14687 | } |
14688 | |
14689 | case OpSubgroupAllKHR: |
14690 | { |
14691 | uint32_t result_type = ops[0]; |
14692 | uint32_t id = ops[1]; |
14693 | emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "allInvocationsARB"); |
14694 | |
14695 | require_extension_internal(ext: "GL_ARB_shader_group_vote"); |
14696 | register_control_dependent_expression(expr: ops[1]); |
14697 | break; |
14698 | } |
14699 | |
14700 | case OpSubgroupAnyKHR: |
14701 | { |
14702 | uint32_t result_type = ops[0]; |
14703 | uint32_t id = ops[1]; |
14704 | emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "anyInvocationARB"); |
14705 | |
14706 | require_extension_internal(ext: "GL_ARB_shader_group_vote"); |
14707 | register_control_dependent_expression(expr: ops[1]); |
14708 | break; |
14709 | } |
14710 | |
14711 | case OpSubgroupAllEqualKHR: |
14712 | { |
14713 | uint32_t result_type = ops[0]; |
14714 | uint32_t id = ops[1]; |
14715 | emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "allInvocationsEqualARB"); |
14716 | |
14717 | require_extension_internal(ext: "GL_ARB_shader_group_vote"); |
14718 | register_control_dependent_expression(expr: ops[1]); |
14719 | break; |
14720 | } |
14721 | |
14722 | case OpGroupIAddNonUniformAMD: |
14723 | case OpGroupFAddNonUniformAMD: |
14724 | { |
14725 | uint32_t result_type = ops[0]; |
14726 | uint32_t id = ops[1]; |
14727 | emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "addInvocationsNonUniformAMD"); |
14728 | |
14729 | require_extension_internal(ext: "GL_AMD_shader_ballot"); |
14730 | register_control_dependent_expression(expr: ops[1]); |
14731 | break; |
14732 | } |
14733 | |
14734 | case OpGroupFMinNonUniformAMD: |
14735 | case OpGroupUMinNonUniformAMD: |
14736 | case OpGroupSMinNonUniformAMD: |
14737 | { |
14738 | uint32_t result_type = ops[0]; |
14739 | uint32_t id = ops[1]; |
14740 | emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "minInvocationsNonUniformAMD"); |
14741 | |
14742 | require_extension_internal(ext: "GL_AMD_shader_ballot"); |
14743 | register_control_dependent_expression(expr: ops[1]); |
14744 | break; |
14745 | } |
14746 | |
14747 | case OpGroupFMaxNonUniformAMD: |
14748 | case OpGroupUMaxNonUniformAMD: |
14749 | case OpGroupSMaxNonUniformAMD: |
14750 | { |
14751 | uint32_t result_type = ops[0]; |
14752 | uint32_t id = ops[1]; |
14753 | emit_unary_func_op(result_type, result_id: id, op0: ops[4], op: "maxInvocationsNonUniformAMD"); |
14754 | |
14755 | require_extension_internal(ext: "GL_AMD_shader_ballot"); |
14756 | register_control_dependent_expression(expr: ops[1]); |
14757 | break; |
14758 | } |
14759 | |
14760 | case OpFragmentMaskFetchAMD: |
14761 | { |
14762 | auto &type = expression_type(id: ops[2]); |
14763 | uint32_t result_type = ops[0]; |
14764 | uint32_t id = ops[1]; |
14765 | |
14766 | if (type.image.dim == spv::DimSubpassData) |
14767 | { |
14768 | emit_unary_func_op(result_type, result_id: id, op0: ops[2], op: "fragmentMaskFetchAMD"); |
14769 | } |
14770 | else |
14771 | { |
14772 | emit_binary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op: "fragmentMaskFetchAMD"); |
14773 | } |
14774 | |
14775 | require_extension_internal(ext: "GL_AMD_shader_fragment_mask"); |
14776 | break; |
14777 | } |
14778 | |
14779 | case OpFragmentFetchAMD: |
14780 | { |
14781 | auto &type = expression_type(id: ops[2]); |
14782 | uint32_t result_type = ops[0]; |
14783 | uint32_t id = ops[1]; |
14784 | |
14785 | if (type.image.dim == spv::DimSubpassData) |
14786 | { |
14787 | emit_binary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[4], op: "fragmentFetchAMD"); |
14788 | } |
14789 | else |
14790 | { |
14791 | emit_trinary_func_op(result_type, result_id: id, op0: ops[2], op1: ops[3], op2: ops[4], op: "fragmentFetchAMD"); |
14792 | } |
14793 | |
14794 | require_extension_internal(ext: "GL_AMD_shader_fragment_mask"); |
14795 | break; |
14796 | } |
14797 | |
14798 | // Vulkan 1.1 sub-group stuff ... |
14799 | case OpGroupNonUniformElect: |
14800 | case OpGroupNonUniformBroadcast: |
14801 | case OpGroupNonUniformBroadcastFirst: |
14802 | case OpGroupNonUniformBallot: |
14803 | case OpGroupNonUniformInverseBallot: |
14804 | case OpGroupNonUniformBallotBitExtract: |
14805 | case OpGroupNonUniformBallotBitCount: |
14806 | case OpGroupNonUniformBallotFindLSB: |
14807 | case OpGroupNonUniformBallotFindMSB: |
14808 | case OpGroupNonUniformShuffle: |
14809 | case OpGroupNonUniformShuffleXor: |
14810 | case OpGroupNonUniformShuffleUp: |
14811 | case OpGroupNonUniformShuffleDown: |
14812 | case OpGroupNonUniformAll: |
14813 | case OpGroupNonUniformAny: |
14814 | case OpGroupNonUniformAllEqual: |
14815 | case OpGroupNonUniformFAdd: |
14816 | case OpGroupNonUniformIAdd: |
14817 | case OpGroupNonUniformFMul: |
14818 | case OpGroupNonUniformIMul: |
14819 | case OpGroupNonUniformFMin: |
14820 | case OpGroupNonUniformFMax: |
14821 | case OpGroupNonUniformSMin: |
14822 | case OpGroupNonUniformSMax: |
14823 | case OpGroupNonUniformUMin: |
14824 | case OpGroupNonUniformUMax: |
14825 | case OpGroupNonUniformBitwiseAnd: |
14826 | case OpGroupNonUniformBitwiseOr: |
14827 | case OpGroupNonUniformBitwiseXor: |
14828 | case OpGroupNonUniformLogicalAnd: |
14829 | case OpGroupNonUniformLogicalOr: |
14830 | case OpGroupNonUniformLogicalXor: |
14831 | case OpGroupNonUniformQuadSwap: |
14832 | case OpGroupNonUniformQuadBroadcast: |
14833 | emit_subgroup_op(i: instruction); |
14834 | break; |
14835 | |
14836 | case OpFUnordEqual: |
14837 | case OpFUnordLessThan: |
14838 | case OpFUnordGreaterThan: |
14839 | case OpFUnordLessThanEqual: |
14840 | case OpFUnordGreaterThanEqual: |
14841 | { |
14842 | // GLSL doesn't specify if floating point comparisons are ordered or unordered, |
14843 | // but glslang always emits ordered floating point compares for GLSL. |
14844 | // To get unordered compares, we can test the opposite thing and invert the result. |
14845 | // This way, we force true when there is any NaN present. |
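// E.g. FUnordLessThan(a, b) becomes !(a >= b), which is true when a < b or
// when either operand is NaN.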
14846 | uint32_t op0 = ops[2]; |
14847 | uint32_t op1 = ops[3]; |
14848 | |
14849 | string expr; |
14850 | if (expression_type(id: op0).vecsize > 1) |
14851 | { |
14852 | const char *comp_op = nullptr; |
14853 | switch (opcode) |
14854 | { |
14855 | case OpFUnordEqual: |
14856 | comp_op = "notEqual"; |
14857 | break; |
14858 | |
14859 | case OpFUnordLessThan: |
14860 | comp_op = "greaterThanEqual"; |
14861 | break; |
14862 | |
14863 | case OpFUnordLessThanEqual: |
14864 | comp_op = "greaterThan"; |
14865 | break; |
14866 | |
14867 | case OpFUnordGreaterThan: |
14868 | comp_op = "lessThanEqual"; |
14869 | break; |
14870 | |
14871 | case OpFUnordGreaterThanEqual: |
14872 | comp_op = "lessThan"; |
14873 | break; |
14874 | |
14875 | default: |
14876 | assert(0); |
14877 | break; |
14878 | } |
14879 | |
14880 | expr = join(ts: "not(", ts&: comp_op, ts: "(", ts: to_unpacked_expression(id: op0), ts: ", ", ts: to_unpacked_expression(id: op1), ts: "))"); |
14881 | } |
14882 | else |
14883 | { |
14884 | const char *comp_op = nullptr; |
14885 | switch (opcode) |
14886 | { |
14887 | case OpFUnordEqual: |
14888 | comp_op = " != "; |
14889 | break; |
14890 | |
14891 | case OpFUnordLessThan: |
14892 | comp_op = " >= "; |
14893 | break; |
14894 | |
14895 | case OpFUnordLessThanEqual: |
14896 | comp_op = " > "; |
14897 | break; |
14898 | |
14899 | case OpFUnordGreaterThan: |
14900 | comp_op = " <= "; |
14901 | break; |
14902 | |
14903 | case OpFUnordGreaterThanEqual: |
14904 | comp_op = " < "; |
14905 | break; |
14906 | |
14907 | default: |
14908 | assert(0); |
14909 | break; |
14910 | } |
14911 | |
14912 | expr = join(ts: "!(", ts: to_enclosed_unpacked_expression(id: op0), ts&: comp_op, ts: to_enclosed_unpacked_expression(id: op1), ts: ")"); |
14913 | } |
14914 | |
14915 | emit_op(result_type: ops[0], result_id: ops[1], rhs: expr, forwarding: should_forward(id: op0) && should_forward(id: op1)); |
14916 | inherit_expression_dependencies(dst: ops[1], source: op0); |
14917 | inherit_expression_dependencies(dst: ops[1], source: op1); |
14918 | break; |
14919 | } |
14920 | |
14921 | case OpReportIntersectionKHR: |
14922 | // NV is same opcode. |
14923 | forced_temporaries.insert(x: ops[1]); |
14924 | if (ray_tracing_is_khr) |
14925 | GLSL_BFOP(reportIntersectionEXT); |
14926 | else |
14927 | GLSL_BFOP(reportIntersectionNV); |
14928 | flush_control_dependent_expressions(block: current_emitting_block->self); |
14929 | break; |
14930 | case OpIgnoreIntersectionNV: |
14931 | // KHR variant is a terminator. |
14932 | statement(ts: "ignoreIntersectionNV();"); |
14933 | flush_control_dependent_expressions(block: current_emitting_block->self); |
14934 | break; |
14935 | case OpTerminateRayNV: |
14936 | // KHR variant is a terminator. |
14937 | statement(ts: "terminateRayNV();"); |
14938 | flush_control_dependent_expressions(block: current_emitting_block->self); |
14939 | break; |
14940 | case OpTraceNV: |
14941 | statement(ts: "traceNV(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ", ", ts: to_expression(id: ops[2]), ts: ", ", |
14942 | ts: to_expression(id: ops[3]), ts: ", ", ts: to_expression(id: ops[4]), ts: ", ", ts: to_expression(id: ops[5]), ts: ", ", |
14943 | ts: to_expression(id: ops[6]), ts: ", ", ts: to_expression(id: ops[7]), ts: ", ", ts: to_expression(id: ops[8]), ts: ", ", |
14944 | ts: to_expression(id: ops[9]), ts: ", ", ts: to_expression(id: ops[10]), ts: ");"); |
14945 | flush_control_dependent_expressions(block: current_emitting_block->self); |
14946 | break; |
14947 | case OpTraceRayKHR: |
14948 | if (!has_decoration(id: ops[10], decoration: DecorationLocation)) |
14949 | SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR."); |
14950 | statement(ts: "traceRayEXT(", ts: to_non_uniform_aware_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ", ", ts: to_expression(id: ops[2]), ts: ", ", |
14951 | ts: to_expression(id: ops[3]), ts: ", ", ts: to_expression(id: ops[4]), ts: ", ", ts: to_expression(id: ops[5]), ts: ", ", |
14952 | ts: to_expression(id: ops[6]), ts: ", ", ts: to_expression(id: ops[7]), ts: ", ", ts: to_expression(id: ops[8]), ts: ", ", |
14953 | ts: to_expression(id: ops[9]), ts: ", ", ts: get_decoration(id: ops[10], decoration: DecorationLocation), ts: ");"); |
14954 | flush_control_dependent_expressions(block: current_emitting_block->self); |
14955 | break; |
14956 | case OpExecuteCallableNV: |
14957 | statement(ts: "executeCallableNV(", ts: to_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ");"); |
14958 | flush_control_dependent_expressions(block: current_emitting_block->self); |
14959 | break; |
14960 | case OpExecuteCallableKHR: |
14961 | if (!has_decoration(id: ops[1], decoration: DecorationLocation)) |
14962 | SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR."); |
14963 | statement(ts: "executeCallableEXT(", ts: to_expression(id: ops[0]), ts: ", ", ts: get_decoration(id: ops[1], decoration: DecorationLocation), ts: ");"); |
14964 | flush_control_dependent_expressions(block: current_emitting_block->self); |
14965 | break; |
14966 | |
14967 | // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects. |
14968 | case OpRayQueryInitializeKHR: |
14969 | flush_variable_declaration(id: ops[0]); |
14970 | statement(ts: "rayQueryInitializeEXT(", |
14971 | ts: to_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ", ", |
14972 | ts: to_expression(id: ops[2]), ts: ", ", ts: to_expression(id: ops[3]), ts: ", ", |
14973 | ts: to_expression(id: ops[4]), ts: ", ", ts: to_expression(id: ops[5]), ts: ", ", |
14974 | ts: to_expression(id: ops[6]), ts: ", ", ts: to_expression(id: ops[7]), ts: ");"); |
14975 | break; |
14976 | case OpRayQueryProceedKHR: |
14977 | flush_variable_declaration(id: ops[0]); |
14978 | emit_op(result_type: ops[0], result_id: ops[1], rhs: join(ts: "rayQueryProceedEXT(", ts: to_expression(id: ops[2]), ts: ")"), forwarding: false); |
14979 | break; |
14980 | case OpRayQueryTerminateKHR: |
14981 | flush_variable_declaration(id: ops[0]); |
14982 | statement(ts: "rayQueryTerminateEXT(", ts: to_expression(id: ops[0]), ts: ");"); |
14983 | break; |
14984 | case OpRayQueryGenerateIntersectionKHR: |
14985 | flush_variable_declaration(id: ops[0]); |
14986 | statement(ts: "rayQueryGenerateIntersectionEXT(", ts: to_expression(id: ops[0]), ts: ", ", ts: to_expression(id: ops[1]), ts: ");"); |
14987 | break; |
14988 | case OpRayQueryConfirmIntersectionKHR: |
14989 | flush_variable_declaration(id: ops[0]); |
14990 | statement(ts: "rayQueryConfirmIntersectionEXT(", ts: to_expression(id: ops[0]), ts: ");"); |
14991 | break; |
14992 | #define GLSL_RAY_QUERY_GET_OP(op) \ |
14993 | case OpRayQueryGet##op##KHR: \ |
14994 | flush_variable_declaration(ops[2]); \ |
14995 | emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \ |
14996 | break |
14997 | #define GLSL_RAY_QUERY_GET_OP2(op) \ |
14998 | case OpRayQueryGet##op##KHR: \ |
14999 | flush_variable_declaration(ops[2]); \ |
15000 | emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \ |
15001 | break |
15002 | GLSL_RAY_QUERY_GET_OP(RayTMin); |
15003 | GLSL_RAY_QUERY_GET_OP(RayFlags); |
15004 | GLSL_RAY_QUERY_GET_OP(WorldRayOrigin); |
15005 | GLSL_RAY_QUERY_GET_OP(WorldRayDirection); |
15006 | GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque); |
15007 | GLSL_RAY_QUERY_GET_OP2(IntersectionType); |
15008 | GLSL_RAY_QUERY_GET_OP2(IntersectionT); |
15009 | GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex); |
15010 | GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId); |
15011 | GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset); |
15012 | GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex); |
15013 | GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex); |
15014 | GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics); |
15015 | GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace); |
15016 | GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection); |
15017 | GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin); |
15018 | GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld); |
15019 | GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject); |
15020 | #undef GLSL_RAY_QUERY_GET_OP |
15021 | #undef GLSL_RAY_QUERY_GET_OP2 |
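// For reference, GLSL_RAY_QUERY_GET_OP2(IntersectionT) above expands to a case for
// OpRayQueryGetIntersectionTKHR emitting e.g. "rayQueryGetIntersectionTEXT(q, bool(committed))"
// (illustrative operand names); the bool() wrap is needed because the
// committed-intersection operand arrives as an integer in SPIR-V.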
15022 | |
15023 | case OpConvertUToAccelerationStructureKHR: |
15024 | { |
15025 | require_extension_internal(ext: "GL_EXT_ray_tracing"); |
15026 | |
15027 | bool elide_temporary = should_forward(id: ops[2]) && forced_temporaries.count(x: ops[1]) == 0 && |
15028 | !hoisted_temporaries.count(x: ops[1]); |
15029 | |
15030 | if (elide_temporary) |
15031 | { |
15032 | GLSL_UFOP(accelerationStructureEXT); |
15033 | } |
15034 | else |
15035 | { |
15036 | // Force this path in subsequent iterations. |
15037 | forced_temporaries.insert(x: ops[1]); |
15038 | |
15039 | // We cannot declare a temporary acceleration structure in GLSL. |
15040 | // If we get to this point, we'll have to emit a temporary uvec2, |
15041 | // and cast to RTAS on demand. |
15042 | statement(ts: declare_temporary(result_type: expression_type_id(id: ops[2]), result_id: ops[1]), ts: to_unpacked_expression(id: ops[2]), ts: ";"); |
15043 | // Use raw SPIRExpression interface to block all usage tracking. |
15044 | set<SPIRExpression>(id: ops[1], args: join(ts: "accelerationStructureEXT(", ts: to_name(id: ops[1]), ts: ")"), args: ops[0], args: true); |
15045 | } |
15046 | break; |
15047 | } |
15048 | |
15049 | case OpConvertUToPtr: |
15050 | { |
15051 | auto &type = get<SPIRType>(id: ops[0]); |
15052 | if (type.storage != StorageClassPhysicalStorageBufferEXT) |
15053 | SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr."); |
15054 | |
15055 | auto &in_type = expression_type(id: ops[2]); |
15056 | if (in_type.vecsize == 2) |
15057 | require_extension_internal(ext: "GL_EXT_buffer_reference_uvec2"); |
15058 | |
15059 | auto op = type_to_glsl(type); |
15060 | emit_unary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op: op.c_str()); |
15061 | break; |
15062 | } |
15063 | |
15064 | case OpConvertPtrToU: |
15065 | { |
15066 | auto &type = get<SPIRType>(id: ops[0]); |
15067 | auto &ptr_type = expression_type(id: ops[2]); |
15068 | if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT) |
15069 | SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU."); |
15070 | |
15071 | if (type.vecsize == 2) |
15072 | require_extension_internal(ext: "GL_EXT_buffer_reference_uvec2"); |
15073 | |
15074 | auto op = type_to_glsl(type); |
15075 | emit_unary_func_op(result_type: ops[0], result_id: ops[1], op0: ops[2], op: op.c_str()); |
15076 | break; |
15077 | } |
15078 | |
15079 | case OpUndef: |
15080 | // Undefined value has been declared. |
15081 | break; |
15082 | |
15083 | case OpLine: |
15084 | { |
15085 | emit_line_directive(file_id: ops[0], line_literal: ops[1]); |
15086 | break; |
15087 | } |
15088 | |
15089 | case OpNoLine: |
15090 | break; |
15091 | |
15092 | case OpDemoteToHelperInvocationEXT: |
15093 | if (!options.vulkan_semantics) |
15094 | SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); |
15095 | require_extension_internal(ext: "GL_EXT_demote_to_helper_invocation"); |
15096 | statement(ts&: backend.demote_literal, ts: ";"); |
15097 | break; |
15098 | |
15099 | case OpIsHelperInvocationEXT: |
15100 | if (!options.vulkan_semantics) |
15101 | SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); |
15102 | require_extension_internal(ext: "GL_EXT_demote_to_helper_invocation"); |
15103 | // Helper lane state with demote is volatile by nature. |
15104 | // Do not forward this. |
15105 | emit_op(result_type: ops[0], result_id: ops[1], rhs: "helperInvocationEXT()", forwarding: false); |
15106 | break; |
15107 | |
15108 | case OpBeginInvocationInterlockEXT: |
15109 | // If the interlock is complex, we emit this elsewhere. |
15110 | if (!interlocked_is_complex) |
15111 | { |
15112 | statement(ts: "SPIRV_Cross_beginInvocationInterlock();"); |
15113 | flush_all_active_variables(); |
// Make sure forwarding doesn't propagate outside the interlock region.
15115 | } |
15116 | break; |
15117 | |
15118 | case OpEndInvocationInterlockEXT: |
15119 | // If the interlock is complex, we emit this elsewhere. |
15120 | if (!interlocked_is_complex) |
15121 | { |
15122 | statement(ts: "SPIRV_Cross_endInvocationInterlock();"); |
15123 | flush_all_active_variables(); |
// Make sure forwarding doesn't propagate outside the interlock region.
15125 | } |
15126 | break; |
15127 | |
15128 | case OpSetMeshOutputsEXT: |
15129 | statement(ts: "SetMeshOutputsEXT(", ts: to_unpacked_expression(id: ops[0]), ts: ", ", ts: to_unpacked_expression(id: ops[1]), ts: ");"); |
15130 | break; |
15131 | |
15132 | case OpReadClockKHR: |
15133 | { |
15134 | auto &type = get<SPIRType>(id: ops[0]); |
15135 | auto scope = static_cast<Scope>(evaluate_constant_u32(id: ops[2])); |
15136 | const char *op = nullptr; |
// Forwarding clock statements leads to a scenario where an SSA value can take on
// different values every time it's evaluated. Block any forwarding attempt.
// We might also want to invalidate all expressions to act as a sort of optimization
// barrier, but that might be overkill for now.
15141 | if (scope == ScopeDevice) |
15142 | { |
15143 | require_extension_internal(ext: "GL_EXT_shader_realtime_clock"); |
15144 | if (type.basetype == SPIRType::BaseType::UInt64) |
15145 | op = "clockRealtimeEXT()"; |
15146 | else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2) |
15147 | op = "clockRealtime2x32EXT()"; |
15148 | else |
15149 | SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode."); |
15150 | } |
15151 | else if (scope == ScopeSubgroup) |
15152 | { |
15153 | require_extension_internal(ext: "GL_ARB_shader_clock"); |
15154 | if (type.basetype == SPIRType::BaseType::UInt64) |
15155 | op = "clockARB()"; |
15156 | else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2) |
15157 | op = "clock2x32ARB()"; |
15158 | else |
15159 | SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode."); |
15160 | } |
15161 | else |
15162 | SPIRV_CROSS_THROW("Unsupported scope for OpReadClockKHR opcode."); |
15163 | |
15164 | emit_op(result_type: ops[0], result_id: ops[1], rhs: op, forwarding: false); |
15165 | break; |
15166 | } |
15167 | |
15168 | default: |
15169 | statement(ts: "// unimplemented op ", ts: instruction.op); |
15170 | break; |
15171 | } |
15172 | } |
15173 | |
// Appends function arguments, mapped from global variables, beyond the specified arg index.
// This is used when a function call uses fewer arguments than the function defines.
// This situation may occur if the function signature has been dynamically modified to
// extract global variables referenced from within the function and convert them to
// function arguments. This is necessary for shader languages that do not support global
// access to shader input content from within a function (e.g. Metal). Each additional
// function arg uses the name of the global variable. Function nesting will modify the
// functions and function calls all the way up the nesting chain.
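//
// Illustrative sketch (hypothetical name): a function that reads a global "uTex"
// gets an extra parameter named "uTex" appended to its signature, every call site
// passes the global through under that name, and nested callers are rewritten the
// same way, all the way up the call chain.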
15182 | void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist) |
15183 | { |
15184 | auto &args = func.arguments; |
15185 | uint32_t arg_cnt = uint32_t(args.size()); |
15186 | for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++) |
15187 | { |
15188 | auto &arg = args[arg_idx]; |
15189 | assert(arg.alias_global_variable); |
15190 | |
// If the underlying variable needs to be declared
// (i.e. a local variable with deferred declaration), do so now.
15193 | uint32_t var_id = get<SPIRVariable>(id: arg.id).basevariable; |
15194 | if (var_id) |
15195 | flush_variable_declaration(id: var_id); |
15196 | |
15197 | arglist.push_back(t: to_func_call_arg(arg, id: arg.id)); |
15198 | } |
15199 | } |
15200 | |
15201 | string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index) |
15202 | { |
15203 | if (type.type_alias != TypeID(0) && |
15204 | !has_extended_decoration(id: type.type_alias, decoration: SPIRVCrossDecorationBufferBlockRepacked)) |
15205 | { |
15206 | return to_member_name(type: get<SPIRType>(id: type.type_alias), index); |
15207 | } |
15208 | |
15209 | auto &memb = ir.meta[type.self].members; |
15210 | if (index < memb.size() && !memb[index].alias.empty()) |
15211 | return memb[index].alias; |
15212 | else |
15213 | return join(ts: "_m", ts&: index); |
15214 | } |
15215 | |
15216 | string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool) |
15217 | { |
15218 | return join(ts: ".", ts: to_member_name(type, index)); |
15219 | } |
15220 | |
15221 | string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices) |
15222 | { |
15223 | string ret; |
15224 | auto *member_type = &type; |
15225 | for (auto &index : indices) |
15226 | { |
15227 | ret += join(ts: ".", ts: to_member_name(type: *member_type, index)); |
15228 | member_type = &get<SPIRType>(id: member_type->member_types[index]); |
15229 | } |
15230 | return ret; |
15231 | } |
15232 | |
15233 | void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index) |
15234 | { |
15235 | auto &memb = ir.meta[type.self].members; |
15236 | if (index < memb.size() && !memb[index].alias.empty()) |
15237 | { |
15238 | auto &name = memb[index].alias; |
15239 | if (name.empty()) |
15240 | return; |
15241 | |
15242 | ParsedIR::sanitize_identifier(str&: name, member: true, allow_reserved_prefixes: true); |
15243 | update_name_cache(cache&: type.member_name_cache, name); |
15244 | } |
15245 | } |
15246 | |
15247 | // Checks whether the ID is a row_major matrix that requires conversion before use |
15248 | bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id) |
15249 | { |
15250 | // Natively supported row-major matrices do not need to be converted. |
15251 | // Legacy targets do not support row major. |
15252 | if (backend.native_row_major_matrix && !is_legacy()) |
15253 | return false; |
15254 | |
15255 | auto *e = maybe_get<SPIRExpression>(id); |
15256 | if (e) |
15257 | return e->need_transpose; |
15258 | else |
15259 | return has_decoration(id, decoration: DecorationRowMajor); |
15260 | } |
15261 | |
15262 | // Checks whether the member is a row_major matrix that requires conversion before use |
15263 | bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) |
15264 | { |
15265 | // Natively supported row-major matrices do not need to be converted. |
15266 | if (backend.native_row_major_matrix && !is_legacy()) |
15267 | return false; |
15268 | |
15269 | // Non-matrix or column-major matrix types do not need to be converted. |
15270 | if (!has_member_decoration(id: type.self, index, decoration: DecorationRowMajor)) |
15271 | return false; |
15272 | |
15273 | // Only square row-major matrices can be converted at this time. |
// Converting non-square matrices would require defining a custom GLSL function that
// swaps matrix elements while retaining the original dimensional form of the matrix.
15276 | const auto mbr_type = get<SPIRType>(id: type.member_types[index]); |
15277 | if (mbr_type.columns != mbr_type.vecsize) |
15278 | SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); |
15279 | |
15280 | return true; |
15281 | } |
15282 | |
15283 | // Checks if we need to remap physical type IDs when declaring the type in a buffer. |
15284 | bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const |
15285 | { |
15286 | return has_extended_member_decoration(type: type.self, index, decoration: SPIRVCrossDecorationPhysicalTypeID); |
15287 | } |
15288 | |
// Checks whether the member is a packed physical type that might need to be unpacked.
15290 | bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const |
15291 | { |
15292 | return has_extended_member_decoration(type: type.self, index, decoration: SPIRVCrossDecorationPhysicalTypePacked); |
15293 | } |
15294 | |
15295 | // Wraps the expression string in a function call that converts the |
15296 | // row_major matrix result of the expression to a column_major matrix. |
15297 | // Base implementation uses the standard library transpose() function. |
15298 | // Subclasses may override to use a different function. |
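//
// Illustrative examples, derived from the logic below: a full matrix load becomes
// "transpose(M)" (or an "spvTranspose" polyfill before GLSL 1.20), while loading a
// single column such as "M[1]" from a row-major mat3 M is unrolled into
// "vec3(M[0][1], M[1][1], M[2][1])".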
15299 | string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */, |
15300 | bool /*is_packed*/, bool relaxed) |
15301 | { |
15302 | strip_enclosed_expression(expr&: exp_str); |
15303 | if (!is_matrix(type: exp_type)) |
15304 | { |
15305 | auto column_index = exp_str.find_last_of(c: '['); |
15306 | if (column_index == string::npos) |
15307 | return exp_str; |
15308 | |
15309 | auto column_expr = exp_str.substr(pos: column_index); |
15310 | exp_str.resize(n: column_index); |
15311 | |
15312 | auto end_deferred_index = column_expr.find_last_of(c: ']'); |
15313 | if (end_deferred_index != string::npos && end_deferred_index + 1 != column_expr.size()) |
15314 | { |
15315 | // If we have any data member fixups, it must be transposed so that it refers to this index. |
15316 | // E.g. [0].data followed by [1] would be shuffled to [1][0].data which is wrong, |
15317 | // and needs to be [1].data[0] instead. |
15318 | end_deferred_index++; |
15319 | column_expr = column_expr.substr(pos: end_deferred_index) + |
15320 | column_expr.substr(pos: 0, n: end_deferred_index); |
15321 | } |
15322 | |
15323 | auto transposed_expr = type_to_glsl_constructor(type: exp_type) + "("; |
15324 | |
15325 | // Loading a column from a row-major matrix. Unroll the load. |
15326 | for (uint32_t c = 0; c < exp_type.vecsize; c++) |
15327 | { |
15328 | transposed_expr += join(ts&: exp_str, ts: '[', ts&: c, ts: ']', ts&: column_expr); |
15329 | if (c + 1 < exp_type.vecsize) |
15330 | transposed_expr += ", "; |
15331 | } |
15332 | |
15333 | transposed_expr += ")"; |
15334 | return transposed_expr; |
15335 | } |
15336 | else if (options.version < 120) |
15337 | { |
15338 | // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that |
15339 | // these GLSL versions do not support non-square matrices. |
15340 | if (exp_type.vecsize == 2 && exp_type.columns == 2) |
15341 | require_polyfill(polyfill: PolyfillTranspose2x2, relaxed); |
15342 | else if (exp_type.vecsize == 3 && exp_type.columns == 3) |
15343 | require_polyfill(polyfill: PolyfillTranspose3x3, relaxed); |
15344 | else if (exp_type.vecsize == 4 && exp_type.columns == 4) |
15345 | require_polyfill(polyfill: PolyfillTranspose4x4, relaxed); |
15346 | else |
15347 | SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose."); |
15348 | return join(ts: "spvTranspose", ts: (options.es && relaxed) ? "MP": "", ts: "(", ts&: exp_str, ts: ")"); |
15349 | } |
15350 | else |
15351 | return join(ts: "transpose(", ts&: exp_str, ts: ")"); |
15352 | } |
15353 | |
15354 | string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id) |
15355 | { |
15356 | string type_name = type_to_glsl(type, id); |
15357 | remap_variable_type_name(type, var_name: name, type_name); |
15358 | return join(ts&: type_name, ts: " ", ts: name, ts: type_to_array_glsl(type, variable_id: id)); |
15359 | } |
15360 | |
15361 | bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const |
15362 | { |
15363 | return var.storage == storage; |
15364 | } |
15365 | |
15366 | // Emit a structure member. Subclasses may override to modify output, |
15367 | // or to dynamically add a padding member if needed. |
15368 | void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, |
15369 | const string &qualifier, uint32_t) |
15370 | { |
15371 | auto &membertype = get<SPIRType>(id: member_type_id); |
15372 | |
15373 | Bitset memberflags; |
15374 | auto &memb = ir.meta[type.self].members; |
15375 | if (index < memb.size()) |
15376 | memberflags = memb[index].decoration_flags; |
15377 | |
15378 | string qualifiers; |
15379 | bool is_block = ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBlock) || |
15380 | ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock); |
15381 | |
15382 | if (is_block) |
15383 | qualifiers = to_interpolation_qualifiers(flags: memberflags); |
15384 | |
15385 | statement(ts: layout_for_member(type, index), ts&: qualifiers, ts: qualifier, ts: flags_to_qualifiers_glsl(type: membertype, flags: memberflags), |
15386 | ts: variable_decl(type: membertype, name: to_member_name(type, index)), ts: ";"); |
15387 | } |
15388 | |
15389 | void CompilerGLSL::emit_struct_padding_target(const SPIRType &) |
15390 | { |
15391 | } |
15392 | |
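// Worked example for the precision logic below, assuming an ES fragment shader
// whose default float precision is mediump: a RelaxedPrecision float needs no
// qualifier, since mediump is already implied, while a full-precision float must
// be spelled out as "highp" because the default no longer covers it.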
15393 | string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags) |
15394 | { |
15395 | // GL_EXT_buffer_reference variables can be marked as restrict. |
15396 | if (flags.get(bit: DecorationRestrictPointerEXT)) |
15397 | return "restrict "; |
15398 | |
15399 | string qual; |
15400 | |
15401 | if (type_is_floating_point(type) && flags.get(bit: DecorationNoContraction) && backend.support_precise_qualifier) |
15402 | qual = "precise "; |
15403 | |
// Structs do not have precision qualifiers, and neither do doubles (desktop-only anyway, so no mediump/highp).
15405 | bool type_supports_precision = |
15406 | type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt || |
15407 | type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || |
15408 | type.basetype == SPIRType::Sampler; |
15409 | |
15410 | if (!type_supports_precision) |
15411 | return qual; |
15412 | |
15413 | if (options.es) |
15414 | { |
15415 | auto &execution = get_entry_point(); |
15416 | |
15417 | if (type.basetype == SPIRType::UInt && is_legacy_es()) |
15418 | { |
15419 | // HACK: This is a bool. See comment in type_to_glsl(). |
15420 | qual += "lowp "; |
15421 | } |
15422 | else if (flags.get(bit: DecorationRelaxedPrecision)) |
15423 | { |
15424 | bool implied_fmediump = type.basetype == SPIRType::Float && |
15425 | options.fragment.default_float_precision == Options::Mediump && |
15426 | execution.model == ExecutionModelFragment; |
15427 | |
15428 | bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && |
15429 | options.fragment.default_int_precision == Options::Mediump && |
15430 | execution.model == ExecutionModelFragment; |
15431 | |
qual += (implied_fmediump || implied_imediump) ? "" : "mediump ";
15433 | } |
15434 | else |
15435 | { |
15436 | bool implied_fhighp = |
15437 | type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp && |
15438 | execution.model == ExecutionModelFragment) || |
15439 | (execution.model != ExecutionModelFragment)); |
15440 | |
15441 | bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && |
15442 | ((options.fragment.default_int_precision == Options::Highp && |
15443 | execution.model == ExecutionModelFragment) || |
15444 | (execution.model != ExecutionModelFragment)); |
15445 | |
qual += (implied_fhighp || implied_ihighp) ? "" : "highp ";
15447 | } |
15448 | } |
15449 | else if (backend.allow_precision_qualifiers) |
15450 | { |
15451 | // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient. |
15452 | // The default is highp however, so only emit mediump in the rare case that a shader has these. |
15453 | if (flags.get(bit: DecorationRelaxedPrecision)) |
15454 | qual += "mediump "; |
15455 | } |
15456 | |
15457 | return qual; |
15458 | } |
15459 | |
15460 | string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) |
15461 | { |
15462 | auto &type = expression_type(id); |
15463 | bool use_precision_qualifiers = backend.allow_precision_qualifiers; |
15464 | if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage)) |
15465 | { |
15466 | // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types. |
15467 | auto &result_type = get<SPIRType>(id: type.image.type); |
15468 | if (result_type.width < 32) |
15469 | return "mediump "; |
15470 | } |
15471 | return flags_to_qualifiers_glsl(type, flags: ir.meta[id].decoration.decoration_flags); |
15472 | } |
15473 | |
15474 | void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var) |
15475 | { |
// Works around weird behavior in glslangValidator where a patch out block is
// translated such that only the block members receive the decoration.
// To keep glslang from complaining when we compile the output again, we have to
// transform this back to a case where the variable itself has the Patch
// decoration, and not the members. Same for perprimitiveEXT.
15481 | auto &type = get<SPIRType>(id: var.basetype); |
15482 | if (has_decoration(id: type.self, decoration: DecorationBlock)) |
15483 | { |
15484 | uint32_t member_count = uint32_t(type.member_types.size()); |
15485 | Decoration promoted_decoration = {}; |
15486 | bool do_promote_decoration = false; |
15487 | for (uint32_t i = 0; i < member_count; i++) |
15488 | { |
15489 | if (has_member_decoration(id: type.self, index: i, decoration: DecorationPatch)) |
15490 | { |
15491 | promoted_decoration = DecorationPatch; |
15492 | do_promote_decoration = true; |
15493 | break; |
15494 | } |
15495 | else if (has_member_decoration(id: type.self, index: i, decoration: DecorationPerPrimitiveEXT)) |
15496 | { |
15497 | promoted_decoration = DecorationPerPrimitiveEXT; |
15498 | do_promote_decoration = true; |
15499 | break; |
15500 | } |
15501 | } |
15502 | |
15503 | if (do_promote_decoration) |
15504 | { |
15505 | set_decoration(id: var.self, decoration: promoted_decoration); |
15506 | for (uint32_t i = 0; i < member_count; i++) |
15507 | unset_member_decoration(id: type.self, index: i, decoration: promoted_decoration); |
15508 | } |
15509 | } |
15510 | } |
15511 | |
15512 | string CompilerGLSL::to_qualifiers_glsl(uint32_t id) |
15513 | { |
15514 | auto &flags = get_decoration_bitset(id); |
15515 | string res; |
15516 | |
15517 | auto *var = maybe_get<SPIRVariable>(id); |
15518 | |
15519 | if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) |
15520 | res += "shared "; |
15521 | else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT && !backend.shared_is_implied) |
15522 | res += "taskPayloadSharedEXT "; |
15523 | |
15524 | res += to_interpolation_qualifiers(flags); |
15525 | if (var) |
15526 | res += to_storage_qualifiers_glsl(var: *var); |
15527 | |
15528 | auto &type = expression_type(id); |
15529 | if (type.image.dim != DimSubpassData && type.image.sampled == 2) |
15530 | { |
15531 | if (flags.get(bit: DecorationCoherent)) |
15532 | res += "coherent "; |
15533 | if (flags.get(bit: DecorationRestrict)) |
15534 | res += "restrict "; |
15535 | |
15536 | if (flags.get(bit: DecorationNonWritable)) |
15537 | res += "readonly "; |
15538 | |
15539 | bool formatted_load = type.image.format == ImageFormatUnknown; |
15540 | if (flags.get(bit: DecorationNonReadable)) |
15541 | { |
15542 | res += "writeonly "; |
15543 | formatted_load = false; |
15544 | } |
15545 | |
15546 | if (formatted_load) |
15547 | { |
15548 | if (!options.es) |
15549 | require_extension_internal(ext: "GL_EXT_shader_image_load_formatted"); |
15550 | else |
15551 | SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL."); |
15552 | } |
15553 | } |
15554 | |
15555 | res += to_precision_qualifiers_glsl(id); |
15556 | |
15557 | return res; |
15558 | } |
15559 | |
15560 | string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) |
15561 | { |
// glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
15563 | auto &type = expression_type(id: arg.id); |
15564 | const char *direction = ""; |
15565 | |
15566 | if (type.pointer) |
15567 | { |
15568 | if (arg.write_count && arg.read_count) |
15569 | direction = "inout "; |
15570 | else if (arg.write_count) |
15571 | direction = "out "; |
15572 | } |
15573 | |
15574 | return join(ts&: direction, ts: to_qualifiers_glsl(id: arg.id), ts: variable_decl(type, name: to_name(id: arg.id), id: arg.id)); |
15575 | } |
15576 | |
15577 | string CompilerGLSL::to_initializer_expression(const SPIRVariable &var) |
15578 | { |
15579 | return to_unpacked_expression(id: var.initializer); |
15580 | } |
15581 | |
15582 | string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id) |
15583 | { |
15584 | #ifndef NDEBUG |
15585 | auto &type = get<SPIRType>(id: type_id); |
15586 | assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction || |
15587 | type.storage == StorageClassGeneric); |
15588 | #endif |
15589 | uint32_t id = ir.increase_bound_by(count: 1); |
15590 | ir.make_constant_null(id, type: type_id, add_to_typed_id_set: false); |
15591 | return constant_expression(c: get<SPIRConstant>(id)); |
15592 | } |
15593 | |
15594 | bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const |
15595 | { |
15596 | if (type.pointer) |
15597 | return false; |
15598 | |
15599 | if (!type.array.empty() && options.flatten_multidimensional_arrays) |
15600 | return false; |
15601 | |
15602 | for (auto &literal : type.array_size_literal) |
15603 | if (!literal) |
15604 | return false; |
15605 | |
15606 | for (auto &memb : type.member_types) |
15607 | if (!type_can_zero_initialize(type: get<SPIRType>(id: memb))) |
15608 | return false; |
15609 | |
15610 | return true; |
15611 | } |
15612 | |
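// Example of the initializer handling below: a loop variable with a static
// expression declares as e.g. "int i = 0;", and with force_zero_initialized_variables
// set, an otherwise uninitialized "vec3 v;" is emitted as "vec3 v = vec3(0.0);"
// (assuming the type passes type_can_zero_initialize()).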
15613 | string CompilerGLSL::variable_decl(const SPIRVariable &variable) |
15614 | { |
15615 | // Ignore the pointer type since GLSL doesn't have pointers. |
15616 | auto &type = get_variable_data_type(var: variable); |
15617 | |
15618 | if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer) |
15619 | SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types."); |
15620 | |
15621 | auto res = join(ts: to_qualifiers_glsl(id: variable.self), ts: variable_decl(type, name: to_name(id: variable.self), id: variable.self)); |
15622 | |
15623 | if (variable.loop_variable && variable.static_expression) |
15624 | { |
15625 | uint32_t expr = variable.static_expression; |
15626 | if (ir.ids[expr].get_type() != TypeUndef) |
15627 | res += join(ts: " = ", ts: to_unpacked_expression(id: variable.static_expression)); |
15628 | else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) |
15629 | res += join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var: variable))); |
15630 | } |
15631 | else if (variable.initializer && !variable_decl_is_remapped_storage(var: variable, storage: StorageClassWorkgroup)) |
15632 | { |
15633 | uint32_t expr = variable.initializer; |
15634 | if (ir.ids[expr].get_type() != TypeUndef) |
15635 | res += join(ts: " = ", ts: to_initializer_expression(var: variable)); |
15636 | else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) |
15637 | res += join(ts: " = ", ts: to_zero_initialized_expression(type_id: get_variable_data_type_id(var: variable))); |
15638 | } |
15639 | |
15640 | return res; |
15641 | } |
15642 | |
15643 | const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) |
15644 | { |
15645 | auto &flags = get_decoration_bitset(id: variable.self); |
15646 | if (flags.get(bit: DecorationRelaxedPrecision)) |
15647 | return "mediump "; |
15648 | else |
15649 | return "highp "; |
15650 | } |
15651 | |
15652 | string CompilerGLSL::pls_decl(const PlsRemap &var) |
15653 | { |
15654 | auto &variable = get<SPIRVariable>(id: var.id); |
15655 | |
15656 | auto op_and_basetype = pls_format_to_basetype(format: var.format); |
15657 | |
15658 | SPIRType type { op_and_basetype.first }; |
15659 | type.basetype = op_and_basetype.second; |
15660 | auto vecsize = pls_format_to_components(format: var.format); |
15661 | if (vecsize > 1) |
15662 | { |
15663 | type.op = OpTypeVector; |
15664 | type.vecsize = vecsize; |
15665 | } |
15666 | |
15667 | return join(ts: to_pls_layout(format: var.format), ts: to_pls_qualifiers_glsl(variable), ts: type_to_glsl(type), ts: " ", |
15668 | ts: to_name(id: variable.self)); |
15669 | } |
15670 | |
15671 | uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const |
15672 | { |
15673 | return to_array_size_literal(type, index: uint32_t(type.array.size() - 1)); |
15674 | } |
15675 | |
15676 | uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const |
15677 | { |
15678 | assert(type.array.size() == type.array_size_literal.size()); |
15679 | |
15680 | if (type.array_size_literal[index]) |
15681 | { |
15682 | return type.array[index]; |
15683 | } |
15684 | else |
15685 | { |
15686 | // Use the default spec constant value. |
15687 | // This is the best we can do. |
15688 | return evaluate_constant_u32(id: type.array[index]); |
15689 | } |
15690 | } |
15691 | |
15692 | string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) |
15693 | { |
15694 | assert(type.array.size() == type.array_size_literal.size()); |
15695 | |
15696 | auto &size = type.array[index]; |
15697 | if (!type.array_size_literal[index]) |
15698 | return to_expression(id: size); |
15699 | else if (size) |
15700 | return convert_to_string(t: size); |
15701 | else if (!backend.unsized_array_supported) |
15702 | { |
// For runtime-sized arrays, we can work around the
// lack of standard support by simply emitting a
// single-element array.
//
// Runtime-sized arrays must always be the last member
// of an interface block.
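// E.g. an SSBO member declared "float data[];" is emitted
// as "float data[1];" on such targets.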
15709 | return "1"; |
15710 | } |
15711 | else |
15712 | return ""; |
15713 | } |
15714 | |
15715 | string CompilerGLSL::type_to_array_glsl(const SPIRType &type, uint32_t) |
15716 | { |
15717 | if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) |
15718 | { |
15719 | // We are using a wrapped pointer type, and we should not emit any array declarations here. |
15720 | return ""; |
15721 | } |
15722 | |
15723 | if (type.array.empty()) |
15724 | return ""; |
15725 | |
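// When flattening, the dimensions below are multiplied into a single extent,
// so a two-dimensional "v[2][3]" is declared roughly as "v[2 * 3]"; the
// corresponding access-chain linearization happens elsewhere.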
15726 | if (options.flatten_multidimensional_arrays) |
15727 | { |
15728 | string res; |
15729 | res += "["; |
15730 | for (auto i = uint32_t(type.array.size()); i; i--) |
15731 | { |
15732 | res += enclose_expression(expr: to_array_size(type, index: i - 1)); |
15733 | if (i > 1) |
15734 | res += " * "; |
15735 | } |
15736 | res += "]"; |
15737 | return res; |
15738 | } |
15739 | else |
15740 | { |
15741 | if (type.array.size() > 1) |
15742 | { |
15743 | if (!options.es && options.version < 430) |
15744 | require_extension_internal(ext: "GL_ARB_arrays_of_arrays"); |
15745 | else if (options.es && options.version < 310) |
15746 | SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. " |
15747 | "Try using --flatten-multidimensional-arrays or set " |
15748 | "options.flatten_multidimensional_arrays to true."); |
15749 | } |
15750 | |
15751 | string res; |
15752 | for (auto i = uint32_t(type.array.size()); i; i--) |
15753 | { |
15754 | res += "["; |
15755 | res += to_array_size(type, index: i - 1); |
15756 | res += "]"; |
15757 | } |
15758 | return res; |
15759 | } |
15760 | } |
15761 | |
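// Example mappings produced below: a combined 2D depth sampler becomes
// "sampler2DShadow", a storage image of uints becomes "uimage2D", and a separate
// multisampled 2D image (Vulkan GLSL) becomes "texture2DMS".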
15762 | string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id, bool /*member*/) |
15763 | { |
15764 | auto &imagetype = get<SPIRType>(id: type.image.type); |
15765 | string res; |
15766 | |
15767 | switch (imagetype.basetype) |
15768 | { |
15769 | case SPIRType::Int64: |
15770 | res = "i64"; |
15771 | require_extension_internal(ext: "GL_EXT_shader_image_int64"); |
15772 | break; |
15773 | case SPIRType::UInt64: |
15774 | res = "u64"; |
15775 | require_extension_internal(ext: "GL_EXT_shader_image_int64"); |
15776 | break; |
15777 | case SPIRType::Int: |
15778 | case SPIRType::Short: |
15779 | case SPIRType::SByte: |
15780 | res = "i"; |
15781 | break; |
15782 | case SPIRType::UInt: |
15783 | case SPIRType::UShort: |
15784 | case SPIRType::UByte: |
15785 | res = "u"; |
15786 | break; |
15787 | default: |
15788 | break; |
15789 | } |
15790 | |
// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
// We cannot express a true half texture type in GLSL, nor short integer formats for that matter.
15793 | |
15794 | if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) |
15795 | return res + "subpassInput"+ (type.image.ms ? "MS": ""); |
15796 | else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && |
15797 | subpass_input_is_framebuffer_fetch(id)) |
15798 | { |
15799 | SPIRType sampled_type = get<SPIRType>(id: type.image.type); |
15800 | sampled_type.vecsize = 4; |
15801 | return type_to_glsl(type: sampled_type); |
15802 | } |
15803 | |
15804 | // If we're emulating subpassInput with samplers, force sampler2D |
15805 | // so we don't have to specify format. |
15806 | if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) |
15807 | { |
15808 | // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. |
15809 | if (type.image.dim == DimBuffer && type.image.sampled == 1) |
15810 | res += "sampler"; |
15811 | else |
res += type.image.sampled == 2 ? "image" : "texture";
15813 | } |
15814 | else |
15815 | res += "sampler"; |
15816 | |
15817 | switch (type.image.dim) |
15818 | { |
15819 | case Dim1D: |
15820 | // ES doesn't support 1D. Fake it with 2D. |
15821 | res += options.es ? "2D": "1D"; |
15822 | break; |
15823 | case Dim2D: |
15824 | res += "2D"; |
15825 | break; |
15826 | case Dim3D: |
15827 | res += "3D"; |
15828 | break; |
15829 | case DimCube: |
15830 | res += "Cube"; |
15831 | break; |
15832 | case DimRect: |
15833 | if (options.es) |
15834 | SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES."); |
15835 | |
15836 | if (is_legacy_desktop()) |
15837 | require_extension_internal(ext: "GL_ARB_texture_rectangle"); |
15838 | |
15839 | res += "2DRect"; |
15840 | break; |
15841 | |
15842 | case DimBuffer: |
15843 | if (options.es && options.version < 320) |
15844 | require_extension_internal(ext: "GL_EXT_texture_buffer"); |
15845 | else if (!options.es && options.version < 300) |
15846 | require_extension_internal(ext: "GL_EXT_texture_buffer_object"); |
15847 | res += "Buffer"; |
15848 | break; |
15849 | |
15850 | case DimSubpassData: |
15851 | res += "2D"; |
15852 | break; |
15853 | default: |
15854 | SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported."); |
15855 | } |
15856 | |
15857 | if (type.image.ms) |
15858 | res += "MS"; |
15859 | if (type.image.arrayed) |
15860 | { |
15861 | if (is_legacy_desktop()) |
15862 | require_extension_internal(ext: "GL_EXT_texture_array"); |
15863 | res += "Array"; |
15864 | } |
15865 | |
15866 | // "Shadow" state in GLSL only exists for samplers and combined image samplers. |
15867 | if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && |
15868 | is_depth_image(type, id)) |
15869 | { |
15870 | res += "Shadow"; |
15871 | |
15872 | if (type.image.dim == DimCube && is_legacy()) |
15873 | { |
15874 | if (!options.es) |
15875 | require_extension_internal(ext: "GL_EXT_gpu_shader4"); |
15876 | else |
15877 | { |
15878 | require_extension_internal(ext: "GL_NV_shadow_samplers_cube"); |
15879 | res += "NV"; |
15880 | } |
15881 | } |
15882 | } |
15883 | |
15884 | return res; |
15885 | } |
15886 | |
15887 | string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) |
15888 | { |
15889 | if (backend.use_array_constructor && type.array.size() > 1) |
15890 | { |
15891 | if (options.flatten_multidimensional_arrays) |
15892 | SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, " |
15893 | "e.g. float[][]()."); |
15894 | else if (!options.es && options.version < 430) |
15895 | require_extension_internal(ext: "GL_ARB_arrays_of_arrays"); |
15896 | else if (options.es && options.version < 310) |
15897 | SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310."); |
15898 | } |
15899 | |
15900 | auto e = type_to_glsl(type); |
15901 | if (backend.use_array_constructor) |
15902 | { |
15903 | for (uint32_t i = 0; i < type.array.size(); i++) |
15904 | e += "[]"; |
15905 | } |
15906 | return e; |
15907 | } |
15908 | |
// The optional id parameter indicates the object whose type we are trying to find
// the description for. Most type descriptions do not depend on a specific
// object's use of that type.
15912 | string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) |
15913 | { |
15914 | if (is_physical_pointer(type) && !is_physical_pointer_to_buffer_block(type)) |
15915 | { |
15916 | // Need to create a magic type name which compacts the entire type information. |
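// E.g. (roughly): a physical pointer to a float array with ArrayStride 16 and four
// elements compacts to a name like "float4_stride_16Pointer", so distinct strides
// and extents map to distinct wrapper types.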
15917 | auto *parent = &get_pointee_type(type); |
15918 | string name = type_to_glsl(type: *parent); |
15919 | |
15920 | uint32_t array_stride = get_decoration(id: type.parent_type, decoration: DecorationArrayStride); |
15921 | |
// Resolve all array dimensions in one go: once we lose the pointer type,
// array information is left to type_to_array_glsl, and the base type loses its array information.
15924 | while (is_array(type: *parent)) |
15925 | { |
15926 | if (parent->array_size_literal.back()) |
15927 | name += join(ts: type.array.back(), ts: "_"); |
15928 | else |
15929 | name += join(ts: "id", ts: type.array.back(), ts: "_"); |
15930 | |
15931 | name += "stride_"+ std::to_string(val: array_stride); |
15932 | |
15933 | array_stride = get_decoration(id: parent->parent_type, decoration: DecorationArrayStride); |
15934 | parent = &get<SPIRType>(id: parent->parent_type); |
15935 | } |
15936 | |
15937 | name += "Pointer"; |
15938 | return name; |
15939 | } |
15940 | |
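// Quick summary of the naming scheme below: Float with vecsize 3 yields "vec3",
// UInt with vecsize 2 yields "uvec2", a square Float matrix yields e.g. "mat3",
// and a non-square one yields "mat2x3" (columns x rows); "???" is the fallback
// for combinations GLSL cannot express.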
15941 | switch (type.basetype) |
15942 | { |
15943 | case SPIRType::Struct: |
15944 | // Need OpName lookup here to get a "sensible" name for a struct. |
15945 | if (backend.explicit_struct_type) |
15946 | return join(ts: "struct ", ts: to_name(id: type.self)); |
15947 | else |
15948 | return to_name(id: type.self); |
15949 | |
15950 | case SPIRType::Image: |
15951 | case SPIRType::SampledImage: |
15952 | return image_type_glsl(type, id); |
15953 | |
15954 | case SPIRType::Sampler: |
15955 | // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing |
15956 | // this distinction into the type system. |
return comparison_ids.count(x: id) ? "samplerShadow" : "sampler";
15958 | |
15959 | case SPIRType::AccelerationStructure: |
return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";
15961 | |
15962 | case SPIRType::RayQuery: |
15963 | return "rayQueryEXT"; |
15964 | |
15965 | case SPIRType::Void: |
15966 | return "void"; |
15967 | |
15968 | default: |
15969 | break; |
15970 | } |
15971 | |
15972 | if (type.basetype == SPIRType::UInt && is_legacy()) |
15973 | { |
15974 | if (options.es) |
// HACK: spirv-cross changes bools into uints and generates code which compares them to
// zero. Input code will have already been validated not to contain any uints,
// so any remaining uints must in fact be bools. However, simply returning "bool" here
// will result in invalid code. Instead, return an int.
15979 | return backend.basic_int_type; |
15980 | else |
15981 | require_extension_internal(ext: "GL_EXT_gpu_shader4"); |
15982 | } |
15983 | |
15984 | if (type.basetype == SPIRType::AtomicCounter) |
15985 | { |
15986 | if (options.es && options.version < 310) |
15987 | SPIRV_CROSS_THROW("At least ESSL 3.10 required for atomic counters."); |
15988 | else if (!options.es && options.version < 420) |
15989 | require_extension_internal(ext: "GL_ARB_shader_atomic_counters"); |
15990 | } |
15991 | |
15992 | if (type.vecsize == 1 && type.columns == 1) // Scalar builtin |
15993 | { |
15994 | switch (type.basetype) |
15995 | { |
15996 | case SPIRType::Boolean: |
15997 | return "bool"; |
15998 | case SPIRType::SByte: |
15999 | return backend.basic_int8_type; |
16000 | case SPIRType::UByte: |
16001 | return backend.basic_uint8_type; |
16002 | case SPIRType::Short: |
16003 | return backend.basic_int16_type; |
16004 | case SPIRType::UShort: |
16005 | return backend.basic_uint16_type; |
16006 | case SPIRType::Int: |
16007 | return backend.basic_int_type; |
16008 | case SPIRType::UInt: |
16009 | return backend.basic_uint_type; |
16010 | case SPIRType::AtomicCounter: |
16011 | return "atomic_uint"; |
16012 | case SPIRType::Half: |
16013 | return "float16_t"; |
16014 | case SPIRType::Float: |
16015 | return "float"; |
16016 | case SPIRType::Double: |
16017 | return "double"; |
16018 | case SPIRType::Int64: |
16019 | return "int64_t"; |
16020 | case SPIRType::UInt64: |
16021 | return "uint64_t"; |
16022 | default: |
16023 | return "???"; |
16024 | } |
16025 | } |
16026 | else if (type.vecsize > 1 && type.columns == 1) // Vector builtin |
16027 | { |
16028 | switch (type.basetype) |
16029 | { |
16030 | case SPIRType::Boolean: |
16031 | return join(ts: "bvec", ts: type.vecsize); |
16032 | case SPIRType::SByte: |
16033 | return join(ts: "i8vec", ts: type.vecsize); |
16034 | case SPIRType::UByte: |
16035 | return join(ts: "u8vec", ts: type.vecsize); |
16036 | case SPIRType::Short: |
16037 | return join(ts: "i16vec", ts: type.vecsize); |
16038 | case SPIRType::UShort: |
16039 | return join(ts: "u16vec", ts: type.vecsize); |
16040 | case SPIRType::Int: |
16041 | return join(ts: "ivec", ts: type.vecsize); |
16042 | case SPIRType::UInt: |
16043 | return join(ts: "uvec", ts: type.vecsize); |
16044 | case SPIRType::Half: |
16045 | return join(ts: "f16vec", ts: type.vecsize); |
16046 | case SPIRType::Float: |
16047 | return join(ts: "vec", ts: type.vecsize); |
16048 | case SPIRType::Double: |
16049 | return join(ts: "dvec", ts: type.vecsize); |
16050 | case SPIRType::Int64: |
16051 | return join(ts: "i64vec", ts: type.vecsize); |
16052 | case SPIRType::UInt64: |
16053 | return join(ts: "u64vec", ts: type.vecsize); |
16054 | default: |
16055 | return "???"; |
16056 | } |
16057 | } |
16058 | else if (type.vecsize == type.columns) // Simple Matrix builtin |
16059 | { |
16060 | switch (type.basetype) |
16061 | { |
16062 | case SPIRType::Boolean: |
16063 | return join(ts: "bmat", ts: type.vecsize); |
16064 | case SPIRType::Int: |
16065 | return join(ts: "imat", ts: type.vecsize); |
16066 | case SPIRType::UInt: |
16067 | return join(ts: "umat", ts: type.vecsize); |
16068 | case SPIRType::Half: |
16069 | return join(ts: "f16mat", ts: type.vecsize); |
16070 | case SPIRType::Float: |
16071 | return join(ts: "mat", ts: type.vecsize); |
16072 | case SPIRType::Double: |
16073 | return join(ts: "dmat", ts: type.vecsize); |
16074 | // Matrix types not supported for int64/uint64. |
16075 | default: |
16076 | return "???"; |
16077 | } |
16078 | } |
16079 | else |
16080 | { |
16081 | switch (type.basetype) |
16082 | { |
16083 | case SPIRType::Boolean: |
16084 | return join(ts: "bmat", ts: type.columns, ts: "x", ts: type.vecsize); |
16085 | case SPIRType::Int: |
16086 | return join(ts: "imat", ts: type.columns, ts: "x", ts: type.vecsize); |
16087 | case SPIRType::UInt: |
16088 | return join(ts: "umat", ts: type.columns, ts: "x", ts: type.vecsize); |
16089 | case SPIRType::Half: |
16090 | return join(ts: "f16mat", ts: type.columns, ts: "x", ts: type.vecsize); |
16091 | case SPIRType::Float: |
16092 | return join(ts: "mat", ts: type.columns, ts: "x", ts: type.vecsize); |
16093 | case SPIRType::Double: |
16094 | return join(ts: "dmat", ts: type.columns, ts: "x", ts: type.vecsize); |
16095 | // Matrix types not supported for int64/uint64. |
16096 | default: |
16097 | return "???"; |
16098 | } |
16099 | } |
16100 | } |
16101 | |
16102 | void CompilerGLSL::add_variable(unordered_set<string> &variables_primary, |
16103 | const unordered_set<string> &variables_secondary, string &name) |
16104 | { |
16105 | if (name.empty()) |
16106 | return; |
16107 | |
16108 | ParsedIR::sanitize_underscores(str&: name); |
16109 | if (ParsedIR::is_globally_reserved_identifier(str&: name, allow_reserved_prefixes: true)) |
16110 | { |
16111 | name.clear(); |
16112 | return; |
16113 | } |
16114 | |
16115 | update_name_cache(cache_primary&: variables_primary, cache_secondary: variables_secondary, name); |
16116 | } |
16117 | |
16118 | void CompilerGLSL::add_local_variable_name(uint32_t id) |
16119 | { |
16120 | add_variable(variables_primary&: local_variable_names, variables_secondary: block_names, name&: ir.meta[id].decoration.alias); |
16121 | } |
16122 | |
16123 | void CompilerGLSL::add_resource_name(uint32_t id) |
16124 | { |
16125 | add_variable(variables_primary&: resource_names, variables_secondary: block_names, name&: ir.meta[id].decoration.alias); |
16126 | } |
16127 | |
16128 | void CompilerGLSL::add_header_line(const std::string &line) |
16129 | { |
16130 | header_lines.push_back(t: line); |
16131 | } |
16132 | |
16133 | bool CompilerGLSL::has_extension(const std::string &ext) const |
16134 | { |
16135 | auto itr = find(first: begin(cont: forced_extensions), last: end(cont: forced_extensions), val: ext); |
16136 | return itr != end(cont: forced_extensions); |
16137 | } |
16138 | |
16139 | void CompilerGLSL::require_extension(const std::string &ext) |
16140 | { |
16141 | if (!has_extension(ext)) |
16142 | forced_extensions.push_back(t: ext); |
16143 | } |
16144 | |
16145 | const SmallVector<std::string> &CompilerGLSL::get_required_extensions() const |
16146 | { |
16147 | return forced_extensions; |
16148 | } |
16149 | |
16150 | void CompilerGLSL::require_extension_internal(const string &ext) |
16151 | { |
16152 | if (backend.supports_extensions && !has_extension(ext)) |
16153 | { |
16154 | forced_extensions.push_back(t: ext); |
16155 | force_recompile(); |
16156 | } |
16157 | } |
16158 | |
16159 | void CompilerGLSL::flatten_buffer_block(VariableID id) |
16160 | { |
16161 | auto &var = get<SPIRVariable>(id); |
16162 | auto &type = get<SPIRType>(id: var.basetype); |
16163 | auto name = to_name(id: type.self, allow_alias: false); |
16164 | auto &flags = get_decoration_bitset(id: type.self); |
16165 | |
16166 | if (!type.array.empty()) |
16167 | SPIRV_CROSS_THROW(name + " is an array of UBOs."); |
16168 | if (type.basetype != SPIRType::Struct) |
16169 | SPIRV_CROSS_THROW(name + " is not a struct."); |
16170 | if (!flags.get(bit: DecorationBlock)) |
16171 | SPIRV_CROSS_THROW(name + " is not a block."); |
16172 | if (type.member_types.empty()) |
16173 | SPIRV_CROSS_THROW(name + " is an empty struct."); |
16174 | |
16175 | flattened_buffer_blocks.insert(x: id); |
16176 | } |
16177 | |
16178 | bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const |
16179 | { |
16180 | return false; // GLSL itself does not need to translate array builtin types to non-array builtin types |
16181 | } |
16182 | |
16183 | bool CompilerGLSL::is_user_type_structured(uint32_t /*id*/) const |
16184 | { |
16185 | return false; // GLSL itself does not have structured user type, but HLSL does with StructuredBuffer and RWStructuredBuffer resources. |
16186 | } |
16187 | |
16188 | bool CompilerGLSL::check_atomic_image(uint32_t id) |
16189 | { |
16190 | auto &type = expression_type(id); |
16191 | if (type.storage == StorageClassImage) |
16192 | { |
16193 | if (options.es && options.version < 320) |
16194 | require_extension_internal(ext: "GL_OES_shader_image_atomic"); |
16195 | |
16196 | auto *var = maybe_get_backing_variable(chain: id); |
16197 | if (var) |
16198 | { |
16199 | if (has_decoration(id: var->self, decoration: DecorationNonWritable) || has_decoration(id: var->self, decoration: DecorationNonReadable)) |
16200 | { |
16201 | unset_decoration(id: var->self, decoration: DecorationNonWritable); |
16202 | unset_decoration(id: var->self, decoration: DecorationNonReadable); |
16203 | force_recompile(); |
16204 | } |
16205 | } |
16206 | return true; |
16207 | } |
16208 | else |
16209 | return false; |
16210 | } |
16211 | |
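// Sketch of the overload handling below: argument types (with pointers stripped)
// are hashed per function name, so "func(float)" and "func(int)" can share the
// name, while two distinct SPIR-V functions that would both hash as "func(float)"
// force the second one onto a fallback name via add_resource_name().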
16212 | void CompilerGLSL::add_function_overload(const SPIRFunction &func) |
16213 | { |
16214 | Hasher hasher; |
16215 | for (auto &arg : func.arguments) |
16216 | { |
16217 | // Parameters can vary with pointer type or not, |
16218 | // but that will not change the signature in GLSL/HLSL, |
16219 | // so strip the pointer type before hashing. |
16220 | uint32_t type_id = get_pointee_type_id(type_id: arg.type); |
16221 | auto &type = get<SPIRType>(id: type_id); |
16222 | |
16223 | if (!combined_image_samplers.empty()) |
16224 | { |
16225 | // If we have combined image samplers, we cannot really trust the image and sampler arguments |
16226 | // we pass down to callees, because they may be shuffled around. |
16227 | // Ignore these arguments, to make sure that functions need to differ in some other way |
16228 | // to be considered different overloads. |
16229 | if (type.basetype == SPIRType::SampledImage || |
16230 | (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler) |
16231 | { |
16232 | continue; |
16233 | } |
16234 | } |
16235 | |
16236 | hasher.u32(value: type_id); |
16237 | } |
16238 | uint64_t types_hash = hasher.get(); |
16239 | |
16240 | auto function_name = to_name(id: func.self); |
16241 | auto itr = function_overloads.find(x: function_name); |
16242 | if (itr != end(cont&: function_overloads)) |
16243 | { |
16244 | // There exists a function with this name already. |
16245 | auto &overloads = itr->second; |
16246 | if (overloads.count(x: types_hash) != 0) |
16247 | { |
16248 | // Overload conflict, assign a new name. |
16249 | add_resource_name(id: func.self); |
16250 | function_overloads[to_name(id: func.self)].insert(x: types_hash); |
16251 | } |
16252 | else |
16253 | { |
16254 | // Can reuse the name. |
16255 | overloads.insert(x: types_hash); |
16256 | } |
16257 | } |
16258 | else |
16259 | { |
16260 | // First time we see this function name. |
16261 | add_resource_name(id: func.self); |
16262 | function_overloads[to_name(id: func.self)].insert(x: types_hash); |
16263 | } |
16264 | } |
16265 | |
16266 | void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) |
16267 | { |
16268 | if (func.self != ir.default_entry_point) |
16269 | add_function_overload(func); |
16270 | |
16271 | // Avoid shadow declarations. |
16272 | local_variable_names = resource_names; |
16273 | |
16274 | string decl; |
16275 | |
16276 | auto &type = get<SPIRType>(id: func.return_type); |
16277 | decl += flags_to_qualifiers_glsl(type, flags: return_flags); |
16278 | decl += type_to_glsl(type); |
16279 | decl += type_to_array_glsl(type, 0); |
16280 | decl += " "; |
16281 | |
16282 | if (func.self == ir.default_entry_point) |
16283 | { |
16284 | // If we need complex fallback in GLSL, we just wrap main() in a function |
16285 | // and interlock the entire shader ... |
16286 | if (interlocked_is_complex) |
16287 | decl += "spvMainInterlockedBody"; |
16288 | else |
16289 | decl += "main"; |
16290 | |
16291 | processing_entry_point = true; |
16292 | } |
16293 | else |
16294 | decl += to_name(id: func.self); |
16295 | |
16296 | decl += "("; |
16297 | SmallVector<string> arglist; |
16298 | for (auto &arg : func.arguments) |
16299 | { |
16300 | // Do not pass in separate images or samplers if we're remapping |
16301 | // to combined image samplers. |
16302 | if (skip_argument(id: arg.id)) |
16303 | continue; |
16304 | |
16305 | // Might change the variable name if it already exists in this function. |
// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
// to use the same name for multiple variables.
16308 | // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. |
16309 | add_local_variable_name(id: arg.id); |
16310 | |
16311 | arglist.push_back(t: argument_decl(arg)); |
16312 | |
16313 | // Hold a pointer to the parameter so we can invalidate the readonly field if needed. |
16314 | auto *var = maybe_get<SPIRVariable>(id: arg.id); |
16315 | if (var) |
16316 | var->parameter = &arg; |
16317 | } |
16318 | |
16319 | for (auto &arg : func.shadow_arguments) |
16320 | { |
16321 | // Might change the variable name if it already exists in this function. |
16322 | // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation |
16323 | // to use same name for variables. |
16324 | // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. |
16325 | add_local_variable_name(id: arg.id); |
16326 | |
16327 | arglist.push_back(t: argument_decl(arg)); |
16328 | |
16329 | // Hold a pointer to the parameter so we can invalidate the readonly field if needed. |
16330 | auto *var = maybe_get<SPIRVariable>(id: arg.id); |
16331 | if (var) |
16332 | var->parameter = &arg; |
16333 | } |
16334 | |
16335 | decl += merge(list: arglist); |
16336 | decl += ")"; |
16337 | statement(ts&: decl); |
16338 | } |
16339 | |
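// For reference (illustrative): for a non-entry-point function returning float and taking a
// single int parameter, the declaration built above comes out as something like
//   float foo(int a)
// while the entry point itself is always emitted as "void main()" (or
// "spvMainInterlockedBody" in the complex interlock fallback).
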
void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
{
	// Avoid potential cycles.
	if (func.active)
		return;
	func.active = true;

	// If we depend on a function, emit that function before we emit our own function.
	for (auto block : func.blocks)
	{
		auto &b = get<SPIRBlock>(block);
		for (auto &i : b.ops)
		{
			auto ops = stream(i);
			auto op = static_cast<Op>(i.op);

			if (op == OpFunctionCall)
			{
				// Recursively emit functions which are called.
				uint32_t id = ops[2];
				emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
			}
		}
	}

	if (func.entry_line.file_id != 0)
		emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
	emit_function_prototype(func, return_flags);
	begin_scope();

	if (func.self == ir.default_entry_point)
		emit_entry_point_declarations();

	current_function = &func;
	auto &entry_block = get<SPIRBlock>(func.entry_block);

	sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
	for (auto &array : func.constant_arrays_needed_on_stack)
	{
		auto &c = get<SPIRConstant>(array);
		auto &type = get<SPIRType>(c.constant_type);
		statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
	}

	for (auto &v : func.local_variables)
	{
		auto &var = get<SPIRVariable>(v);
		var.deferred_declaration = false;

		if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup))
		{
			// Special variable types which cannot have an initializer
			// need to be declared as standalone variables.
			// Comes from MSL which can push global variables as local variables in main function.
			add_local_variable_name(var.self);
			statement(variable_decl(var), ";");
			var.deferred_declaration = false;
		}
		else if (var.storage == StorageClassPrivate)
		{
			// These variables will not have had their CFG usage analyzed, so move them to the entry block.
			// Comes from MSL which can push global variables as local variables in main function.
			// We could just declare them right now, but we would miss out on an important initialization case which is
			// LUT declaration in MSL.
			// If we don't declare the variable when it is assigned we're forced to go through a helper function
			// which copies elements one by one.
			add_local_variable_name(var.self);

			if (var.initializer)
			{
				statement(variable_decl(var), ";");
				var.deferred_declaration = false;
			}
			else
			{
				auto &dominated = entry_block.dominated_variables;
				if (find(begin(dominated), end(dominated), var.self) == end(dominated))
					entry_block.dominated_variables.push_back(var.self);
				var.deferred_declaration = true;
			}
		}
		else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
		{
			// No need to declare this variable, it has a static expression.
			var.deferred_declaration = false;
		}
		else if (expression_is_lvalue(v))
		{
			add_local_variable_name(var.self);

			// Loop variables should never be declared early, they are explicitly emitted in a loop.
			if (var.initializer && !var.loop_variable)
				statement(variable_decl_function_local(var), ";");
			else
			{
				// Don't declare the variable until first use to declutter the GLSL output quite a lot.
				// If we don't touch the variable before the first branch,
				// declare it then, since we need the variable declaration to be in top scope.
				var.deferred_declaration = true;
			}
		}
		else
		{
			// HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
			// For these types (non-lvalue), we enforce forwarding through a shadowed variable.
			// This means that when we OpStore to these variables, we just write in the expression ID directly.
			// This breaks any kind of branching, since the variable must be statically assigned.
			// Branching on samplers and images would be pretty much impossible to fake in GLSL.
			var.statically_assigned = true;
		}

		var.loop_variable_enable = false;

		// Loop variables are never declared outside their for-loop, so block any implicit declaration.
		if (var.loop_variable)
		{
			var.deferred_declaration = false;
			// Need to reset the static expression so we can fall back to the initializer if need be.
			var.static_expression = 0;
		}
	}

	// Enforce declaration order for regression testing purposes.
	for (auto &block_id : func.blocks)
	{
		auto &block = get<SPIRBlock>(block_id);
		sort(begin(block.dominated_variables), end(block.dominated_variables));
	}

	for (auto &line : current_function->fixup_hooks_in)
		line();

	emit_block_chain(entry_block);

	end_scope();
	processing_entry_point = false;
	statement("");

	// Make sure deferred declaration state for local variables is cleared when we are done with the function.
	// We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
	for (auto &v : func.local_variables)
	{
		auto &var = get<SPIRVariable>(v);
		var.deferred_declaration = false;
	}
}

void CompilerGLSL::emit_fixup()
{
	if (is_vertex_like_shader())
	{
		if (options.vertex.fixup_clipspace)
		{
			const char *suffix = backend.float_literal_suffix ? "f" : "";
			statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
		}

		if (options.vertex.flip_vert_y)
			statement("gl_Position.y = -gl_Position.y;");
	}
}

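// For reference: with fixup_clipspace enabled, the emitted statement
//   gl_Position.z = 2.0 * gl_Position.z - gl_Position.w;
// remaps a depth value in [0, w] (Vulkan/D3D convention) to [-w, w] (GL convention),
// since after the perspective divide, z'/w = 2 * (z/w) - 1.
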
void CompilerGLSL::flush_phi(BlockID from, BlockID to)
{
	auto &child = get<SPIRBlock>(to);
	if (child.ignore_phi_from_block == from)
		return;

	unordered_set<uint32_t> temporary_phi_variables;

	for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
	{
		auto &phi = *itr;

		if (phi.parent == from)
		{
			auto &var = get<SPIRVariable>(phi.function_variable);

			// A Phi variable might be a loop variable, so flush to static expression.
			if (var.loop_variable && !var.loop_variable_enable)
				var.static_expression = phi.local_variable;
			else
			{
				flush_variable_declaration(phi.function_variable);

				// Check if we are going to write to a Phi variable that another statement will read from
				// as part of another Phi node in our target block.
				// For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
				// This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
				bool need_saved_temporary =
				    find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
					    return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
				    }) != end(child.phi_variables);

				if (need_saved_temporary)
				{
					// Need to make sure we declare the phi variable with a copy at the right scope.
					// We cannot safely declare a temporary here since we might be inside a continue block.
					if (!var.allocate_temporary_copy)
					{
						var.allocate_temporary_copy = true;
						force_recompile();
					}
					statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
					temporary_phi_variables.insert(phi.function_variable);
				}

				// This might be called in continue block, so make sure we
				// use this to emit ESSL 1.0 compliant increments/decrements.
				auto lhs = to_expression(phi.function_variable);

				string rhs;
				if (temporary_phi_variables.count(phi.local_variable))
					rhs = join("_", phi.local_variable, "_copy");
				else
					rhs = to_pointer_expression(phi.local_variable);

				if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
					statement(lhs, " = ", rhs, ";");
			}

			register_write(phi.function_variable);
		}
	}
}

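// For reference (illustrative, IDs are made up): when two phi nodes in the target block
// swap each other's values, the saved-temporary path above emits
//   _25_copy = a;
//   a = b;
//   b = _25_copy;
// instead of the incorrect "a = b; b = a;".
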
void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
{
	auto &to_block = get<SPIRBlock>(to);
	if (from == to)
		return;

	assert(is_continue(to));
	if (to_block.complex_continue)
	{
		// Just emit the whole block chain as is.
		auto usage_counts = expression_usage_counts;

		emit_block_chain(to_block);

		// Expression usage counts are moot after returning from the continue block.
		expression_usage_counts = usage_counts;
	}
	else
	{
		auto &from_block = get<SPIRBlock>(from);
		bool outside_control_flow = false;
		uint32_t loop_dominator = 0;

		// FIXME: Refactor this to not use the old loop_dominator tracking.
		if (from_block.merge_block)
		{
			// If we are a loop header, we don't set the loop dominator,
			// so just use "self" here.
			loop_dominator = from;
		}
		else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
		{
			loop_dominator = from_block.loop_dominator;
		}

		if (loop_dominator != 0)
		{
			auto &cfg = get_cfg_for_current_function();

			// For non-complex continue blocks, we implicitly branch to the continue block
			// by having the continue block be part of the loop header in for (; ; continue-block).
			outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
		}

		// Some simplification for for-loops. We always end up with a useless continue;
		// statement since we branch to a loop block.
		// Walk the CFG: if we unconditionally execute the block calling continue (assuming we're in the loop block),
		// we can avoid writing out an explicit continue statement.
		// Similar optimization to return statements if we know we're outside flow control.
		if (!outside_control_flow)
			statement("continue;");
	}
}

void CompilerGLSL::branch(BlockID from, BlockID to)
{
	flush_phi(from, to);
	flush_control_dependent_expressions(from);

	bool to_is_continue = is_continue(to);

	// This is only a continue if we branch to our loop dominator.
	if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
	{
		// This can happen if we had a complex continue block which was emitted.
		// Once the continue block tries to branch to the loop header, just emit continue;
		// and end the chain here.
		statement("continue;");
	}
	else if (from != to && is_break(to))
	{
		// We cannot break to ourselves, so check explicitly for from != to.
		// This case can trigger if a loop header is all three of these things:
		// - Continue block
		// - Loop header
		// - Break merge target all at once ...

		// Very dirty workaround.
		// Switch constructs are able to break, but they cannot break out of a loop at the same time,
		// yet SPIR-V allows it.
		// The only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
		// write to the ladder here, and defer the break.
		// The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
		if (is_loop_break(to))
		{
			for (size_t n = current_emitting_switch_stack.size(); n; n--)
			{
				auto *current_emitting_switch = current_emitting_switch_stack[n - 1];

				if (current_emitting_switch &&
				    current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
				    get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
				{
					if (!current_emitting_switch->need_ladder_break)
					{
						force_recompile();
						current_emitting_switch->need_ladder_break = true;
					}

					statement("_", current_emitting_switch->self, "_ladder_break = true;");
				}
				else
					break;
			}
		}
		statement("break;");
	}
	else if (to_is_continue || from == to)
	{
		// The from == to case can happen for a do-while loop which branches into itself.
		// We don't mark these cases as continue blocks, but the only possible way to branch into
		// ourselves is by means of continue blocks.

		// If we are merging to a continue block, there is no need to emit the block chain for continue here.
		// We can branch to the continue block after we merge execution.

		// Here we make use of structured control flow rules from the spec:
		// 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
		//       - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
		// If we are branching to a merge block, we must be inside a construct which dominates the merge block.
		auto &block_meta = ir.block_meta[to];
		bool branching_to_merge =
		    (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
		                   ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
		if (!to_is_continue || !branching_to_merge)
			branch_to_continue(from, to);
	}
	else if (!is_conditional(to))
		emit_block_chain(get<SPIRBlock>(to));

	// It is important that we check for break before continue.
	// A block might serve two purposes: a break block for the inner scope, and
	// a continue block in the outer scope.
	// The inner scope always takes precedence.
}

void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
{
	auto &from_block = get<SPIRBlock>(from);
	BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);

	// If we branch directly to our selection merge target, we don't need a code path.
	bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block);
	bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block);

	if (!true_block_needs_code && !false_block_needs_code)
		return;

	// We might have a loop merge here. Only consider selection flattening constructs.
	// Loop hints are handled explicitly elsewhere.
	if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten)
		emit_block_hints(from_block);

	if (true_block_needs_code)
	{
		statement("if (", to_expression(cond), ")");
		begin_scope();
		branch(from, true_block);
		end_scope();

		if (false_block_needs_code)
		{
			statement("else");
			begin_scope();
			branch(from, false_block);
			end_scope();
		}
	}
	else if (false_block_needs_code)
	{
		// Only need false path, use negative conditional.
		statement("if (!", to_enclosed_expression(cond), ")");
		begin_scope();
		branch(from, false_block);
		end_scope();
	}
}

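// For reference (illustrative): when only the false path needs code, the overload above
// emits "if (!cond) { ... }" rather than an empty true branch followed by an else.
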
// FIXME: This currently cannot handle complex continue blocks
// as in do-while.
// This should be seen as a "trivial" continue block.
string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
{
	auto *block = &get<SPIRBlock>(continue_block);

	// While emitting the continue block, declare_temporary will check this
	// if we have to emit temporaries.
	current_continue_block = block;

	SmallVector<string> statements;

	// Capture all statements into our list.
	auto *old = redirect_statement;
	redirect_statement = &statements;

	// Stamp out all blocks one after each other.
	while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
	{
		// Write out all instructions we have in this block.
		emit_block_instructions(*block);

		// For plain branchless for/while continue blocks.
		if (block->next_block)
		{
			flush_phi(continue_block, block->next_block);
			block = &get<SPIRBlock>(block->next_block);
		}
		// For do-while blocks. The last block will be a select block.
		else if (block->true_block && follow_true_block)
		{
			flush_phi(continue_block, block->true_block);
			block = &get<SPIRBlock>(block->true_block);
		}
		else if (block->false_block && follow_false_block)
		{
			flush_phi(continue_block, block->false_block);
			block = &get<SPIRBlock>(block->false_block);
		}
		else
		{
			SPIRV_CROSS_THROW("Invalid continue block detected!");
		}
	}

	// Restore the old pointer.
	redirect_statement = old;

	// Somewhat ugly, strip off the last ';' since we use ',' instead.
	// Ideally, we should select this behavior in statement().
	for (auto &s : statements)
	{
		if (!s.empty() && s.back() == ';')
			s.erase(s.size() - 1, 1);
	}

	current_continue_block = nullptr;
	return merge(statements);
}

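// For reference (illustrative): since the trailing ';' is stripped and the captured
// statements are merged with ", ", a continue block containing "i++; j += 2;" becomes the
// increment expression in "for (...; ...; i++, j += 2)".
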
void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
{
	// While loops do not take initializers, so declare all of them outside.
	for (auto &loop_var : block.loop_variables)
	{
		auto &var = get<SPIRVariable>(loop_var);
		statement(variable_decl(var), ";");
	}
}

string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
{
	if (block.loop_variables.empty())
		return "";

	bool same_types = for_loop_initializers_are_same_type(block);
	// We can only declare for-loop initializers if all variables are of the same type.
	// If we cannot do this, declare individual variables before the loop header.

	// We might have a loop variable candidate which was not assigned to for some reason.
	uint32_t missing_initializers = 0;
	for (auto &variable : block.loop_variables)
	{
		uint32_t expr = get<SPIRVariable>(variable).static_expression;

		// Sometimes loop variables are initialized with OpUndef, but we can just declare
		// a plain variable without initializer in this case.
		if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
			missing_initializers++;
	}

	if (block.loop_variables.size() == 1 && missing_initializers == 0)
	{
		return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
	}
	else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
	{
		for (auto &loop_var : block.loop_variables)
			statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
		return "";
	}
	else
	{
		// We have a mix of loop variables, either ones with a clear initializer, or ones without.
		// Separate the two streams.
		string expr;

		for (auto &loop_var : block.loop_variables)
		{
			uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
			if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
			{
				statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
			}
			else
			{
				auto &var = get<SPIRVariable>(loop_var);
				auto &type = get_variable_data_type(var);
				if (expr.empty())
				{
					// For-loop initializers are of the form <type> id = value, id = value, id = value, etc.
					expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
				}
				else
				{
					expr += ", ";
					// In MSL, being based on C++, the asterisk marking a pointer
					// binds to the identifier, not the type.
					if (type.pointer)
						expr += "* ";
				}

				expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression));
			}
		}
		return expr;
	}
}

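// For reference (illustrative): with two int loop variables where only "i" has a clear
// initializer, the mixed-stream path above declares "int j;" as a separate statement and
// returns "int i = 0" as the initializer, yielding
//   int j;
//   for (int i = 0; ...; ...)
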
bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
{
	if (block.loop_variables.size() <= 1)
		return true;

	uint32_t expected = 0;
	Bitset expected_flags;
	for (auto &var : block.loop_variables)
	{
		// Don't care about uninitialized variables as they will not be part of the initializers.
		uint32_t expr = get<SPIRVariable>(var).static_expression;
		if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
			continue;

		if (expected == 0)
		{
			expected = get<SPIRVariable>(var).basetype;
			expected_flags = get_decoration_bitset(var);
		}
		else if (expected != get<SPIRVariable>(var).basetype)
			return false;

		// Precision flags and things like that must also match.
		if (expected_flags != get_decoration_bitset(var))
			return false;
	}

	return true;
}

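// For reference (illustrative): "int i = 0" and "mediump int j = 0" share a base type but
// differ in precision decorations, so the check above rejects merging them into a single
// for-loop initializer; they are then declared separately before the loop header instead.
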
void CompilerGLSL::emit_block_instructions_with_masked_debug(SPIRBlock &block)
{
	// Have to block debug instructions such as OpLine here, since it will be treated as a statement otherwise,
	// which breaks loop optimizations.
	// Any line directive would be declared outside the loop body, which would just be confusing either way.
	bool old_block_debug_directives = block_debug_directives;
	block_debug_directives = true;
	emit_block_instructions(block);
	block_debug_directives = old_block_debug_directives;
}

bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
{
	SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));

	if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
	{
		uint32_t current_count = statement_count;
		// If we're trying to create a true for loop,
		// we need to make sure that all opcodes before the branch statement do not actually emit any code.
		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
		emit_block_instructions_with_masked_debug(block);

		bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);

		bool flushes_phi = flush_phi_required(block.self, block.true_block) ||
		                   flush_phi_required(block.self, block.false_block);

		// This can work! We only did trivial things which could be forwarded in the block body!
		if (!flushes_phi && current_count == statement_count && condition_is_temporary)
		{
			switch (continue_type)
			{
			case SPIRBlock::ForLoop:
			{
				// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
				flush_undeclared_variables(block);

				// Important that we do this in this order because
				// emitting the continue block can invalidate the condition expression.
				auto initializer = emit_for_loop_initializers(block);
				auto condition = to_expression(block.condition);

				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
					condition = join("!", enclose_expression(condition));

				emit_block_hints(block);
				if (method != SPIRBlock::MergeToSelectContinueForLoop)
				{
					auto continue_block = emit_continue_block(block.continue_block, false, false);
					statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
				}
				else
					statement("for (", initializer, "; ", condition, "; )");
				break;
			}

			case SPIRBlock::WhileLoop:
			{
				// This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
				flush_undeclared_variables(block);
				emit_while_loop_initializers(block);
				emit_block_hints(block);

				auto condition = to_expression(block.condition);
				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
					condition = join("!", enclose_expression(condition));

				statement("while (", condition, ")");
				break;
			}

			default:
				block.disable_block_optimization = true;
				force_recompile();
				begin_scope(); // We'll see an end_scope() later.
				return false;
			}

			begin_scope();
			return true;
		}
		else
		{
			block.disable_block_optimization = true;
			force_recompile();
			begin_scope(); // We'll see an end_scope() later.
			return false;
		}
	}
	else if (method == SPIRBlock::MergeToDirectForLoop)
	{
		auto &child = get<SPIRBlock>(block.next_block);

		// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
		flush_undeclared_variables(child);

		uint32_t current_count = statement_count;

		// If we're trying to create a true for loop,
		// we need to make sure that all opcodes before the branch statement do not actually emit any code.
		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
		emit_block_instructions_with_masked_debug(child);

		bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);

		bool flushes_phi = flush_phi_required(child.self, child.true_block) ||
		                   flush_phi_required(child.self, child.false_block);

		if (!flushes_phi && current_count == statement_count && condition_is_temporary)
		{
			uint32_t target_block = child.true_block;

			switch (continue_type)
			{
			case SPIRBlock::ForLoop:
			{
				// Important that we do this in this order because
				// emitting the continue block can invalidate the condition expression.
				auto initializer = emit_for_loop_initializers(block);
				auto condition = to_expression(child.condition);

				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
				{
					condition = join("!", enclose_expression(condition));
					target_block = child.false_block;
				}

				auto continue_block = emit_continue_block(block.continue_block, false, false);
				emit_block_hints(block);
				statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
				break;
			}

			case SPIRBlock::WhileLoop:
			{
				emit_while_loop_initializers(block);
				emit_block_hints(block);

				auto condition = to_expression(child.condition);
				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
				{
					condition = join("!", enclose_expression(condition));
					target_block = child.false_block;
				}

				statement("while (", condition, ")");
				break;
			}

			default:
				block.disable_block_optimization = true;
				force_recompile();
				begin_scope(); // We'll see an end_scope() later.
				return false;
			}

			begin_scope();
			branch(child.self, target_block);
			return true;
		}
		else
		{
			block.disable_block_optimization = true;
			force_recompile();
			begin_scope(); // We'll see an end_scope() later.
			return false;
		}
	}
	else
		return false;
}

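// For reference (illustrative): when the loop header body is free of side effects, the
// paths above recover an idiomatic loop such as
//   for (int i = 0; i < 10; i++) { ... }
// while any failure (flushed phis, forced temporaries, extra statements) sets
// disable_block_optimization and forces a recompile, which instead emits a generic
// "for (;;)" loop with explicit breaks.
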
void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
{
	for (auto &v : block.dominated_variables)
		flush_variable_declaration(v);
}

void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
{
	// If we need to force temporaries for certain IDs due to continue blocks, do it before starting the loop header.
	// Need to sort these to ensure that reference output is stable.
	sort(begin(temporaries), end(temporaries),
	     [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });

	for (auto &tmp : temporaries)
	{
		auto &type = get<SPIRType>(tmp.first);

		// There are some rare scenarios where we are asked to declare pointer types as hoisted temporaries.
		// This should be ignored unless we're doing actual variable pointers and the backend supports it.
		// Access chains cannot normally be lowered to temporaries in GLSL and HLSL.
		if (type.pointer && !backend.native_pointers)
			continue;

		add_local_variable_name(tmp.second);
		auto &flags = get_decoration_bitset(tmp.second);

		// Not all targets support pointer literals, so don't bother with that case.
		string initializer;
		if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
			initializer = join(" = ", to_zero_initialized_expression(tmp.first));

		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";");

		hoisted_temporaries.insert(tmp.second);
		forced_temporaries.insert(tmp.second);

		// The temporary might be read from before it's assigned, set up the expression now.
		set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);

		// If we have hoisted temporaries in multi-precision contexts, emit that here too ...
		// We will not be able to analyze hoisted-ness for dependent temporaries that we hallucinate here.
		auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(tmp.second);
		if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end())
		{
			uint32_t mirror_id = mirrored_precision_itr->second;
			auto &mirror_flags = get_decoration_bitset(mirror_id);
			statement(flags_to_qualifiers_glsl(type, mirror_flags),
			          variable_decl(type, to_name(mirror_id)),
			          initializer, ";");
			// The temporary might be read from before it's assigned, set up the expression now.
			set<SPIRExpression>(mirror_id, to_name(mirror_id), tmp.first, true);
			hoisted_temporaries.insert(mirror_id);
		}
	}
}

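// For reference (illustrative): a hoisted temporary created inside a loop but consumed
// after it must be declared ahead of the loop header, e.g.
//   float _37;
//   for (;;) { _37 = ...; break; }
//   use(_37);
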
void CompilerGLSL::emit_block_chain(SPIRBlock &block)
{
	bool select_branch_to_true_block = false;
	bool select_branch_to_false_block = false;
	bool skip_direct_branch = false;
	bool emitted_loop_header_variables = false;
	bool force_complex_continue_block = false;
	ValueSaver<uint32_t> loop_level_saver(current_loop_level);

	if (block.merge == SPIRBlock::MergeLoop)
		add_loop_level();

	// If we're emitting PHI variables with precision aliases, we have to emit them as hoisted temporaries.
	for (auto var_id : block.dominated_variables)
	{
		auto &var = get<SPIRVariable>(var_id);
		if (var.phi_variable)
		{
			auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(var_id);
			if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end() &&
			    find_if(block.declare_temporary.begin(), block.declare_temporary.end(),
			            [mirrored_precision_itr](const std::pair<TypeID, VariableID> &p) {
				            return p.second == mirrored_precision_itr->second;
			            }) == block.declare_temporary.end())
			{
				block.declare_temporary.push_back({ var.basetype, mirrored_precision_itr->second });
			}
		}
	}

	emit_hoisted_temporaries(block.declare_temporary);

	SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
	if (block.continue_block)
	{
		continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
		// If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
		if (continue_type == SPIRBlock::ComplexLoop)
			block.complex_continue = true;
	}

	// If we have loop variables, stop masking out access to the variable now.
	for (auto var_id : block.loop_variables)
	{
		auto &var = get<SPIRVariable>(var_id);
		var.loop_variable_enable = true;
		// We're not going to declare the variable directly, so emit a copy here.
		emit_variable_temporary_copies(var);
	}

	// Remember deferred declaration state. We will restore it before returning.
	SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
	for (size_t i = 0; i < block.dominated_variables.size(); i++)
	{
		uint32_t var_id = block.dominated_variables[i];
		auto &var = get<SPIRVariable>(var_id);
		rearm_dominated_variables[i] = var.deferred_declaration;
	}

	// This is the method often used by spirv-opt to implement loops.
	// The loop header goes straight into the continue block.
	// However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block,
	// it *MUST* be used in the continue block. This loop method will not work.
	if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
	{
		flush_undeclared_variables(block);
		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
		{
			if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
				select_branch_to_false_block = true;
			else
				select_branch_to_true_block = true;

			emitted_loop_header_variables = true;
			force_complex_continue_block = true;
		}
	}
	// This is the older loop behavior in glslang which branches to the loop body directly from the loop header.
	else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
	{
		flush_undeclared_variables(block);
		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
		{
			// The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
			if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
				select_branch_to_false_block = true;
			else
				select_branch_to_true_block = true;

			emitted_loop_header_variables = true;
		}
	}
	// This is the newer loop behavior in glslang which branches from the loop header directly to
	// a new block, which in turn has an OpBranchSelection without a selection merge.
	else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
	{
		flush_undeclared_variables(block);
		if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
		{
			skip_direct_branch = true;
			emitted_loop_header_variables = true;
		}
	}
	else if (continue_type == SPIRBlock::DoWhileLoop)
	{
		flush_undeclared_variables(block);
		emit_while_loop_initializers(block);
		emitted_loop_header_variables = true;
		// We have some temporaries where the loop header is the dominator.
		// We risk a case where we have code like:
		// for (;;) { create-temporary; break; } consume-temporary;
		// so force-declare temporaries here.
		emit_hoisted_temporaries(block.potential_declare_temporary);
		statement("do");
		begin_scope();

		emit_block_instructions(block);
	}
	else if (block.merge == SPIRBlock::MergeLoop)
	{
		flush_undeclared_variables(block);
		emit_while_loop_initializers(block);
		emitted_loop_header_variables = true;

		// We have a generic loop without any distinguishable pattern like for, while or do-while.
		get<SPIRBlock>(block.continue_block).complex_continue = true;
		continue_type = SPIRBlock::ComplexLoop;

		// We have some temporaries where the loop header is the dominator.
		// We risk a case where we have code like:
		// for (;;) { create-temporary; break; } consume-temporary;
		// so force-declare temporaries here.
		emit_hoisted_temporaries(block.potential_declare_temporary);
		emit_block_hints(block);
		statement("for (;;)");
		begin_scope();

		emit_block_instructions(block);
	}
	else
	{
		emit_block_instructions(block);
	}

	// If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem,
	// as writes to said loop variables might have been masked out, so we need a recompile.
	if (!emitted_loop_header_variables && !block.loop_variables.empty())
	{
		force_recompile_guarantee_forward_progress();
		for (auto var : block.loop_variables)
			get<SPIRVariable>(var).loop_variable = false;
		block.loop_variables.clear();
	}

	flush_undeclared_variables(block);
	bool emit_next_block = true;

	// Handle end of block.
	switch (block.terminator)
	{
	case SPIRBlock::Direct:
		// True when emitting complex continue block.
		if (block.loop_dominator == block.next_block)
		{
			branch(block.self, block.next_block);
			emit_next_block = false;
		}
		// True if MergeToDirectForLoop succeeded.
		else if (skip_direct_branch)
			emit_next_block = false;
		else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
		{
			branch(block.self, block.next_block);
			emit_next_block = false;
		}
		break;

	case SPIRBlock::Select:
		// True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
		if (select_branch_to_true_block)
		{
			if (force_complex_continue_block)
			{
				assert(block.true_block == block.continue_block);

				// We're going to emit a continue block directly here, so make sure it's marked as complex.
				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
				bool old_complex = complex_continue;
				complex_continue = true;
				branch(block.self, block.true_block);
				complex_continue = old_complex;
			}
			else
				branch(block.self, block.true_block);
		}
		else if (select_branch_to_false_block)
		{
			if (force_complex_continue_block)
			{
				assert(block.false_block == block.continue_block);

				// We're going to emit a continue block directly here, so make sure it's marked as complex.
				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
				bool old_complex = complex_continue;
				complex_continue = true;
				branch(block.self, block.false_block);
				complex_continue = old_complex;
			}
			else
				branch(block.self, block.false_block);
		}
		else
			branch(block.self, block.condition, block.true_block, block.false_block);
		break;

	case SPIRBlock::MultiSelect:
	{
		auto &type = expression_type(block.condition);
		bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort ||
		                     type.basetype == SPIRType::UByte || type.basetype == SPIRType::UInt64;

		if (block.merge == SPIRBlock::MergeNone)
			SPIRV_CROSS_THROW("Switch statement is not structured");

		if (!backend.support_64bit_switch && (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64))
		{
			// SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
			SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
		}

		const char *label_suffix = "";
		if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
			label_suffix = "u";
		else if (type.basetype == SPIRType::Int64 && backend.support_64bit_switch)
			label_suffix = "l";
		else if (type.basetype == SPIRType::UInt64 && backend.support_64bit_switch)
			label_suffix = "ul";
		else if (type.basetype == SPIRType::UShort)
			label_suffix = backend.uint16_t_literal_suffix;
		else if (type.basetype == SPIRType::Short)
			label_suffix = backend.int16_t_literal_suffix;

		current_emitting_switch_stack.push_back(&block);

		if (block.need_ladder_break)
			statement("bool _", block.self, "_ladder_break = false;");

		// Find all unique case constructs.
		unordered_map<uint32_t, SmallVector<uint64_t>> case_constructs;
		SmallVector<uint32_t> block_declaration_order;
		SmallVector<uint64_t> literals_to_merge;

		// If a switch case branches to the default block for some reason, we can just remove that literal from consideration
		// and let the default: block handle it.
		// 2.11 in the SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here.
		// We only need to consider possible fallthrough if order[i] branches to order[i + 1].
		auto &cases = get_case_list(block);
		for (auto &c : cases)
		{
			if (c.block != block.next_block && c.block != block.default_block)
			{
				if (!case_constructs.count(c.block))
					block_declaration_order.push_back(c.block);
				case_constructs[c.block].push_back(c.value);
			}
			else if (c.block == block.next_block && block.default_block != block.next_block)
			{
				// We might have to flush phi inside specific case labels.
				// If we can piggyback on default:, do so instead.
				literals_to_merge.push_back(c.value);
			}
		}

		// Empty literal array -> default.
		if (block.default_block != block.next_block)
		{
			auto &default_block = get<SPIRBlock>(block.default_block);

			// We need to slide in the default block somewhere in this chain
			// if there are fall-through scenarios since the default is declared separately in OpSwitch.
			// Only consider trivial fall-through cases here.
			size_t num_blocks = block_declaration_order.size();
			bool injected_block = false;

			for (size_t i = 0; i < num_blocks; i++)
			{
				auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
				if (execution_is_direct_branch(case_block, default_block))
				{
					// Fallthrough to default block, we must inject the default block here.
					block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
					injected_block = true;
					break;
				}
				else if (execution_is_direct_branch(default_block, case_block))
				{
					// Default case is falling through to another case label, we must inject the default block here.
					block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
					injected_block = true;
					break;
				}
			}

			// Order does not matter.
			if (!injected_block)
				block_declaration_order.push_back(block.default_block);
			else if (is_legacy_es())
				SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");

			case_constructs[block.default_block] = {};
		}

		size_t num_blocks = block_declaration_order.size();

		const auto to_case_label = [](uint64_t literal, uint32_t width, bool is_unsigned_case) -> string
		{
			if (is_unsigned_case)
				return convert_to_string(literal);

			// For smaller cases, the literals are compiled as 32 bit wide
			// literals so we don't need to care for all sizes specifically.
			if (width <= 32)
			{
				return convert_to_string(int64_t(int32_t(literal)));
			}

			return convert_to_string(int64_t(literal));
		};

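		// For reference: to_case_label sign-extends narrow literals, so in a signed 32-bit
		// switch the raw literal 0xFFFFFFFF is printed as the case label -1.
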
		const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint64_t> &labels,
		                                      const char *suffix) -> string {
			string ret;
			size_t count = labels.size();
			for (size_t i = 0; i < count; i++)
			{
				if (i)
					ret += " || ";
				ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix,
				            count > 1 ? ")" : "");
			}
			return ret;
		};

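		// For reference (illustrative): since ESSL 1.0 has no switch statement, labels {1, 4}
		// on condition "x" are lowered by to_legacy_case_label to the chained test
		//   (x == 1) || (x == 4)
		// which is then emitted as an if/else ladder below.
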
		// We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
		// we need to flush phi nodes outside the switch block in a branch,
		// and skip any Phi handling inside the case label to make fall-through work as expected.
		// This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
		// inside the case label if at all possible.
		for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
		{
			if (flush_phi_required(block.self, block_declaration_order[i]) &&
			    flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
			{
				uint32_t target_block = block_declaration_order[i];

				// Make sure we flush Phi, it might have been marked to be ignored earlier.
				get<SPIRBlock>(target_block).ignore_phi_from_block = 0;

				auto &literals = case_constructs[target_block];

				if (literals.empty())
				{
					// Oh boy, gotta make a complete negative test instead! o.o
					// Find all possible literals that would *not* make us enter the default block.
					// If none of those literals match, we flush Phi ...
					SmallVector<string> conditions;
					for (size_t j = 0; j < num_blocks; j++)
					{
						auto &negative_literals = case_constructs[block_declaration_order[j]];
						for (auto &case_label : negative_literals)
							conditions.push_back(join(to_enclosed_expression(block.condition),
							                          " != ", to_case_label(case_label, type.width, unsigned_case)));
					}

					statement("if (", merge(conditions, " && "), ")");
					begin_scope();
					flush_phi(block.self, target_block);
					end_scope();
				}
				else
				{
					SmallVector<string> conditions;
					conditions.reserve(literals.size());
					for (auto &case_label : literals)
						conditions.push_back(join(to_enclosed_expression(block.condition),
						                          " == ", to_case_label(case_label, type.width, unsigned_case)));
					statement("if (", merge(conditions, " || "), ")");
					begin_scope();
					flush_phi(block.self, target_block);
					end_scope();
				}

				// Mark the block so that we don't flush Phi from header to case label.
				get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
			}
		}

		// If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate
		// non-structured exits with the help of a switch block.
		// This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
		bool block_like_switch = cases.empty();

		// If this is true, the switch is completely meaningless, and we should just avoid it.
		bool collapsed_switch = block_like_switch && block.default_block == block.next_block;

		if (!collapsed_switch)
		{
			if (block_like_switch || is_legacy_es())
			{
				// ESSL 1.0 is not guaranteed to support do/while.
				if (is_legacy_es())
				{
					uint32_t counter = statement_count;
					statement("for (int spvDummy", counter, " = 0; spvDummy", counter, " < 1; spvDummy", counter,
					          "++)");
				}
				else
					statement("do");
			}
			else
			{
				emit_block_hints(block);
				statement("switch (", to_unpacked_expression(block.condition), ")");
			}
			begin_scope();
		}

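		// For reference (illustrative): a "block-like" switch (default block only, no cases)
		// is emitted as
		//   do { ... } while(false);
		// or, on ESSL 1.0 where do/while is not guaranteed, as
		//   for (int spvDummy42 = 0; spvDummy42 < 1; spvDummy42++) { ... }
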
		for (size_t i = 0; i < num_blocks; i++)
		{
			uint32_t target_block = block_declaration_order[i];
			auto &literals = case_constructs[target_block];

			if (literals.empty())
			{
				// Default case.
				if (!block_like_switch)
				{
					if (is_legacy_es())
						statement("else");
					else
						statement("default:");
				}
			}
			else
			{
				if (is_legacy_es())
				{
					statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix),
					          ")");
				}
				else
				{
					for (auto &case_literal : literals)
					{
						// The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
						statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":");
					}
				}
			}

			auto &case_block = get<SPIRBlock>(target_block);
			if (backend.support_case_fallthrough && i + 1 < num_blocks &&
			    execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
			{
				// We will fall through here, so just terminate the block chain early.
				// We still need to deal with Phi potentially.
				// No need for a stack-like thing here since we only do fall-through when there is a
				// single trivial branch to the fall-through target.
				current_emitting_switch_fallthrough = true;
			}
			else
				current_emitting_switch_fallthrough = false;

			if (!block_like_switch)
				begin_scope();
			branch(block.self, target_block);
			if (!block_like_switch)
				end_scope();

			current_emitting_switch_fallthrough = false;
		}

17629 | // Might still have to flush phi variables if we branch from loop header directly to merge target. |
17630 | // This is supposed to emit all cases where we branch from header to merge block directly. |
17631 | // There are two main scenarios where cannot rely on default fallthrough. |
17632 | // - There is an explicit default: label already. |
17633 | // In this case, literals_to_merge need to form their own "default" case, so that we avoid executing that block. |
17634 | // - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there. |
17635 | bool header_merge_requires_phi = flush_phi_required(from: block.self, to: block.next_block); |
17636 | bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty(); |
17637 | if (!collapsed_switch && ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty())) |
17638 | { |
17639 | for (auto &case_literal : literals_to_merge) |
17640 | statement(ts: "case ", ts: to_case_label(case_literal, type.width, unsigned_case), ts&: label_suffix, ts: ":"); |
17641 | |
17642 | if (block.default_block == block.next_block) |
17643 | { |
17644 | if (is_legacy_es()) |
17645 | statement(ts: "else"); |
17646 | else |
17647 | statement(ts: "default:"); |
17648 | } |
17649 | |
17650 | begin_scope(); |
17651 | flush_phi(from: block.self, to: block.next_block); |
17652 | statement(ts: "break;"); |
17653 | end_scope(); |
17654 | } |
17655 | |
17656 | if (!collapsed_switch) |
17657 | { |
17658 | if (block_like_switch && !is_legacy_es()) |
17659 | end_scope_decl(decl: "while(false)"); |
17660 | else |
17661 | end_scope(); |
17662 | } |
17663 | else |
17664 | flush_phi(from: block.self, to: block.next_block); |
17665 | |
17666 | if (block.need_ladder_break) |
17667 | { |
17668 | statement(ts: "if (_", ts&: block.self, ts: "_ladder_break)"); |
17669 | begin_scope(); |
17670 | statement(ts: "break;"); |
17671 | end_scope(); |
17672 | } |
17673 | |
17674 | current_emitting_switch_stack.pop_back(); |
17675 | break; |
17676 | } |
17677 | |
17678 | case SPIRBlock::Return: |
17679 | { |
17680 | for (auto &line : current_function->fixup_hooks_out) |
17681 | line(); |
17682 | |
17683 | if (processing_entry_point) |
17684 | emit_fixup(); |
17685 | |
17686 | auto &cfg = get_cfg_for_current_function(); |
17687 | |
17688 | if (block.return_value) |
17689 | { |
17690 | auto &type = expression_type(id: block.return_value); |
17691 | if (!type.array.empty() && !backend.can_return_array) |
17692 | { |
17693 | // If we cannot return arrays, we will have a special out argument we can write to instead. |
17694 | // The backend is responsible for setting this up, and redirection the return values as appropriate. |
17695 | if (ir.ids[block.return_value].get_type() != TypeUndef) |
17696 | { |
17697 | emit_array_copy(expr: "spvReturnValue", lhs_id: 0, rhs_id: block.return_value, lhs_storage: StorageClassFunction, |
17698 | rhs_storage: get_expression_effective_storage_class(ptr: block.return_value)); |
17699 | } |
17700 | |
17701 | if (!cfg.node_terminates_control_flow_in_sub_graph(from: current_function->entry_block, to: block.self) || |
17702 | block.loop_dominator != BlockID(SPIRBlock::NoDominator)) |
17703 | { |
17704 | statement(ts: "return;"); |
17705 | } |
17706 | } |
17707 | else |
17708 | { |
17709 | // OpReturnValue can return Undef, so don't emit anything for this case. |
17710 | if (ir.ids[block.return_value].get_type() != TypeUndef) |
17711 | statement(ts: "return ", ts: to_unpacked_expression(id: block.return_value), ts: ";"); |
17712 | } |
17713 | } |
17714 | else if (!cfg.node_terminates_control_flow_in_sub_graph(from: current_function->entry_block, to: block.self) || |
17715 | block.loop_dominator != BlockID(SPIRBlock::NoDominator)) |
17716 | { |
17717 | // If this block is the very final block and not called from control flow, |
17718 | // we do not need an explicit return which looks out of place. Just end the function here. |
17719 | // In the very weird case of for(;;) { return; } executing return is unconditional, |
17720 | // but we actually need a return here ... |
17721 | statement(ts: "return;"); |
17722 | } |
17723 | break; |
17724 | } |
17725 | |
17726 | // If the Kill is terminating a block with a (probably synthetic) return value, emit a return value statement. |
17727 | case SPIRBlock::Kill: |
17728 | statement(ts&: backend.discard_literal, ts: ";"); |
17729 | if (block.return_value) |
17730 | statement(ts: "return ", ts: to_unpacked_expression(id: block.return_value), ts: ";"); |
17731 | break; |
17732 | |
17733 | case SPIRBlock::Unreachable: |
17734 | { |
17735 | // Avoid emitting false fallthrough, which can happen for |
17736 | // if (cond) break; else discard; inside a case label. |
17737 | // Discard is not always implementable as a terminator. |
17738 | |
17739 | auto &cfg = get_cfg_for_current_function(); |
17740 | bool inner_dominator_is_switch = false; |
17741 | ID id = block.self; |
17742 | |
17743 | while (id) |
17744 | { |
17745 | auto &iter_block = get<SPIRBlock>(id); |
17746 | if (iter_block.terminator == SPIRBlock::MultiSelect || |
17747 | iter_block.merge == SPIRBlock::MergeLoop) |
17748 | { |
17749 | ID next_block = iter_block.merge == SPIRBlock::MergeLoop ? |
17750 | iter_block.merge_block : iter_block.next_block; |
17751 | bool outside_construct = next_block && cfg.find_common_dominator(a: next_block, b: block.self) == next_block; |
17752 | if (!outside_construct) |
17753 | { |
17754 | inner_dominator_is_switch = iter_block.terminator == SPIRBlock::MultiSelect; |
17755 | break; |
17756 | } |
17757 | } |
17758 | |
17759 | if (cfg.get_preceding_edges(block: id).empty()) |
17760 | break; |
17761 | |
17762 | id = cfg.get_immediate_dominator(block: id); |
17763 | } |
17764 | |
17765 | if (inner_dominator_is_switch) |
17766 | statement(ts: "break; // unreachable workaround"); |
17767 | |
17768 | emit_next_block = false; |
17769 | break; |
17770 | } |
17771 | |
17772 | case SPIRBlock::IgnoreIntersection: |
17773 | statement(ts: "ignoreIntersectionEXT;"); |
17774 | break; |
17775 | |
17776 | case SPIRBlock::TerminateRay: |
17777 | statement(ts: "terminateRayEXT;"); |
17778 | break; |
17779 | |
17780 | case SPIRBlock::EmitMeshTasks: |
17781 | emit_mesh_tasks(block); |
17782 | break; |
17783 | |
17784 | default: |
17785 | SPIRV_CROSS_THROW("Unimplemented block terminator."); |
17786 | } |
17787 | |
17788 | if (block.next_block && emit_next_block) |
17789 | { |
17790 | // If we hit this case, we're dealing with an unconditional branch, which means we will output |
17791 | // that block after this. If we had selection merge, we already flushed phi variables. |
17792 | if (block.merge != SPIRBlock::MergeSelection) |
17793 | { |
17794 | flush_phi(from: block.self, to: block.next_block); |
17795 | // For a direct branch, need to remember to invalidate expressions in the next linear block instead. |
17796 | get<SPIRBlock>(id: block.next_block).invalidate_expressions = block.invalidate_expressions; |
17797 | } |
17798 | |
17799 | // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi. |
17800 | if (!current_emitting_switch_fallthrough) |
17801 | { |
17802 | // For merge selects we might have ignored the fact that a merge target |
17803 | // could have been a break; or continue; |
17804 | // We will need to deal with it here. |
17805 | if (is_loop_break(next: block.next_block)) |
17806 | { |
17807 | // Cannot check for just break, because switch statements will also use break. |
17808 | assert(block.merge == SPIRBlock::MergeSelection); |
17809 | statement(ts: "break;"); |
17810 | } |
17811 | else if (is_continue(next: block.next_block)) |
17812 | { |
17813 | assert(block.merge == SPIRBlock::MergeSelection); |
17814 | branch_to_continue(from: block.self, to: block.next_block); |
17815 | } |
17816 | else if (BlockID(block.self) != block.next_block) |
17817 | emit_block_chain(block&: get<SPIRBlock>(id: block.next_block)); |
17818 | } |
17819 | } |
17820 | |
17821 | if (block.merge == SPIRBlock::MergeLoop) |
17822 | { |
17823 | if (continue_type == SPIRBlock::DoWhileLoop) |
17824 | { |
17825 | // Make sure that we run the continue block to get the expressions set, but this |
17826 | // should become an empty string. |
17827 | // We have no fallbacks if we cannot forward everything to temporaries ... |
17828 | const auto &continue_block = get<SPIRBlock>(id: block.continue_block); |
17829 | bool positive_test = execution_is_noop(from: get<SPIRBlock>(id: continue_block.true_block), |
17830 | to: get<SPIRBlock>(id: continue_block.loop_dominator)); |
17831 | |
17832 | uint32_t current_count = statement_count; |
17833 | auto statements = emit_continue_block(continue_block: block.continue_block, follow_true_block: positive_test, follow_false_block: !positive_test); |
17834 | if (statement_count != current_count) |
17835 | { |
17836 | // The DoWhile block has side effects, force ComplexLoop pattern next pass. |
17837 | get<SPIRBlock>(id: block.continue_block).complex_continue = true; |
17838 | force_recompile(); |
17839 | } |
17840 | |
17841 | // Might have to invert the do-while test here. |
17842 | auto condition = to_expression(id: continue_block.condition); |
17843 | if (!positive_test) |
17844 | condition = join(ts: "!", ts: enclose_expression(expr: condition)); |
17845 | |
17846 | end_scope_decl(decl: join(ts: "while (", ts&: condition, ts: ")")); |
17847 | } |
17848 | else |
17849 | end_scope(); |
17850 | |
17851 | loop_level_saver.release(); |
17852 | |
17853 | // We cannot break out of two loops at once, so don't check for break; here. |
17854 | // Using block.self as the "from" block isn't quite right, but it has the same scope |
17855 | // and dominance structure, so it's fine. |
17856 | if (is_continue(next: block.merge_block)) |
17857 | branch_to_continue(from: block.self, to: block.merge_block); |
17858 | else |
17859 | emit_block_chain(block&: get<SPIRBlock>(id: block.merge_block)); |
17860 | } |
17861 | |
17862 | // Forget about control dependent expressions now. |
17863 | block.invalidate_expressions.clear(); |
17864 | |
17865 | // After we return, we must be out of scope, so if we somehow have to re-emit this function, |
17866 | // re-declare variables if necessary. |
17867 | assert(rearm_dominated_variables.size() == block.dominated_variables.size()); |
17868 | for (size_t i = 0; i < block.dominated_variables.size(); i++) |
17869 | { |
17870 | uint32_t var = block.dominated_variables[i]; |
17871 | get<SPIRVariable>(id: var).deferred_declaration = rearm_dominated_variables[i]; |
17872 | } |
17873 | |
17874 | // Just like for deferred declaration, we need to forget about loop variable enable |
17875 | // if our block chain is reinstantiated later. |
17876 | for (auto &var_id : block.loop_variables) |
17877 | get<SPIRVariable>(id: var_id).loop_variable_enable = false; |
17878 | } |
17879 | |
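// Scope helpers: begin_scope() emits "{" and indents, while the end_scope*() variants
// emit "}", "};" or "} <decl>;" and unindent. Calls must stay balanced, hence the
// guard against popping an empty indent stack below.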
void CompilerGLSL::begin_scope()
{
	statement("{");
	indent++;
}

void CompilerGLSL::end_scope()
{
	if (!indent)
		SPIRV_CROSS_THROW("Popping empty indent stack.");
	indent--;
	statement("}");
}

void CompilerGLSL::end_scope(const string &trailer)
{
	if (!indent)
		SPIRV_CROSS_THROW("Popping empty indent stack.");
	indent--;
	statement("}", trailer);
}

void CompilerGLSL::end_scope_decl()
{
	if (!indent)
		SPIRV_CROSS_THROW("Popping empty indent stack.");
	indent--;
	statement("};");
}

void CompilerGLSL::end_scope_decl(const string &decl)
{
	if (!indent)
		SPIRV_CROSS_THROW("Popping empty indent stack.");
	indent--;
	statement("} ", decl, ";");
}

void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
{
	// If our variable is remapped, and we rely on type-remapping information as
	// well, then we cannot pass the variable as a function parameter.
	// Fixing this is non-trivial without stamping out variants of the same function,
	// so for now warn about this and suggest workarounds instead.
	for (uint32_t i = 0; i < length; i++)
	{
		auto *var = maybe_get<SPIRVariable>(args[i]);
		if (!var || !var->remapped_variable)
			continue;

		auto &type = get<SPIRType>(var->basetype);
		if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
		{
			SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
			                  "This will not work correctly because type-remapping information is lost. "
			                  "To workaround, please consider not passing the subpass input as a function parameter, "
			                  "or use in/out variables instead which do not need type remapping information.");
		}
	}
}

const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
{
	// FIXME: This is kind of hacky. There should be a cleaner way.
	auto offset = uint32_t(&instr - current_emitting_block->ops.data());
	if ((offset + 1) < current_emitting_block->ops.size())
		return &current_emitting_block->ops[offset + 1];
	else
		return nullptr;
}

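// Strip a SPIR-V memory semantics bitmask down to the memory-class bits that matter
// for barrier emission; ordering bits such as acquire/release are dropped.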
uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
{
	return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
	                    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
	                    MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
}

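// Base implementation of array copies. GLSL can assign arrays by value, so a plain
// assignment is enough; backends for targets that cannot do this override it.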
bool CompilerGLSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, StorageClass, StorageClass)
{
	string lhs;
	if (expr)
		lhs = expr;
	else
		lhs = to_expression(lhs_id);

	statement(lhs, " = ", to_expression(rhs_id), ";");
	return true;
}

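// gl_SampleMask is declared as an unsized int array in GL, so storing a whole uint
// array to it cannot be a single assignment; emit a per-element loop with a bitcast
// instead. Returns true if the store was fully handled here.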
bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id)
{
	if (!backend.force_gl_in_out_block)
		return false;
	// This path is only relevant for GL backends.

	auto *var = maybe_get<SPIRVariable>(target_id);
	if (!var || var->storage != StorageClassOutput)
		return false;

	if (!is_builtin_variable(*var) || BuiltIn(get_decoration(var->self, DecorationBuiltIn)) != BuiltInSampleMask)
		return false;

	auto &type = expression_type(source_id);
	string array_expr;
	if (type.array_size_literal.back())
	{
		array_expr = convert_to_string(type.array.back());
		if (type.array.back() == 0)
			SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
	}
	else
		array_expr = to_expression(type.array.back());

	SPIRType target_type { OpTypeInt };
	target_type.basetype = SPIRType::Int;

	statement("for (int i = 0; i < int(", array_expr, "); i++)");
	begin_scope();
	statement(to_expression(target_id), "[i] = ",
	          bitcast_expression(target_type, type.basetype, join(to_expression(source_id), "[i]")),
	          ";");
	end_scope();

	return true;
}

void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
{
	if (!backend.force_gl_in_out_block)
		return;
	// This path is only relevant for GL backends.

	auto *var = maybe_get<SPIRVariable>(source_id);
	if (!var)
		return;

	if (var->storage != StorageClassInput && var->storage != StorageClassOutput)
		return;

	auto &type = get_variable_data_type(*var);
	if (type.array.empty())
		return;

	auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
	bool is_builtin = is_builtin_variable(*var) &&
	                  (builtin == BuiltInPointSize ||
	                   builtin == BuiltInPosition ||
	                   builtin == BuiltInSampleMask);
	bool is_tess = is_tessellation_shader();
	bool is_patch = has_decoration(var->self, DecorationPatch);
	bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask;

	// Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it.
	// We must unroll the array load.
	// For builtins, we couldn't catch this case normally,
	// because this is resolved in the OpAccessChain in most cases.
	// If we load the entire array, we have no choice but to unroll here.
	if (!is_patch && (is_builtin || is_tess))
	{
		auto new_expr = join("_", target_id, "_unrolled");
		statement(variable_decl(type, new_expr, target_id), ";");
		string array_expr;
		if (type.array_size_literal.back())
		{
			array_expr = convert_to_string(type.array.back());
			if (type.array.back() == 0)
				SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
		}
		else
			array_expr = to_expression(type.array.back());

		// The array size might be a specialization constant, so use a for-loop instead.
		statement("for (int i = 0; i < int(", array_expr, "); i++)");
		begin_scope();
		if (is_builtin && !is_sample_mask)
			statement(new_expr, "[i] = gl_in[i].", expr, ";");
		else if (is_sample_mask)
		{
			SPIRType target_type { OpTypeInt };
			target_type.basetype = SPIRType::Int;
			statement(new_expr, "[i] = ", bitcast_expression(target_type, type.basetype, join(expr, "[i]")), ";");
		}
		else
			statement(new_expr, "[i] = ", expr, "[i];");
		end_scope();

		expr = std::move(new_expr);
	}
}

void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
{
	// We will handle array cases elsewhere.
	if (!expr_type.array.empty())
		return;

	auto *var = maybe_get_backing_variable(source_id);
	if (var)
		source_id = var->self;

	// Only interested in standalone builtin variables.
	if (!has_decoration(source_id, DecorationBuiltIn))
	{
		// Except for int attributes in legacy GLSL, which are cast from float.
		if (is_legacy() && expr_type.basetype == SPIRType::Int && var && var->storage == StorageClassInput)
			expr = join(type_to_glsl(expr_type), "(", expr, ")");
		return;
	}

	auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
	auto expected_type = expr_type.basetype;

	// TODO: Fill in for more builtins.
	switch (builtin)
	{
	case BuiltInLayer:
	case BuiltInPrimitiveId:
	case BuiltInViewportIndex:
	case BuiltInInstanceId:
	case BuiltInInstanceIndex:
	case BuiltInVertexId:
	case BuiltInVertexIndex:
	case BuiltInSampleId:
	case BuiltInBaseVertex:
	case BuiltInBaseInstance:
	case BuiltInDrawIndex:
	case BuiltInFragStencilRefEXT:
	case BuiltInInstanceCustomIndexNV:
	case BuiltInSampleMask:
	case BuiltInPrimitiveShadingRateKHR:
	case BuiltInShadingRateKHR:
		expected_type = SPIRType::Int;
		break;

	case BuiltInGlobalInvocationId:
	case BuiltInLocalInvocationId:
	case BuiltInWorkgroupId:
	case BuiltInLocalInvocationIndex:
	case BuiltInWorkgroupSize:
	case BuiltInNumWorkgroups:
	case BuiltInIncomingRayFlagsNV:
	case BuiltInLaunchIdNV:
	case BuiltInLaunchSizeNV:
	case BuiltInPrimitiveTriangleIndicesEXT:
	case BuiltInPrimitiveLineIndicesEXT:
	case BuiltInPrimitivePointIndicesEXT:
		expected_type = SPIRType::UInt;
		break;

	default:
		break;
	}

	if (expected_type != expr_type.basetype)
		expr = bitcast_expression(expr_type, expected_type, expr);
}

SPIRType::BaseType CompilerGLSL::get_builtin_basetype(BuiltIn builtin, SPIRType::BaseType default_type)
{
	// TODO: Fill in for more builtins.
	switch (builtin)
	{
	case BuiltInLayer:
	case BuiltInPrimitiveId:
	case BuiltInViewportIndex:
	case BuiltInFragStencilRefEXT:
	case BuiltInSampleMask:
	case BuiltInPrimitiveShadingRateKHR:
	case BuiltInShadingRateKHR:
		return SPIRType::Int;

	default:
		return default_type;
	}
}

void CompilerGLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
{
	auto *var = maybe_get_backing_variable(target_id);
	if (var)
		target_id = var->self;

	// Only interested in standalone builtin variables.
	if (!has_decoration(target_id, DecorationBuiltIn))
		return;

	auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
	auto expected_type = get_builtin_basetype(builtin, expr_type.basetype);

	if (expected_type != expr_type.basetype)
	{
		auto type = expr_type;
		type.basetype = expected_type;
		expr = bitcast_expression(type, expr_type.basetype, expr);
	}
}

void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id)
{
	if (*backend.nonuniform_qualifier == '\0')
		return;

	auto *var = maybe_get_backing_variable(ptr_id);
	if (!var)
		return;

	if (var->storage != StorageClassUniformConstant &&
	    var->storage != StorageClassStorageBuffer &&
	    var->storage != StorageClassUniform)
		return;

	auto &backing_type = get<SPIRType>(var->basetype);
	if (backing_type.array.empty())
		return;

	// If we get here, we know we're accessing an arrayed resource which
	// might require nonuniform qualifier.
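	// As an illustrative example: with backend.nonuniform_qualifier set to "nonuniformEXT",
	// an expression such as "uSamplers[vIndex]" is rewritten below to
	// "uSamplers[nonuniformEXT(vIndex)]" by wrapping the first top-level index.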

	auto start_array_index = expr.find_first_of('[');

	if (start_array_index == string::npos)
		return;

	// We've opened a bracket, track expressions until we can close the bracket.
	// This must be our resource index.
	size_t end_array_index = string::npos;
	unsigned bracket_count = 1;
	for (size_t index = start_array_index + 1; index < expr.size(); index++)
	{
		if (expr[index] == ']')
		{
			if (--bracket_count == 0)
			{
				end_array_index = index;
				break;
			}
		}
		else if (expr[index] == '[')
			bracket_count++;
	}

	assert(bracket_count == 0);

	// Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
	// nothing we can do here to express that.
	if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
		return;

	start_array_index++;

	expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
	            expr.substr(start_array_index, end_array_index - start_array_index), ")",
	            expr.substr(end_array_index, string::npos));
}

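// Map SPIR-V selection/loop control hints to the SPIRV_CROSS_* convenience macros,
// which expand to GL_EXT_control_flow_attributes attributes when the extension is available.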
void CompilerGLSL::emit_block_hints(const SPIRBlock &block)
{
	if ((options.es && options.version < 310) || (!options.es && options.version < 140))
		return;

	switch (block.hint)
	{
	case SPIRBlock::HintFlatten:
		require_extension_internal("GL_EXT_control_flow_attributes");
		statement("SPIRV_CROSS_FLATTEN");
		break;
	case SPIRBlock::HintDontFlatten:
		require_extension_internal("GL_EXT_control_flow_attributes");
		statement("SPIRV_CROSS_BRANCH");
		break;
	case SPIRBlock::HintUnroll:
		require_extension_internal("GL_EXT_control_flow_attributes");
		statement("SPIRV_CROSS_UNROLL");
		break;
	case SPIRBlock::HintDontUnroll:
		require_extension_internal("GL_EXT_control_flow_attributes");
		statement("SPIRV_CROSS_LOOP");
		break;
	default:
		break;
	}
}

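// Snapshot the current name of an ID so reset_name_caches() can restore it after
// the name caches are cleared between compilation attempts.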
void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
{
	preserved_aliases[id] = get_name(id);
}

void CompilerGLSL::reset_name_caches()
{
	for (auto &preserved : preserved_aliases)
		set_name(preserved.first, preserved.second);

	preserved_aliases.clear();
	resource_names.clear();
	block_input_names.clear();
	block_output_names.clear();
	block_ubo_names.clear();
	block_ssbo_names.clear();
	block_names.clear();
	function_overloads.clear();
}

void CompilerGLSL::fixup_anonymous_struct_names(std::unordered_set<uint32_t> &visited, const SPIRType &type)
{
	if (visited.count(type.self))
		return;
	visited.insert(type.self);

	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
	{
		auto &mbr_type = get<SPIRType>(type.member_types[i]);

		if (mbr_type.basetype == SPIRType::Struct)
		{
			// If there are multiple aliases, the output might be somewhat unpredictable,
			// but the only real alternative in that case is to do nothing, which isn't any better.
			// This check should be fine in practice.
			if (get_name(mbr_type.self).empty() && !get_member_name(type.self, i).empty())
			{
				auto anon_name = join("anon_", get_member_name(type.self, i));
				ParsedIR::sanitize_underscores(anon_name);
				set_name(mbr_type.self, anon_name);
			}

			fixup_anonymous_struct_names(visited, mbr_type);
		}
	}
}

void CompilerGLSL::fixup_anonymous_struct_names()
{
	// HLSL codegen can often end up emitting anonymous structs inside blocks, which
	// breaks GL linking since all names must match ...
	// Try to emit sensible code, so attempt to find such structs and emit anon_$member.

	// Breaks exponential explosion with weird type trees.
	std::unordered_set<uint32_t> visited;

	ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
		if (type.basetype == SPIRType::Struct &&
		    (has_decoration(type.self, DecorationBlock) ||
		     has_decoration(type.self, DecorationBufferBlock)))
		{
			fixup_anonymous_struct_names(visited, type);
		}
	});
}

void CompilerGLSL::fixup_type_alias()
{
	// Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
	ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
		if (!type.type_alias)
			return;

		if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
		{
			// Top-level block types should never alias anything else.
			type.type_alias = 0;
		}
		else if (type_is_block_like(type) && type.self == ID(self))
		{
			// A block-like type is any type which contains Offset decoration, but not top-level blocks,
			// i.e. blocks which are placed inside buffers.
			// Become the master.
			ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
				if (other_id == self)
					return;

				if (other_type.type_alias == type.type_alias)
					other_type.type_alias = self;
			});

			this->get<SPIRType>(type.type_alias).type_alias = self;
			type.type_alias = 0;
		}
	});
}

void CompilerGLSL::reorder_type_alias()
{
	// Reorder declaration of types so that the master of the type alias is always emitted first.
	// We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type ABuffer, which
	// means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here.
	auto loop_lock = ir.create_loop_hard_lock();

	auto &type_ids = ir.ids_for_type[TypeType];
	for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
	{
		auto &type = get<SPIRType>(*alias_itr);
		if (type.type_alias != TypeID(0) &&
		    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
		{
			// We will skip declaring this type, so make sure the type_alias type comes before.
			auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
			assert(master_itr != end(type_ids));

			if (alias_itr < master_itr)
			{
				// Must also swap the type order for the constant-type joined array.
				auto &joined_types = ir.ids_for_constant_undef_or_type;
				auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
				auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
				assert(alt_alias_itr != end(joined_types));
				assert(alt_master_itr != end(joined_types));

				swap(*alias_itr, *master_itr);
				swap(*alt_alias_itr, *alt_master_itr);
			}
		}
	}
}

void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
{
	// If we are redirecting statements, ignore the line directive.
	// Common case here is continue blocks.
	if (redirect_statement)
		return;

	// If we're emitting code in a sensitive context such as condition blocks in for loops, don't emit
	// any line directives, because it's not possible.
	if (block_debug_directives)
		return;

	if (options.emit_line_directives)
	{
		require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
		statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
	}
}

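// Copy between two logically identical types by fully unrolling the copy into
// per-member/per-element stores, e.g. an array of structs is copied one scalar or
// vector member at a time via emit_store_statement().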
void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
                                          SmallVector<uint32_t> chain)
{
	// Fully unroll all member/array indices one by one.

	auto &lhs_type = get<SPIRType>(lhs_type_id);
	auto &rhs_type = get<SPIRType>(rhs_type_id);

	if (!lhs_type.array.empty())
	{
		// Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
		// and this is a rather obscure opcode anyways, keep it simple unless we are forced to.
		uint32_t array_size = to_array_size_literal(lhs_type);
		chain.push_back(0);

		for (uint32_t i = 0; i < array_size; i++)
		{
			chain.back() = i;
			emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain);
		}
	}
	else if (lhs_type.basetype == SPIRType::Struct)
	{
		chain.push_back(0);
		uint32_t member_count = uint32_t(lhs_type.member_types.size());
		for (uint32_t i = 0; i < member_count; i++)
		{
			chain.back() = i;
			emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain);
		}
	}
	else
	{
		// Need to handle unpack/packing fixups since this can differ wildly between the logical types,
		// particularly in MSL.
		// To deal with this, we emit access chains and go through emit_store_statement
		// to deal with all the special cases we can encounter.

		AccessChainMeta lhs_meta, rhs_meta;
		auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()),
		                                 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta);
		auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()),
		                                 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta);

		uint32_t id = ir.increase_bound_by(2);
		lhs_id = id;
		rhs_id = id + 1;

		{
			auto &lhs_expr = set<SPIRExpression>(lhs_id, std::move(lhs), lhs_type_id, true);
			lhs_expr.need_transpose = lhs_meta.need_transpose;

			if (lhs_meta.storage_is_packed)
				set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked);
			if (lhs_meta.storage_physical_type != 0)
				set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type);

			forwarded_temporaries.insert(lhs_id);
			suppressed_usage_tracking.insert(lhs_id);
		}

		{
			auto &rhs_expr = set<SPIRExpression>(rhs_id, std::move(rhs), rhs_type_id, true);
			rhs_expr.need_transpose = rhs_meta.need_transpose;

			if (rhs_meta.storage_is_packed)
				set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked);
			if (rhs_meta.storage_physical_type != 0)
				set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type);

			forwarded_temporaries.insert(rhs_id);
			suppressed_usage_tracking.insert(rhs_id);
		}

		emit_store_statement(lhs_id, rhs_id);
	}
}

bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
{
	if (!has_decoration(id, DecorationInputAttachmentIndex))
		return false;

	uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex);
	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
		if (remap.first == input_attachment_index)
			return true;

	return false;
}

const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
{
	const SPIRVariable *ret = nullptr;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (has_decoration(var.self, DecorationInputAttachmentIndex) &&
		    get_decoration(var.self, DecorationInputAttachmentIndex) == index)
		{
			ret = &var;
		}
	});
	return ret;
}

const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
{
	const SPIRVariable *ret = nullptr;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location)
			ret = &var;
	});
	return ret;
}

void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
{
	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
	{
		auto *subpass_var = find_subpass_input_by_attachment_index(remap.first);
		auto *output_var = find_color_output_by_location(remap.second);
		if (!subpass_var)
			continue;
		if (!output_var)
			SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
			                  "to read from it.");
		if (is_array(get<SPIRType>(output_var->basetype)))
			SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");

		auto &func = get<SPIRFunction>(get_entry_point().self);
		func.fixup_hooks_in.push_back([=]() {
			if (is_legacy())
			{
				statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[",
				          get_decoration(output_var->self, DecorationLocation), "];");
			}
			else
			{
				uint32_t num_rt_components = this->get<SPIRType>(output_var->basetype).vecsize;
				statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ",
				          to_expression(output_var->self), ";");
			}
		});
	}
}

bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
{
	return is_depth_image(get<SPIRType>(get<SPIRVariable>(id).basetype), id);
}

const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
{
	static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
		                                                "GL_KHR_shader_subgroup_basic",
		                                                "GL_KHR_shader_subgroup_vote",
		                                                "GL_KHR_shader_subgroup_arithmetic",
		                                                "GL_NV_gpu_shader_5",
		                                                "GL_NV_shader_thread_group",
		                                                "GL_NV_shader_thread_shuffle",
		                                                "GL_ARB_shader_ballot",
		                                                "GL_ARB_shader_group_vote",
		                                                "GL_AMD_gcn_shader" };
	return retval[c];
}

SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
{
	switch (c)
	{
	case ARB_shader_ballot:
		return { "GL_ARB_shader_int64" };
	case AMD_gcn_shader:
		return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
	default:
		return {};
	}
}

const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
{
	switch (c)
	{
	case ARB_shader_ballot:
		return "defined(GL_ARB_shader_int64)";
	case AMD_gcn_shader:
		return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
	default:
		return "";
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_feature_dependencies(Feature feature)
{
	switch (feature)
	{
	case SubgroupAllEqualT:
		return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
	case SubgroupElect:
		return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
		return { SubgroupMask };
	case SubgroupBallotBitCount:
		return { SubgroupBallot };
	case SubgroupArithmeticIAddReduce:
	case SubgroupArithmeticIAddInclusiveScan:
	case SubgroupArithmeticFAddReduce:
	case SubgroupArithmeticFAddInclusiveScan:
	case SubgroupArithmeticIMulReduce:
	case SubgroupArithmeticIMulInclusiveScan:
	case SubgroupArithmeticFMulReduce:
	case SubgroupArithmeticFMulInclusiveScan:
		return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask, SubgroupBallotBitExtract };
	case SubgroupArithmeticIAddExclusiveScan:
	case SubgroupArithmeticFAddExclusiveScan:
	case SubgroupArithmeticIMulExclusiveScan:
	case SubgroupArithmeticFMulExclusiveScan:
		return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount,
			     SubgroupMask, SubgroupElect, SubgroupBallotBitExtract };
	default:
		return {};
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
    get_feature_dependency_mask(Feature feature)
{
	return build_mask(get_feature_dependencies(feature));
}

bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
{
	static const bool retval[FeatureCount] = {
		false, false, false, false, false, false,
		true, // SubgroupBallotFindLSB_MSB
		false, false, false, false,
		true, // SubgroupMemBarrier - replaced with workgroup memory barriers
		false, false, true, false,
		false, false, false, false, false, false, // iadd, fadd
		false, false, false, false, false, false, // imul, fmul
	};

	return retval[feature];
}

CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
    get_KHR_extension_for_feature(Feature feature)
{
	static const Candidate extensions[FeatureCount] = {
		KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
		KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
		KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
		KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot,
		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
	};

	return extensions[feature];
}

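// Record a requested subgroup feature together with its dependencies in the feature bitmask.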
void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
{
	feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
}

bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
{
	return (feature_mask & (1u << feature)) != 0;
}

CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
{
	Result res;

	for (uint32_t i = 0u; i < FeatureCount; ++i)
	{
		if (feature_mask & (1u << i))
		{
			auto feature = static_cast<Feature>(i);
			std::unordered_set<uint32_t> unique_candidates;

			auto candidates = get_candidates_for_feature(feature);
			unique_candidates.insert(candidates.begin(), candidates.end());

			auto deps = get_feature_dependencies(feature);
			for (Feature d : deps)
			{
				candidates = get_candidates_for_feature(d);
				if (!candidates.empty())
					unique_candidates.insert(candidates.begin(), candidates.end());
			}

			for (uint32_t c : unique_candidates)
				++res.weights[static_cast<Candidate>(c)];
		}
	}

	return res;
}

CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_candidates_for_feature(Feature ft, const Result &r)
{
	auto c = get_candidates_for_feature(ft);
	auto cmp = [&r](Candidate a, Candidate b) {
		if (r.weights[a] == r.weights[b])
			return a < b; // Prefer candidates with lower enum value
		return r.weights[a] > r.weights[b];
	};
	std::sort(c.begin(), c.end(), cmp);
	return c;
}

CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_candidates_for_feature(Feature feature)
{
	switch (feature)
	{
	case SubgroupMask:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupSize:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
	case SubgroupInvocationID:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupID:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
	case NumSubgroups:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
	case SubgroupBroadcast_First:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
	case SubgroupBallotFindLSB_MSB:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
	case SubgroupAll_Any_AllEqualBool:
		return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
	case SubgroupAllEqualT:
		return {}; // depends on other features only
	case SubgroupElect:
		return {}; // depends on other features only
	case SubgroupBallot:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupBarrier:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
	case SubgroupMemBarrier:
		return { KHR_shader_subgroup_basic };
	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
		return {};
	case SubgroupBallotBitExtract:
		return { NV_shader_thread_group };
	case SubgroupBallotBitCount:
		return {};
	case SubgroupArithmeticIAddReduce:
	case SubgroupArithmeticIAddExclusiveScan:
	case SubgroupArithmeticIAddInclusiveScan:
	case SubgroupArithmeticFAddReduce:
	case SubgroupArithmeticFAddExclusiveScan:
	case SubgroupArithmeticFAddInclusiveScan:
	case SubgroupArithmeticIMulReduce:
	case SubgroupArithmeticIMulExclusiveScan:
	case SubgroupArithmeticIMulInclusiveScan:
	case SubgroupArithmeticFMulReduce:
	case SubgroupArithmeticFMulExclusiveScan:
	case SubgroupArithmeticFMulInclusiveScan:
		return { KHR_shader_subgroup_arithmetic, NV_shader_thread_shuffle };
	default:
		return {};
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
    const SmallVector<Feature> &features)
{
	FeatureMask mask = 0;
	for (Feature f : features)
		mask |= FeatureMask(1) << f;
	return mask;
}

CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
{
	for (auto &weight : weights)
		weight = 0;

	// Make sure KHR_shader_subgroup extensions are always preferred.
	const uint32_t big_num = FeatureCount;
	weights[KHR_shader_subgroup_ballot] = big_num;
	weights[KHR_shader_subgroup_basic] = big_num;
	weights[KHR_shader_subgroup_vote] = big_num;
	weights[KHR_shader_subgroup_arithmetic] = big_num;
}

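// Queue a wrapper-function overload for this type (used by the row-major UBO load
// workaround below). The first use of a new type forces another compilation pass so
// the helper can be declared up front.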
void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
{
	// Must be ordered to maintain deterministic output, so vector is appropriate.
	if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
	    end(workaround_ubo_load_overload_types))
	{
		force_recompile();
		workaround_ubo_load_overload_types.push_back(id);
	}
}

void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
{
	// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
	// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
	// ensure the row_major decoration is actually respected.
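	// As an illustrative example: a load such as "ubo.rowMajorMVP" becomes
	// "spvWorkaroundRowMajor(ubo.rowMajorMVP)" (or the relaxed-precision "MP" variant below).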
18822 | auto *var = maybe_get_backing_variable(chain: ptr); |
18823 | if (!var) |
18824 | return; |
18825 | |
18826 | auto &backing_type = get<SPIRType>(id: var->basetype); |
18827 | bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform && |
18828 | has_decoration(id: backing_type.self, decoration: DecorationBlock); |
18829 | if (!is_ubo) |
18830 | return; |
18831 | |
18832 | auto *type = &get<SPIRType>(id: loaded_type); |
18833 | bool rewrite = false; |
18834 | bool relaxed = options.es; |
18835 | |
18836 | if (is_matrix(type: *type)) |
18837 | { |
18838 | // To avoid adding a lot of unnecessary meta tracking to forward the row_major state, |
18839 | // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state. |
18840 | // If there is any row-major action going on, we apply the workaround. |
18841 | // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution. |
18842 | // If an access chain occurred, the workaround is not required, so loading vectors or scalars don't need workaround. |
18843 | type = &backing_type; |
18844 | } |
18845 | else |
18846 | { |
18847 | // If we're loading a composite, we don't have overloads like these. |
18848 | relaxed = false; |
18849 | } |
18850 | |
18851 | if (type->basetype == SPIRType::Struct) |
18852 | { |
18853 | // If we're loading a struct where any member is a row-major matrix, apply the workaround. |
18854 | for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++) |
18855 | { |
18856 | auto decorations = combined_decoration_for_member(type: *type, index: i); |
18857 | if (decorations.get(bit: DecorationRowMajor)) |
18858 | rewrite = true; |
18859 | |
18860 | // Since we decide on a per-struct basis, only use mediump wrapper if all candidates are mediump. |
18861 | if (!decorations.get(bit: DecorationRelaxedPrecision)) |
18862 | relaxed = false; |
18863 | } |
18864 | } |
18865 | |
18866 | if (rewrite) |
18867 | { |
18868 | request_workaround_wrapper_overload(id: loaded_type); |
18869 | expr = join(ts: "spvWorkaroundRowMajor", ts: (relaxed ? "MP": ""), ts: "(", ts&: expr, ts: ")"); |
18870 | } |
18871 | } |
18872 | |
18873 | void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component) |
18874 | { |
18875 | masked_output_locations.insert(x: { .location: location, .component: component }); |
18876 | } |
18877 | |
18878 | void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin) |
18879 | { |
18880 | masked_output_builtins.insert(x: builtin); |
18881 | } |
18882 | |
18883 | bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const |
18884 | { |
18885 | auto &type = get<SPIRType>(id: var.basetype); |
18886 | bool is_block = has_decoration(id: type.self, decoration: DecorationBlock); |
18887 | // Blocks by themselves are never masked. Must be masked per-member. |
18888 | if (is_block) |
18889 | return false; |
18890 | |
18891 | bool is_builtin = has_decoration(id: var.self, decoration: DecorationBuiltIn); |
18892 | |
18893 | if (is_builtin) |
18894 | { |
18895 | return is_stage_output_builtin_masked(builtin: BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn))); |
18896 | } |
18897 | else |
18898 | { |
18899 | if (!has_decoration(id: var.self, decoration: DecorationLocation)) |
18900 | return false; |
18901 | |
18902 | return is_stage_output_location_masked( |
18903 | location: get_decoration(id: var.self, decoration: DecorationLocation), |
18904 | component: get_decoration(id: var.self, decoration: DecorationComponent)); |
18905 | } |
18906 | } |

bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const
{
	auto &type = get<SPIRType>(var.basetype);
	bool is_block = has_decoration(type.self, DecorationBlock);
	if (!is_block)
		return false;

	BuiltIn builtin = BuiltInMax;
	if (is_member_builtin(type, index, &builtin))
	{
		return is_stage_output_builtin_masked(builtin);
	}
	else
	{
		uint32_t location = get_declared_member_location(var, index, strip_array);
		uint32_t component = get_member_decoration(type.self, index, DecorationComponent);
		return is_stage_output_location_masked(location, component);
	}
}

bool CompilerGLSL::is_per_primitive_variable(const SPIRVariable &var) const
{
	if (has_decoration(var.self, DecorationPerPrimitiveEXT))
		return true;

	auto &type = get<SPIRType>(var.basetype);
	if (!has_decoration(type.self, DecorationBlock))
		return false;

	for (uint32_t i = 0, n = uint32_t(type.member_types.size()); i < n; i++)
		if (!has_member_decoration(type.self, i, DecorationPerPrimitiveEXT))
			return false;

	return true;
}

bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const
{
	return masked_output_locations.count({ location, component }) != 0;
}

bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const
{
	return masked_output_builtins.count(builtin) != 0;
}

uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
{
	auto &block_type = get<SPIRType>(var.basetype);
	if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation))
		return get_member_decoration(block_type.self, mbr_idx, DecorationLocation);
	else
		return get_accumulated_member_location(var, mbr_idx, strip_array);
}

uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
{
	auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
	uint32_t location = get_decoration(var.self, DecorationLocation);

	for (uint32_t i = 0; i < mbr_idx; i++)
	{
		auto &mbr_type = get<SPIRType>(type.member_types[i]);

		// Start counting from any place we have a new location decoration.
		if (has_member_decoration(type.self, i, DecorationLocation))
			location = get_member_decoration(type.self, i, DecorationLocation);

		uint32_t location_count = type_to_location_count(mbr_type);
		location += location_count;
	}

	return location;
}
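
// Worked example (illustrative): for an output block declared at location 0,
//
//   out Block { float a; vec4 b[2]; layout(location = 7) mat4 c; float d; } blk;
//
// the accumulated location of member 3 ("d") is computed as: start at 0;
// "a" adds 1 (-> 1); "b" adds 1 * 2 (-> 3); "c" restarts the count at its
// explicit location 7 and adds its 4 columns (-> 11); so "d" lands at 11.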

StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
{
	auto *var = maybe_get_backing_variable(ptr);

	// If the expression has been lowered to a temporary, we need to use the Generic storage class.
	// We're looking for the effective storage class of a given expression.
	// An access chain or forwarded OpLoads from such access chains
	// will generally have the storage class of the underlying variable, but if the load was not forwarded
	// we have lost any address space qualifiers.
	bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
	                        (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);

	if (var && !forced_temporary)
	{
		if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
			return StorageClassWorkgroup;
		if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
			return StorageClassStorageBuffer;

		// Normalize SSBOs to StorageBuffer here.
		if (var->storage == StorageClassUniform &&
		    has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
			return StorageClassStorageBuffer;
		else
			return var->storage;
	}
	else
		return expression_type(ptr).storage;
}
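
// Illustrative note: older SPIR-V declares SSBOs as Uniform-class variables
// whose block type carries the BufferBlock decoration, while newer SPIR-V
// uses the StorageBuffer storage class directly. The normalization above
// makes both forms report StorageBuffer, so callers only need to handle one
// representation.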

uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const
{
	uint32_t count;
	if (type.basetype == SPIRType::Struct)
	{
		uint32_t mbr_count = uint32_t(type.member_types.size());
		count = 0;
		for (uint32_t i = 0; i < mbr_count; i++)
			count += type_to_location_count(get<SPIRType>(type.member_types[i]));
	}
	else
	{
		count = type.columns > 1 ? type.columns : 1;
	}

	uint32_t dim_count = uint32_t(type.array.size());
	for (uint32_t i = 0; i < dim_count; i++)
		count *= to_array_size_literal(type, i);

	return count;
}
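
// Worked example (illustrative): locations are counted per matrix column and
// then multiplied through every array dimension, so as implemented above:
//
//   float     -> 1
//   vec4      -> 1
//   mat3      -> 3
//   mat2x4[5] -> 2 * 5 = 10
//   struct { vec4 a; mat4 b; } -> 1 + 4 = 5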

std::string CompilerGLSL::format_float(float value) const
{
	if (float_formatter)
		return float_formatter->format_float(value);

	// default behavior
	return convert_to_string(value, current_locale_radix_character);
}

std::string CompilerGLSL::format_double(double value) const
{
	if (float_formatter)
		return float_formatter->format_double(value);

	// default behavior
	return convert_to_string(value, current_locale_radix_character);
}
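
// Minimal sketch of a custom formatter (assuming the FloatFormatter interface
// and a CompilerGLSL::set_float_formatter() hook declared alongside these
// methods; the names below are illustrative):
//
//   struct RoundTripFormatter : FloatFormatter
//   {
//   	std::string format_float(float value) override
//   	{
//   		char buf[64];
//   		snprintf(buf, sizeof(buf), "%.9g", value); // shortest round-trippable float
//   		return buf;
//   	}
//   	std::string format_double(double value) override
//   	{
//   		char buf[64];
//   		snprintf(buf, sizeof(buf), "%.17g", value);
//   		return buf;
//   	}
//   };
//
//   RoundTripFormatter formatter;
//   compiler.set_float_formatter(&formatter);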