1/*
2 * Copyright 2015-2021 Arm Limited
3 * SPDX-License-Identifier: Apache-2.0 OR MIT
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * At your option, you may choose to accept this material under either:
20 * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
21 * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
22 */
23
24#include "spirv_cross.hpp"
25#include "GLSL.std.450.h"
26#include "spirv_cfg.hpp"
27#include "spirv_common.hpp"
28#include "spirv_parser.hpp"
29#include <algorithm>
30#include <cstring>
31#include <utility>
32
33using namespace std;
34using namespace spv;
35using namespace SPIRV_CROSS_NAMESPACE;
36
37Compiler::Compiler(vector<uint32_t> ir_)
38{
39 Parser parser(std::move(ir_));
40 parser.parse();
41 set_ir(std::move(parser.get_parsed_ir()));
42}
43
44Compiler::Compiler(const uint32_t *ir_, size_t word_count)
45{
46 Parser parser(ir_, word_count);
47 parser.parse();
48 set_ir(std::move(parser.get_parsed_ir()));
49}
50
51Compiler::Compiler(const ParsedIR &ir_)
52{
53 set_ir(ir_);
54}
55
56Compiler::Compiler(ParsedIR &&ir_)
57{
58 set_ir(std::move(ir_));
59}
60
61void Compiler::set_ir(ParsedIR &&ir_)
62{
63 ir = std::move(ir_);
64 parse_fixup();
65}
66
67void Compiler::set_ir(const ParsedIR &ir_)
68{
69 ir = ir_;
70 parse_fixup();
71}
72
73string Compiler::compile()
74{
75 return "";
76}
77
78bool Compiler::variable_storage_is_aliased(const SPIRVariable &v)
79{
80 auto &type = get<SPIRType>(id: v.basetype);
81 bool ssbo = v.storage == StorageClassStorageBuffer ||
82 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
83 bool image = type.basetype == SPIRType::Image;
84 bool counter = type.basetype == SPIRType::AtomicCounter;
85 bool buffer_reference = type.storage == StorageClassPhysicalStorageBufferEXT;
86
87 bool is_restrict;
88 if (ssbo)
89 is_restrict = ir.get_buffer_block_flags(var: v).get(bit: DecorationRestrict);
90 else
91 is_restrict = has_decoration(id: v.self, decoration: DecorationRestrict);
92
93 return !is_restrict && (ssbo || image || counter || buffer_reference);
94}
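
// Illustrative example (GLSL, hypothetical names) of why these storage classes are treated as aliased:
//
//   layout(std430, binding = 0) buffer BufA { float a[]; };
//   layout(std430, binding = 1) restrict buffer BufB { float b[]; };
//
// A write through BufA may alias any other non-restrict buffer, storage image or atomic counter,
// so expressions previously loaded from such storage have to be invalidated on writes.
// BufB is declared restrict, so it is not considered aliased here.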
95
96bool Compiler::block_is_pure(const SPIRBlock &block)
97{
98 // This is a global side effect of the function.
99 if (block.terminator == SPIRBlock::Kill ||
100 block.terminator == SPIRBlock::TerminateRay ||
101 block.terminator == SPIRBlock::IgnoreIntersection)
102 return false;
103
104 for (auto &i : block.ops)
105 {
106 auto ops = stream(instr: i);
107 auto op = static_cast<Op>(i.op);
108
109 switch (op)
110 {
111 case OpFunctionCall:
112 {
113 uint32_t func = ops[2];
114 if (!function_is_pure(func: get<SPIRFunction>(id: func)))
115 return false;
116 break;
117 }
118
119 case OpCopyMemory:
120 case OpStore:
121 {
122 auto &type = expression_type(id: ops[0]);
123 if (type.storage != StorageClassFunction)
124 return false;
125 break;
126 }
127
128 case OpImageWrite:
129 return false;
130
131 // Atomics are impure.
132 case OpAtomicLoad:
133 case OpAtomicStore:
134 case OpAtomicExchange:
135 case OpAtomicCompareExchange:
136 case OpAtomicCompareExchangeWeak:
137 case OpAtomicIIncrement:
138 case OpAtomicIDecrement:
139 case OpAtomicIAdd:
140 case OpAtomicISub:
141 case OpAtomicSMin:
142 case OpAtomicUMin:
143 case OpAtomicSMax:
144 case OpAtomicUMax:
145 case OpAtomicAnd:
146 case OpAtomicOr:
147 case OpAtomicXor:
148 return false;
149
150 // Geometry shader builtins modify global state.
151 case OpEndPrimitive:
152 case OpEmitStreamVertex:
153 case OpEndStreamPrimitive:
154 case OpEmitVertex:
155 return false;
156
		// Barriers disallow any reordering, so we must treat a block containing a barrier as if it writes.
158 case OpControlBarrier:
159 case OpMemoryBarrier:
160 return false;
161
162 // Ray tracing builtins are impure.
163 case OpReportIntersectionKHR:
164 case OpIgnoreIntersectionNV:
165 case OpTerminateRayNV:
166 case OpTraceNV:
167 case OpTraceRayKHR:
168 case OpExecuteCallableNV:
169 case OpExecuteCallableKHR:
170 case OpRayQueryInitializeKHR:
171 case OpRayQueryTerminateKHR:
172 case OpRayQueryGenerateIntersectionKHR:
173 case OpRayQueryConfirmIntersectionKHR:
174 case OpRayQueryProceedKHR:
175 // There are various getters in ray query, but they are considered pure.
176 return false;
177
		// OpExtInst is potentially impure depending on the extension set; GLSL.std.450 builtins are pure,
		// except for the pointer variants (Modf, Frexp) which are handled below.
179
180 case OpDemoteToHelperInvocationEXT:
181 // This is a global side effect of the function.
182 return false;
183
184 case OpExtInst:
185 {
186 uint32_t extension_set = ops[2];
187 if (get<SPIRExtension>(id: extension_set).ext == SPIRExtension::GLSL)
188 {
189 auto op_450 = static_cast<GLSLstd450>(ops[3]);
190 switch (op_450)
191 {
192 case GLSLstd450Modf:
193 case GLSLstd450Frexp:
194 {
195 auto &type = expression_type(id: ops[5]);
196 if (type.storage != StorageClassFunction)
197 return false;
198 break;
199 }
200
201 default:
202 break;
203 }
204 }
205 break;
206 }
207
208 default:
209 break;
210 }
211 }
212
213 return true;
214}
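
// Illustrative example (GLSL, hypothetical names) of the purity rules above:
//
//   layout(std430, binding = 0) buffer Counters { uint value; };
//   void bump() { value += 1u; }             // stores to StorageBuffer -> impure
//   float square(float v) { return v * v; }  // touches only Function storage -> pure
//
// A single impure block makes the whole function impure (see function_is_pure()).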
215
216string Compiler::to_name(uint32_t id, bool allow_alias) const
217{
218 if (allow_alias && ir.ids[id].get_type() == TypeType)
219 {
220 // If this type is a simple alias, emit the
221 // name of the original type instead.
222 // We don't want to override the meta alias
223 // as that can be overridden by the reflection APIs after parse.
224 auto &type = get<SPIRType>(id);
225 if (type.type_alias)
226 {
227 // If the alias master has been specially packed, we will have emitted a clean variant as well,
228 // so skip the name aliasing here.
229 if (!has_extended_decoration(id: type.type_alias, decoration: SPIRVCrossDecorationBufferBlockRepacked))
230 return to_name(id: type.type_alias);
231 }
232 }
233
234 auto &alias = ir.get_name(id);
235 if (alias.empty())
236 return join(ts: "_", ts&: id);
237 else
238 return alias;
239}
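
// Example: an ID 42 without an OpName is emitted as "_42". A type which is a pure alias
// resolves to the name of its alias master, unless that master was repacked
// (SPIRVCrossDecorationBufferBlockRepacked), in which case the alias keeps its own name.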
240
241bool Compiler::function_is_pure(const SPIRFunction &func)
242{
243 for (auto block : func.blocks)
244 {
245 if (!block_is_pure(block: get<SPIRBlock>(id: block)))
246 {
247 //fprintf(stderr, "Function %s is impure!\n", to_name(func.self).c_str());
248 return false;
249 }
250 }
251
252 //fprintf(stderr, "Function %s is pure!\n", to_name(func.self).c_str());
253 return true;
254}
255
256void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_t id)
257{
258 for (auto &i : block.ops)
259 {
260 auto ops = stream(instr: i);
261 auto op = static_cast<Op>(i.op);
262
263 switch (op)
264 {
265 case OpFunctionCall:
266 {
267 uint32_t func = ops[2];
268 register_global_read_dependencies(func: get<SPIRFunction>(id: func), id);
269 break;
270 }
271
272 case OpLoad:
273 case OpImageRead:
274 {
275 // If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
276 auto *var = maybe_get_backing_variable(chain: ops[2]);
277 if (var && var->storage != StorageClassFunction)
278 {
279 auto &type = get<SPIRType>(id: var->basetype);
280
				// Input attachments (subpass inputs) are immutable.
282 if (type.basetype != SPIRType::Image && type.image.dim != DimSubpassData)
283 var->dependees.push_back(t: id);
284 }
285 break;
286 }
287
288 default:
289 break;
290 }
291 }
292}
293
294void Compiler::register_global_read_dependencies(const SPIRFunction &func, uint32_t id)
295{
296 for (auto block : func.blocks)
297 register_global_read_dependencies(block: get<SPIRBlock>(id: block), id);
298}
299
300SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain)
301{
302 auto *var = maybe_get<SPIRVariable>(id: chain);
303 if (!var)
304 {
305 auto *cexpr = maybe_get<SPIRExpression>(id: chain);
306 if (cexpr)
307 var = maybe_get<SPIRVariable>(id: cexpr->loaded_from);
308
309 auto *access_chain = maybe_get<SPIRAccessChain>(id: chain);
310 if (access_chain)
311 var = maybe_get<SPIRVariable>(id: access_chain->loaded_from);
312 }
313
314 return var;
315}
316
317void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded)
318{
319 auto &e = get<SPIRExpression>(id: expr);
320 auto *var = maybe_get_backing_variable(chain);
321
322 if (var)
323 {
324 e.loaded_from = var->self;
325
326 // If the backing variable is immutable, we do not need to depend on the variable.
327 if (forwarded && !is_immutable(id: var->self))
328 var->dependees.push_back(t: e.self);
329
330 // If we load from a parameter, make sure we create "inout" if we also write to the parameter.
331 // The default is "in" however, so we never invalidate our compilation by reading.
332 if (var && var->parameter)
333 var->parameter->read_count++;
334 }
335}
336
337void Compiler::register_write(uint32_t chain)
338{
339 auto *var = maybe_get<SPIRVariable>(id: chain);
340 if (!var)
341 {
342 // If we're storing through an access chain, invalidate the backing variable instead.
343 auto *expr = maybe_get<SPIRExpression>(id: chain);
344 if (expr && expr->loaded_from)
345 var = maybe_get<SPIRVariable>(id: expr->loaded_from);
346
347 auto *access_chain = maybe_get<SPIRAccessChain>(id: chain);
348 if (access_chain && access_chain->loaded_from)
349 var = maybe_get<SPIRVariable>(id: access_chain->loaded_from);
350 }
351
352 auto &chain_type = expression_type(id: chain);
353
354 if (var)
355 {
356 bool check_argument_storage_qualifier = true;
357 auto &type = expression_type(id: chain);
358
359 // If our variable is in a storage class which can alias with other buffers,
360 // invalidate all variables which depend on aliased variables. And if this is a
361 // variable pointer, then invalidate all variables regardless.
362 if (get_variable_data_type(var: *var).pointer)
363 {
364 flush_all_active_variables();
365
366 if (type.pointer_depth == 1)
367 {
368 // We have a backing variable which is a pointer-to-pointer type.
369 // We are storing some data through a pointer acquired through that variable,
370 // but we are not writing to the value of the variable itself,
371 // i.e., we are not modifying the pointer directly.
372 // If we are storing a non-pointer type (pointer_depth == 1),
373 // we know that we are storing some unrelated data.
374 // A case here would be
375 // void foo(Foo * const *arg) {
376 // Foo *bar = *arg;
377 // bar->unrelated = 42;
378 // }
379 // arg, the argument is constant.
380 check_argument_storage_qualifier = false;
381 }
382 }
383
384 if (type.storage == StorageClassPhysicalStorageBufferEXT || variable_storage_is_aliased(v: *var))
385 flush_all_aliased_variables();
386 else if (var)
387 flush_dependees(var&: *var);
388
		// We tried to write to a parameter which is not marked with an 'out' qualifier; force a recompile.
390 if (check_argument_storage_qualifier && var->parameter && var->parameter->write_count == 0)
391 {
392 var->parameter->write_count++;
393 force_recompile();
394 }
395 }
396 else if (chain_type.pointer)
397 {
398 // If we stored through a variable pointer, then we don't know which
399 // variable we stored to. So *all* expressions after this point need to
400 // be invalidated.
401 // FIXME: If we can prove that the variable pointer will point to
402 // only certain variables, we can invalidate only those.
403 flush_all_active_variables();
404 }
405
406 // If chain_type.pointer is false, we're not writing to memory backed variables, but temporaries instead.
407 // This can happen in copy_logical_type where we unroll complex reads and writes to temporaries.
408}
409
410void Compiler::flush_dependees(SPIRVariable &var)
411{
412 for (auto expr : var.dependees)
413 invalid_expressions.insert(x: expr);
414 var.dependees.clear();
415}
416
417void Compiler::flush_all_aliased_variables()
418{
419 for (auto aliased : aliased_variables)
420 flush_dependees(var&: get<SPIRVariable>(id: aliased));
421}
422
423void Compiler::flush_all_atomic_capable_variables()
424{
425 for (auto global : global_variables)
426 flush_dependees(var&: get<SPIRVariable>(id: global));
427 flush_all_aliased_variables();
428}
429
430void Compiler::flush_control_dependent_expressions(uint32_t block_id)
431{
432 auto &block = get<SPIRBlock>(id: block_id);
433 for (auto &expr : block.invalidate_expressions)
434 invalid_expressions.insert(x: expr);
435 block.invalidate_expressions.clear();
436}
437
438void Compiler::flush_all_active_variables()
439{
440 // Invalidate all temporaries we read from variables in this block since they were forwarded.
441 // Invalidate all temporaries we read from globals.
442 for (auto &v : current_function->local_variables)
443 flush_dependees(var&: get<SPIRVariable>(id: v));
444 for (auto &arg : current_function->arguments)
445 flush_dependees(var&: get<SPIRVariable>(id: arg.id));
446 for (auto global : global_variables)
447 flush_dependees(var&: get<SPIRVariable>(id: global));
448
449 flush_all_aliased_variables();
450}
451
452uint32_t Compiler::expression_type_id(uint32_t id) const
453{
454 switch (ir.ids[id].get_type())
455 {
456 case TypeVariable:
457 return get<SPIRVariable>(id).basetype;
458
459 case TypeExpression:
460 return get<SPIRExpression>(id).expression_type;
461
462 case TypeConstant:
463 return get<SPIRConstant>(id).constant_type;
464
465 case TypeConstantOp:
466 return get<SPIRConstantOp>(id).basetype;
467
468 case TypeUndef:
469 return get<SPIRUndef>(id).basetype;
470
471 case TypeCombinedImageSampler:
472 return get<SPIRCombinedImageSampler>(id).combined_type;
473
474 case TypeAccessChain:
475 return get<SPIRAccessChain>(id).basetype;
476
477 default:
478 SPIRV_CROSS_THROW("Cannot resolve expression type.");
479 }
480}
481
482const SPIRType &Compiler::expression_type(uint32_t id) const
483{
484 return get<SPIRType>(id: expression_type_id(id));
485}
486
487bool Compiler::expression_is_lvalue(uint32_t id) const
488{
489 auto &type = expression_type(id);
490 switch (type.basetype)
491 {
492 case SPIRType::SampledImage:
493 case SPIRType::Image:
494 case SPIRType::Sampler:
495 return false;
496
497 default:
498 return true;
499 }
500}
501
502bool Compiler::is_immutable(uint32_t id) const
503{
504 if (ir.ids[id].get_type() == TypeVariable)
505 {
506 auto &var = get<SPIRVariable>(id);
507
508 // Anything we load from the UniformConstant address space is guaranteed to be immutable.
509 bool pointer_to_const = var.storage == StorageClassUniformConstant;
510 return pointer_to_const || var.phi_variable || !expression_is_lvalue(id);
511 }
512 else if (ir.ids[id].get_type() == TypeAccessChain)
513 return get<SPIRAccessChain>(id).immutable;
514 else if (ir.ids[id].get_type() == TypeExpression)
515 return get<SPIRExpression>(id).immutable;
516 else if (ir.ids[id].get_type() == TypeConstant || ir.ids[id].get_type() == TypeConstantOp ||
517 ir.ids[id].get_type() == TypeUndef)
518 return true;
519 else
520 return false;
521}
522
523static inline bool storage_class_is_interface(spv::StorageClass storage)
524{
525 switch (storage)
526 {
527 case StorageClassInput:
528 case StorageClassOutput:
529 case StorageClassUniform:
530 case StorageClassUniformConstant:
531 case StorageClassAtomicCounter:
532 case StorageClassPushConstant:
533 case StorageClassStorageBuffer:
534 return true;
535
536 default:
537 return false;
538 }
539}
540
541bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins) const
542{
543 if ((is_builtin_variable(var) && !include_builtins) || var.remapped_variable)
544 return true;
545
546 // Combined image samplers are always considered active as they are "magic" variables.
547 if (find_if(first: begin(cont: combined_image_samplers), last: end(cont: combined_image_samplers), pred: [&var](const CombinedImageSampler &samp) {
548 return samp.combined_id == var.self;
549 }) != end(cont: combined_image_samplers))
550 {
551 return false;
552 }
553
554 // In SPIR-V 1.4 and up we must also use the active variable interface to disable global variables
555 // which are not part of the entry point.
556 if (ir.get_spirv_version() >= 0x10400 && var.storage != spv::StorageClassGeneric &&
557 var.storage != spv::StorageClassFunction && !interface_variable_exists_in_entry_point(id: var.self))
558 {
559 return true;
560 }
561
562 return check_active_interface_variables && storage_class_is_interface(storage: var.storage) &&
563 active_interface_variables.find(x: var.self) == end(cont: active_interface_variables);
564}
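
// Example: from SPIR-V 1.4 onwards, OpEntryPoint must list every global variable referenced
// by the entry point, not just Input/Output interfaces (identifiers below are hypothetical):
//
//   OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID %ssbo
//
// A Private/Uniform/StorageBuffer global missing from that list is therefore treated as
// hidden for this entry point.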
565
566bool Compiler::is_builtin_type(const SPIRType &type) const
567{
568 auto *type_meta = ir.find_meta(id: type.self);
569
570 // We can have builtin structs as well. If one member of a struct is builtin, the struct must also be builtin.
571 if (type_meta)
572 for (auto &m : type_meta->members)
573 if (m.builtin)
574 return true;
575
576 return false;
577}
578
579bool Compiler::is_builtin_variable(const SPIRVariable &var) const
580{
581 auto *m = ir.find_meta(id: var.self);
582
583 if (var.compat_builtin || (m && m->decoration.builtin))
584 return true;
585 else
586 return is_builtin_type(type: get<SPIRType>(id: var.basetype));
587}
588
589bool Compiler::is_member_builtin(const SPIRType &type, uint32_t index, BuiltIn *builtin) const
590{
591 auto *type_meta = ir.find_meta(id: type.self);
592
593 if (type_meta)
594 {
595 auto &memb = type_meta->members;
596 if (index < memb.size() && memb[index].builtin)
597 {
598 if (builtin)
599 *builtin = memb[index].builtin_type;
600 return true;
601 }
602 }
603
604 return false;
605}
606
607bool Compiler::is_scalar(const SPIRType &type) const
608{
609 return type.basetype != SPIRType::Struct && type.vecsize == 1 && type.columns == 1;
610}
611
612bool Compiler::is_vector(const SPIRType &type) const
613{
614 return type.vecsize > 1 && type.columns == 1;
615}
616
617bool Compiler::is_matrix(const SPIRType &type) const
618{
619 return type.vecsize > 1 && type.columns > 1;
620}
621
622bool Compiler::is_array(const SPIRType &type) const
623{
624 return !type.array.empty();
625}
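
// Classification examples for the helpers above:
//   float    -> scalar (vecsize == 1, columns == 1)
//   vec3     -> vector (vecsize == 3, columns == 1)
//   mat3x4   -> matrix (vecsize == 4, columns == 3)
//   float[4] -> array  (type.array is non-empty)
// A struct is never reported as a scalar, even if it only has one member.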
626
627ShaderResources Compiler::get_shader_resources() const
628{
629 return get_shader_resources(active_variables: nullptr);
630}
631
632ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> &active_variables) const
633{
634 return get_shader_resources(active_variables: &active_variables);
635}
636
637bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
638{
639 uint32_t variable = 0;
640 switch (opcode)
641 {
642 // Need this first, otherwise, GCC complains about unhandled switch statements.
643 default:
644 break;
645
646 case OpFunctionCall:
647 {
648 // Invalid SPIR-V.
649 if (length < 3)
650 return false;
651
652 uint32_t count = length - 3;
653 args += 3;
654 for (uint32_t i = 0; i < count; i++)
655 {
656 auto *var = compiler.maybe_get<SPIRVariable>(id: args[i]);
657 if (var && storage_class_is_interface(storage: var->storage))
658 variables.insert(x: args[i]);
659 }
660 break;
661 }
662
663 case OpSelect:
664 {
665 // Invalid SPIR-V.
666 if (length < 5)
667 return false;
668
669 uint32_t count = length - 3;
670 args += 3;
671 for (uint32_t i = 0; i < count; i++)
672 {
673 auto *var = compiler.maybe_get<SPIRVariable>(id: args[i]);
674 if (var && storage_class_is_interface(storage: var->storage))
675 variables.insert(x: args[i]);
676 }
677 break;
678 }
679
680 case OpPhi:
681 {
682 // Invalid SPIR-V.
683 if (length < 2)
684 return false;
685
686 uint32_t count = length - 2;
687 args += 2;
688 for (uint32_t i = 0; i < count; i += 2)
689 {
690 auto *var = compiler.maybe_get<SPIRVariable>(id: args[i]);
691 if (var && storage_class_is_interface(storage: var->storage))
692 variables.insert(x: args[i]);
693 }
694 break;
695 }
696
697 case OpAtomicStore:
698 case OpStore:
699 // Invalid SPIR-V.
700 if (length < 1)
701 return false;
702 variable = args[0];
703 break;
704
705 case OpCopyMemory:
706 {
707 if (length < 2)
708 return false;
709
710 auto *var = compiler.maybe_get<SPIRVariable>(id: args[0]);
711 if (var && storage_class_is_interface(storage: var->storage))
712 variables.insert(x: args[0]);
713
714 var = compiler.maybe_get<SPIRVariable>(id: args[1]);
715 if (var && storage_class_is_interface(storage: var->storage))
716 variables.insert(x: args[1]);
717 break;
718 }
719
720 case OpExtInst:
721 {
722 if (length < 5)
723 return false;
724 auto &extension_set = compiler.get<SPIRExtension>(id: args[2]);
725 switch (extension_set.ext)
726 {
727 case SPIRExtension::GLSL:
728 {
729 auto op = static_cast<GLSLstd450>(args[3]);
730
731 switch (op)
732 {
733 case GLSLstd450InterpolateAtCentroid:
734 case GLSLstd450InterpolateAtSample:
735 case GLSLstd450InterpolateAtOffset:
736 {
737 auto *var = compiler.maybe_get<SPIRVariable>(id: args[4]);
738 if (var && storage_class_is_interface(storage: var->storage))
739 variables.insert(x: args[4]);
740 break;
741 }
742
743 case GLSLstd450Modf:
			case GLSLstd450Frexp:
745 {
746 auto *var = compiler.maybe_get<SPIRVariable>(id: args[5]);
747 if (var && storage_class_is_interface(storage: var->storage))
748 variables.insert(x: args[5]);
749 break;
750 }
751
752 default:
753 break;
754 }
755 break;
756 }
757 case SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter:
758 {
759 enum AMDShaderExplicitVertexParameter
760 {
761 InterpolateAtVertexAMD = 1
762 };
763
764 auto op = static_cast<AMDShaderExplicitVertexParameter>(args[3]);
765
766 switch (op)
767 {
768 case InterpolateAtVertexAMD:
769 {
770 auto *var = compiler.maybe_get<SPIRVariable>(id: args[4]);
771 if (var && storage_class_is_interface(storage: var->storage))
772 variables.insert(x: args[4]);
773 break;
774 }
775
776 default:
777 break;
778 }
779 break;
780 }
781 default:
782 break;
783 }
784 break;
785 }
786
787 case OpAccessChain:
788 case OpInBoundsAccessChain:
789 case OpPtrAccessChain:
790 case OpLoad:
791 case OpCopyObject:
792 case OpImageTexelPointer:
793 case OpAtomicLoad:
794 case OpAtomicExchange:
795 case OpAtomicCompareExchange:
796 case OpAtomicCompareExchangeWeak:
797 case OpAtomicIIncrement:
798 case OpAtomicIDecrement:
799 case OpAtomicIAdd:
800 case OpAtomicISub:
801 case OpAtomicSMin:
802 case OpAtomicUMin:
803 case OpAtomicSMax:
804 case OpAtomicUMax:
805 case OpAtomicAnd:
806 case OpAtomicOr:
807 case OpAtomicXor:
808 case OpArrayLength:
809 // Invalid SPIR-V.
810 if (length < 3)
811 return false;
812 variable = args[2];
813 break;
814 }
815
816 if (variable)
817 {
818 auto *var = compiler.maybe_get<SPIRVariable>(id: variable);
819 if (var && storage_class_is_interface(storage: var->storage))
820 variables.insert(x: variable);
821 }
822 return true;
823}
824
825unordered_set<VariableID> Compiler::get_active_interface_variables() const
826{
827 // Traverse the call graph and find all interface variables which are in use.
828 unordered_set<VariableID> variables;
829 InterfaceVariableAccessHandler handler(*this, variables);
830 traverse_all_reachable_opcodes(block: get<SPIRFunction>(id: ir.default_entry_point), handler);
831
832 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
833 if (var.storage != StorageClassOutput)
834 return;
835 if (!interface_variable_exists_in_entry_point(id: var.self))
836 return;
837
838 // An output variable which is just declared (but uninitialized) might be read by subsequent stages
839 // so we should force-enable these outputs,
840 // since compilation will fail if a subsequent stage attempts to read from the variable in question.
841 // Also, make sure we preserve output variables which are only initialized, but never accessed by any code.
842 if (var.initializer != ID(0) || get_execution_model() != ExecutionModelFragment)
843 variables.insert(x: var.self);
844 });
845
	// If a dummy sampler had to be created, treat it as active as well.
847 if (dummy_sampler_id)
848 variables.insert(x: dummy_sampler_id);
849
850 return variables;
851}
852
853void Compiler::set_enabled_interface_variables(std::unordered_set<VariableID> active_variables)
854{
855 active_interface_variables = std::move(active_variables);
856 check_active_interface_variables = true;
857}
858
859ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *active_variables) const
860{
861 ShaderResources res;
862
863 bool ssbo_instance_name = reflection_ssbo_instance_name_is_significant();
864
865 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
866 auto &type = this->get<SPIRType>(id: var.basetype);
867
		// It is possible for uniform storage classes to be passed as function parameters, so detect
		// that. To detect function parameters, check whether the StorageClass of the variable is Function scope.
870 if (var.storage == StorageClassFunction || !type.pointer)
871 return;
872
873 if (active_variables && active_variables->find(x: var.self) == end(cont: *active_variables))
874 return;
875
876 // In SPIR-V 1.4 and up, every global must be present in the entry point interface list,
877 // not just IO variables.
878 bool active_in_entry_point = true;
879 if (ir.get_spirv_version() < 0x10400)
880 {
881 if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
882 active_in_entry_point = interface_variable_exists_in_entry_point(id: var.self);
883 }
884 else
885 active_in_entry_point = interface_variable_exists_in_entry_point(id: var.self);
886
887 if (!active_in_entry_point)
888 return;
889
890 bool is_builtin = is_builtin_variable(var);
891
892 if (is_builtin)
893 {
894 if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
895 return;
896
897 auto &list = var.storage == StorageClassInput ? res.builtin_inputs : res.builtin_outputs;
898 BuiltInResource resource;
899
900 if (has_decoration(id: type.self, decoration: DecorationBlock))
901 {
902 resource.resource = { .id: var.self, .type_id: var.basetype, .base_type_id: type.self,
903 .name: get_remapped_declared_block_name(id: var.self, fallback_prefer_instance_name: false) };
904
905 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
906 {
907 resource.value_type_id = type.member_types[i];
908 resource.builtin = BuiltIn(get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn));
909 list.push_back(t: resource);
910 }
911 }
912 else
913 {
914 bool strip_array =
915 !has_decoration(id: var.self, decoration: DecorationPatch) && (
916 get_execution_model() == ExecutionModelTessellationControl ||
917 (get_execution_model() == ExecutionModelTessellationEvaluation &&
918 var.storage == StorageClassInput));
919
920 resource.resource = { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) };
921
922 if (strip_array && !type.array.empty())
923 resource.value_type_id = get_variable_data_type(var).parent_type;
924 else
925 resource.value_type_id = get_variable_data_type_id(var);
926
927 assert(resource.value_type_id);
928
929 resource.builtin = BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn));
930 list.push_back(t: std::move(resource));
931 }
932 return;
933 }
934
935 // Input
936 if (var.storage == StorageClassInput)
937 {
938 if (has_decoration(id: type.self, decoration: DecorationBlock))
939 {
940 res.stage_inputs.push_back(
941 t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self,
942 .name: get_remapped_declared_block_name(id: var.self, fallback_prefer_instance_name: false) });
943 }
944 else
945 res.stage_inputs.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
946 }
947 // Subpass inputs
948 else if (var.storage == StorageClassUniformConstant && type.image.dim == DimSubpassData)
949 {
950 res.subpass_inputs.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
951 }
952 // Outputs
953 else if (var.storage == StorageClassOutput)
954 {
955 if (has_decoration(id: type.self, decoration: DecorationBlock))
956 {
957 res.stage_outputs.push_back(
958 t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_remapped_declared_block_name(id: var.self, fallback_prefer_instance_name: false) });
959 }
960 else
961 res.stage_outputs.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
962 }
963 // UBOs
964 else if (type.storage == StorageClassUniform && has_decoration(id: type.self, decoration: DecorationBlock))
965 {
966 res.uniform_buffers.push_back(
967 t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_remapped_declared_block_name(id: var.self, fallback_prefer_instance_name: false) });
968 }
969 // Old way to declare SSBOs.
970 else if (type.storage == StorageClassUniform && has_decoration(id: type.self, decoration: DecorationBufferBlock))
971 {
972 res.storage_buffers.push_back(
973 t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_remapped_declared_block_name(id: var.self, fallback_prefer_instance_name: ssbo_instance_name) });
974 }
975 // Modern way to declare SSBOs.
976 else if (type.storage == StorageClassStorageBuffer)
977 {
978 res.storage_buffers.push_back(
979 t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_remapped_declared_block_name(id: var.self, fallback_prefer_instance_name: ssbo_instance_name) });
980 }
981 // Push constant blocks
982 else if (type.storage == StorageClassPushConstant)
983 {
984 // There can only be one push constant block, but keep the vector in case this restriction is lifted
985 // in the future.
986 res.push_constant_buffers.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
987 }
988 // Images
989 else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image &&
990 type.image.sampled == 2)
991 {
992 res.storage_images.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
993 }
994 // Separate images
995 else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image &&
996 type.image.sampled == 1)
997 {
998 res.separate_images.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
999 }
1000 // Separate samplers
1001 else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Sampler)
1002 {
1003 res.separate_samplers.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1004 }
1005 // Textures
1006 else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::SampledImage)
1007 {
1008 res.sampled_images.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1009 }
1010 // Atomic counters
1011 else if (type.storage == StorageClassAtomicCounter)
1012 {
1013 res.atomic_counters.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1014 }
1015 // Acceleration structures
1016 else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::AccelerationStructure)
1017 {
1018 res.acceleration_structures.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1019 }
1020 });
1021
1022 return res;
1023}
1024
1025bool Compiler::type_is_block_like(const SPIRType &type) const
1026{
1027 if (type.basetype != SPIRType::Struct)
1028 return false;
1029
1030 if (has_decoration(id: type.self, decoration: DecorationBlock) || has_decoration(id: type.self, decoration: DecorationBufferBlock))
1031 {
1032 return true;
1033 }
1034
1035 // Block-like types may have Offset decorations.
1036 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
1037 if (has_member_decoration(id: type.self, index: i, decoration: DecorationOffset))
1038 return true;
1039
1040 return false;
1041}
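
// Example (illustrative SPIR-V): a struct whose members carry Offset decorations,
//
//   OpMemberDecorate %S 0 Offset 0
//   OpMemberDecorate %S 1 Offset 16
//
// is treated as block-like even if %S itself is not decorated Block or BufferBlock.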
1042
1043void Compiler::parse_fixup()
1044{
1045 // Figure out specialization constants for work group sizes.
1046 for (auto id_ : ir.ids_for_constant_or_variable)
1047 {
1048 auto &id = ir.ids[id_];
1049
1050 if (id.get_type() == TypeConstant)
1051 {
1052 auto &c = id.get<SPIRConstant>();
1053 if (has_decoration(id: c.self, decoration: DecorationBuiltIn) &&
1054 BuiltIn(get_decoration(id: c.self, decoration: DecorationBuiltIn)) == BuiltInWorkgroupSize)
1055 {
1056 // In current SPIR-V, there can be just one constant like this.
1057 // All entry points will receive the constant value.
				// WorkgroupSize takes precedence over LocalSizeId.
1059 for (auto &entry : ir.entry_points)
1060 {
1061 entry.second.workgroup_size.constant = c.self;
1062 entry.second.workgroup_size.x = c.scalar(col: 0, row: 0);
1063 entry.second.workgroup_size.y = c.scalar(col: 0, row: 1);
1064 entry.second.workgroup_size.z = c.scalar(col: 0, row: 2);
1065 }
1066 }
1067 }
1068 else if (id.get_type() == TypeVariable)
1069 {
1070 auto &var = id.get<SPIRVariable>();
1071 if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup ||
1072 var.storage == StorageClassOutput)
1073 global_variables.push_back(t: var.self);
1074 if (variable_storage_is_aliased(v: var))
1075 aliased_variables.push_back(t: var.self);
1076 }
1077 }
1078}
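
// Example (GLSL): a compute shader declaring
//   layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z = 1) in;
// typically yields a spec-constant composite decorated "BuiltIn WorkgroupSize";
// its three scalars populate entry.workgroup_size.{x, y, z} in parse_fixup() above.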
1079
1080void Compiler::update_name_cache(unordered_set<string> &cache_primary, const unordered_set<string> &cache_secondary,
1081 string &name)
1082{
1083 if (name.empty())
1084 return;
1085
1086 const auto find_name = [&](const string &n) -> bool {
1087 if (cache_primary.find(x: n) != end(cont&: cache_primary))
1088 return true;
1089
1090 if (&cache_primary != &cache_secondary)
1091 if (cache_secondary.find(x: n) != end(cont: cache_secondary))
1092 return true;
1093
1094 return false;
1095 };
1096
1097 const auto insert_name = [&](const string &n) { cache_primary.insert(x: n); };
1098
1099 if (!find_name(name))
1100 {
1101 insert_name(name);
1102 return;
1103 }
1104
1105 uint32_t counter = 0;
1106 auto tmpname = name;
1107
1108 bool use_linked_underscore = true;
1109
1110 if (tmpname == "_")
1111 {
1112 // We cannot just append numbers, as we will end up creating internally reserved names.
1113 // Make it like _0_<counter> instead.
1114 tmpname += "0";
1115 }
1116 else if (tmpname.back() == '_')
1117 {
		// The last character is already an underscore, so we don't need to add a linking underscore;
		// doing so would create a double underscore, which is a reserved pattern.
1120 use_linked_underscore = false;
1121 }
1122
	// If there is a collision (very rare),
	// keep tacking on an extra suffix until the name is unique.
1125 do
1126 {
1127 counter++;
1128 name = tmpname + (use_linked_underscore ? "_" : "") + convert_to_string(t: counter);
1129 } while (find_name(name));
1130 insert_name(name);
1131}
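
// Example of the renaming scheme: with "foo" already in the cache, a second "foo"
// becomes "foo_1" and a third "foo_2". A name already ending in '_' such as "bar_"
// becomes "bar_1" (no extra underscore), and a bare "_" becomes "_0_1".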
1132
1133void Compiler::update_name_cache(unordered_set<string> &cache, string &name)
1134{
1135 update_name_cache(cache_primary&: cache, cache_secondary: cache, name);
1136}
1137
1138void Compiler::set_name(ID id, const std::string &name)
1139{
1140 ir.set_name(id, name);
1141}
1142
1143const SPIRType &Compiler::get_type(TypeID id) const
1144{
1145 return get<SPIRType>(id);
1146}
1147
1148const SPIRType &Compiler::get_type_from_variable(VariableID id) const
1149{
1150 return get<SPIRType>(id: get<SPIRVariable>(id).basetype);
1151}
1152
1153uint32_t Compiler::get_pointee_type_id(uint32_t type_id) const
1154{
1155 auto *p_type = &get<SPIRType>(id: type_id);
1156 if (p_type->pointer)
1157 {
1158 assert(p_type->parent_type);
1159 type_id = p_type->parent_type;
1160 }
1161 return type_id;
1162}
1163
1164const SPIRType &Compiler::get_pointee_type(const SPIRType &type) const
1165{
1166 auto *p_type = &type;
1167 if (p_type->pointer)
1168 {
1169 assert(p_type->parent_type);
1170 p_type = &get<SPIRType>(id: p_type->parent_type);
1171 }
1172 return *p_type;
1173}
1174
1175const SPIRType &Compiler::get_pointee_type(uint32_t type_id) const
1176{
1177 return get_pointee_type(type: get<SPIRType>(id: type_id));
1178}
1179
1180uint32_t Compiler::get_variable_data_type_id(const SPIRVariable &var) const
1181{
1182 if (var.phi_variable)
1183 return var.basetype;
1184 return get_pointee_type_id(type_id: var.basetype);
1185}
1186
1187SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var)
1188{
1189 return get<SPIRType>(id: get_variable_data_type_id(var));
1190}
1191
1192const SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) const
1193{
1194 return get<SPIRType>(id: get_variable_data_type_id(var));
1195}
1196
1197SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var)
1198{
1199 SPIRType *type = &get_variable_data_type(var);
1200 if (is_array(type: *type))
1201 type = &get<SPIRType>(id: type->parent_type);
1202 return *type;
1203}
1204
1205const SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) const
1206{
1207 const SPIRType *type = &get_variable_data_type(var);
1208 if (is_array(type: *type))
1209 type = &get<SPIRType>(id: type->parent_type);
1210 return *type;
1211}
1212
1213bool Compiler::is_sampled_image_type(const SPIRType &type)
1214{
1215 return (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage) && type.image.sampled == 1 &&
1216 type.image.dim != DimBuffer;
1217}
1218
1219void Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration,
1220 const std::string &argument)
1221{
1222 ir.set_member_decoration_string(id, index, decoration, argument);
1223}
1224
1225void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument)
1226{
1227 ir.set_member_decoration(id, index, decoration, argument);
1228}
1229
1230void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name)
1231{
1232 ir.set_member_name(id, index, name);
1233}
1234
1235const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const
1236{
1237 return ir.get_member_name(id, index);
1238}
1239
1240void Compiler::set_qualified_name(uint32_t id, const string &name)
1241{
1242 ir.meta[id].decoration.qualified_alias = name;
1243}
1244
1245void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const std::string &name)
1246{
1247 ir.meta[type_id].members.resize(new_size: max(a: ir.meta[type_id].members.size(), b: size_t(index) + 1));
1248 ir.meta[type_id].members[index].qualified_alias = name;
1249}
1250
1251const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const
1252{
1253 auto *m = ir.find_meta(id: type_id);
1254 if (m && index < m->members.size())
1255 return m->members[index].qualified_alias;
1256 else
1257 return ir.get_empty_string();
1258}
1259
1260uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
1261{
1262 return ir.get_member_decoration(id, index, decoration);
1263}
1264
1265const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const
1266{
1267 return ir.get_member_decoration_bitset(id, index);
1268}
1269
1270bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
1271{
1272 return ir.has_member_decoration(id, index, decoration);
1273}
1274
1275void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration)
1276{
1277 ir.unset_member_decoration(id, index, decoration);
1278}
1279
1280void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument)
1281{
1282 ir.set_decoration_string(id, decoration, argument);
1283}
1284
1285void Compiler::set_decoration(ID id, Decoration decoration, uint32_t argument)
1286{
1287 ir.set_decoration(id, decoration, argument);
1288}
1289
1290void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value)
1291{
1292 auto &dec = ir.meta[id].decoration;
1293 dec.extended.flags.set(decoration);
1294 dec.extended.values[decoration] = value;
1295}
1296
1297void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration,
1298 uint32_t value)
1299{
1300 ir.meta[type].members.resize(new_size: max(a: ir.meta[type].members.size(), b: size_t(index) + 1));
1301 auto &dec = ir.meta[type].members[index];
1302 dec.extended.flags.set(decoration);
1303 dec.extended.values[decoration] = value;
1304}
1305
1306static uint32_t get_default_extended_decoration(ExtendedDecorations decoration)
1307{
1308 switch (decoration)
1309 {
1310 case SPIRVCrossDecorationResourceIndexPrimary:
1311 case SPIRVCrossDecorationResourceIndexSecondary:
1312 case SPIRVCrossDecorationResourceIndexTertiary:
1313 case SPIRVCrossDecorationResourceIndexQuaternary:
1314 case SPIRVCrossDecorationInterfaceMemberIndex:
1315 return ~(0u);
1316
1317 default:
1318 return 0;
1319 }
1320}
1321
1322uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
1323{
1324 auto *m = ir.find_meta(id);
1325 if (!m)
1326 return 0;
1327
1328 auto &dec = m->decoration;
1329
1330 if (!dec.extended.flags.get(bit: decoration))
1331 return get_default_extended_decoration(decoration);
1332
1333 return dec.extended.values[decoration];
1334}
1335
1336uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
1337{
1338 auto *m = ir.find_meta(id: type);
1339 if (!m)
1340 return 0;
1341
1342 if (index >= m->members.size())
1343 return 0;
1344
1345 auto &dec = m->members[index];
1346 if (!dec.extended.flags.get(bit: decoration))
1347 return get_default_extended_decoration(decoration);
1348 return dec.extended.values[decoration];
1349}
1350
1351bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
1352{
1353 auto *m = ir.find_meta(id);
1354 if (!m)
1355 return false;
1356
1357 auto &dec = m->decoration;
1358 return dec.extended.flags.get(bit: decoration);
1359}
1360
1361bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
1362{
1363 auto *m = ir.find_meta(id: type);
1364 if (!m)
1365 return false;
1366
1367 if (index >= m->members.size())
1368 return false;
1369
1370 auto &dec = m->members[index];
1371 return dec.extended.flags.get(bit: decoration);
1372}
1373
1374void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration)
1375{
1376 auto &dec = ir.meta[id].decoration;
1377 dec.extended.flags.clear(bit: decoration);
1378 dec.extended.values[decoration] = 0;
1379}
1380
1381void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration)
1382{
1383 ir.meta[type].members.resize(new_size: max(a: ir.meta[type].members.size(), b: size_t(index) + 1));
1384 auto &dec = ir.meta[type].members[index];
1385 dec.extended.flags.clear(bit: decoration);
1386 dec.extended.values[decoration] = 0;
1387}
1388
1389StorageClass Compiler::get_storage_class(VariableID id) const
1390{
1391 return get<SPIRVariable>(id).storage;
1392}
1393
1394const std::string &Compiler::get_name(ID id) const
1395{
1396 return ir.get_name(id);
1397}
1398
1399const std::string Compiler::get_fallback_name(ID id) const
1400{
1401 return join(ts: "_", ts&: id);
1402}
1403
1404const std::string Compiler::get_block_fallback_name(VariableID id) const
1405{
1406 auto &var = get<SPIRVariable>(id);
1407 if (get_name(id).empty())
1408 return join(ts: "_", ts: get<SPIRType>(id: var.basetype).self, ts: "_", ts&: id);
1409 else
1410 return get_name(id);
1411}
1412
1413const Bitset &Compiler::get_decoration_bitset(ID id) const
1414{
1415 return ir.get_decoration_bitset(id);
1416}
1417
1418bool Compiler::has_decoration(ID id, Decoration decoration) const
1419{
1420 return ir.has_decoration(id, decoration);
1421}
1422
1423const string &Compiler::get_decoration_string(ID id, Decoration decoration) const
1424{
1425 return ir.get_decoration_string(id, decoration);
1426}
1427
1428const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const
1429{
1430 return ir.get_member_decoration_string(id, index, decoration);
1431}
1432
1433uint32_t Compiler::get_decoration(ID id, Decoration decoration) const
1434{
1435 return ir.get_decoration(id, decoration);
1436}
1437
1438void Compiler::unset_decoration(ID id, Decoration decoration)
1439{
1440 ir.unset_decoration(id, decoration);
1441}
1442
1443bool Compiler::get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const
1444{
1445 auto *m = ir.find_meta(id);
1446 if (!m)
1447 return false;
1448
1449 auto &word_offsets = m->decoration_word_offset;
1450 auto itr = word_offsets.find(x: decoration);
1451 if (itr == end(cont: word_offsets))
1452 return false;
1453
1454 word_offset = itr->second;
1455 return true;
1456}
1457
1458bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const
1459{
1460 // Tried and failed.
1461 if (block.disable_block_optimization || block.complex_continue)
1462 return false;
1463
1464 if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
1465 {
1466 // Try to detect common for loop pattern
1467 // which the code backend can use to create cleaner code.
1468 // for(;;) { if (cond) { some_body; } else { break; } }
1469 // is the pattern we're looking for.
1470 const auto *false_block = maybe_get<SPIRBlock>(id: block.false_block);
1471 const auto *true_block = maybe_get<SPIRBlock>(id: block.true_block);
1472 const auto *merge_block = maybe_get<SPIRBlock>(id: block.merge_block);
1473
1474 bool false_block_is_merge = block.false_block == block.merge_block ||
1475 (false_block && merge_block && execution_is_noop(from: *false_block, to: *merge_block));
1476
1477 bool true_block_is_merge = block.true_block == block.merge_block ||
1478 (true_block && merge_block && execution_is_noop(from: *true_block, to: *merge_block));
1479
1480 bool positive_candidate =
1481 block.true_block != block.merge_block && block.true_block != block.self && false_block_is_merge;
1482
1483 bool negative_candidate =
1484 block.false_block != block.merge_block && block.false_block != block.self && true_block_is_merge;
1485
1486 bool ret = block.terminator == SPIRBlock::Select && block.merge == SPIRBlock::MergeLoop &&
1487 (positive_candidate || negative_candidate);
1488
1489 if (ret && positive_candidate && method == SPIRBlock::MergeToSelectContinueForLoop)
1490 ret = block.true_block == block.continue_block;
1491 else if (ret && negative_candidate && method == SPIRBlock::MergeToSelectContinueForLoop)
1492 ret = block.false_block == block.continue_block;
1493
1494 // If we have OpPhi which depends on branches which came from our own block,
1495 // we need to flush phi variables in else block instead of a trivial break,
1496 // so we cannot assume this is a for loop candidate.
1497 if (ret)
1498 {
1499 for (auto &phi : block.phi_variables)
1500 if (phi.parent == block.self)
1501 return false;
1502
1503 auto *merge = maybe_get<SPIRBlock>(id: block.merge_block);
1504 if (merge)
1505 for (auto &phi : merge->phi_variables)
1506 if (phi.parent == block.self)
1507 return false;
1508 }
1509 return ret;
1510 }
1511 else if (method == SPIRBlock::MergeToDirectForLoop)
1512 {
1513 // Empty loop header that just sets up merge target
1514 // and branches to loop body.
1515 bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block.ops.empty();
1516
1517 if (!ret)
1518 return false;
1519
1520 auto &child = get<SPIRBlock>(id: block.next_block);
1521
1522 const auto *false_block = maybe_get<SPIRBlock>(id: child.false_block);
1523 const auto *true_block = maybe_get<SPIRBlock>(id: child.true_block);
1524 const auto *merge_block = maybe_get<SPIRBlock>(id: block.merge_block);
1525
1526 bool false_block_is_merge = child.false_block == block.merge_block ||
1527 (false_block && merge_block && execution_is_noop(from: *false_block, to: *merge_block));
1528
1529 bool true_block_is_merge = child.true_block == block.merge_block ||
1530 (true_block && merge_block && execution_is_noop(from: *true_block, to: *merge_block));
1531
1532 bool positive_candidate =
1533 child.true_block != block.merge_block && child.true_block != block.self && false_block_is_merge;
1534
1535 bool negative_candidate =
1536 child.false_block != block.merge_block && child.false_block != block.self && true_block_is_merge;
1537
1538 ret = child.terminator == SPIRBlock::Select && child.merge == SPIRBlock::MergeNone &&
1539 (positive_candidate || negative_candidate);
1540
1541 // If we have OpPhi which depends on branches which came from our own block,
1542 // we need to flush phi variables in else block instead of a trivial break,
1543 // so we cannot assume this is a for loop candidate.
1544 if (ret)
1545 {
1546 for (auto &phi : block.phi_variables)
1547 if (phi.parent == block.self || phi.parent == child.self)
1548 return false;
1549
1550 for (auto &phi : child.phi_variables)
1551 if (phi.parent == block.self)
1552 return false;
1553
1554 auto *merge = maybe_get<SPIRBlock>(id: block.merge_block);
1555 if (merge)
1556 for (auto &phi : merge->phi_variables)
1557 if (phi.parent == block.self || phi.parent == child.false_block)
1558 return false;
1559 }
1560
1561 return ret;
1562 }
1563 else
1564 return false;
1565}
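
// Illustrative example of the "negative" candidate accepted above:
//
//   for (;;) { if (break_cond) { break; } else { loop_body; } }
//
// i.e. the true branch is (or trivially reaches) the merge block and the false branch
// carries the body; the "positive" candidate is the mirrored form shown in the comment
// inside the function.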
1566
1567bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const
1568{
1569 if (!execution_is_branchless(from, to))
1570 return false;
1571
1572 auto *start = &from;
1573 for (;;)
1574 {
1575 if (start->self == to.self)
1576 return true;
1577
1578 if (!start->ops.empty())
1579 return false;
1580
1581 auto &next = get<SPIRBlock>(id: start->next_block);
1582 // Flushing phi variables does not count as noop.
1583 for (auto &phi : next.phi_variables)
1584 if (phi.parent == start->self)
1585 return false;
1586
1587 start = &next;
1588 }
1589}
1590
1591bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const
1592{
1593 auto *start = &from;
1594 for (;;)
1595 {
1596 if (start->self == to.self)
1597 return true;
1598
1599 if (start->terminator == SPIRBlock::Direct && start->merge == SPIRBlock::MergeNone)
1600 start = &get<SPIRBlock>(id: start->next_block);
1601 else
1602 return false;
1603 }
1604}
1605
1606bool Compiler::execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const
1607{
1608 return from.terminator == SPIRBlock::Direct && from.merge == SPIRBlock::MergeNone && from.next_block == to.self;
1609}
1610
1611SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &block) const
1612{
	// The block was deemed too complex during code emission; pick conservative fallback paths.
1614 if (block.complex_continue)
1615 return SPIRBlock::ComplexLoop;
1616
	// In older glslang output, the continue block can be equal to the loop header.
1618 // In this case, execution is clearly branchless, so just assume a while loop header here.
1619 if (block.merge == SPIRBlock::MergeLoop)
1620 return SPIRBlock::WhileLoop;
1621
1622 if (block.loop_dominator == BlockID(SPIRBlock::NoDominator))
1623 {
1624 // Continue block is never reached from CFG.
1625 return SPIRBlock::ComplexLoop;
1626 }
1627
1628 auto &dominator = get<SPIRBlock>(id: block.loop_dominator);
1629
1630 if (execution_is_noop(from: block, to: dominator))
1631 return SPIRBlock::WhileLoop;
1632 else if (execution_is_branchless(from: block, to: dominator))
1633 return SPIRBlock::ForLoop;
1634 else
1635 {
1636 const auto *false_block = maybe_get<SPIRBlock>(id: block.false_block);
1637 const auto *true_block = maybe_get<SPIRBlock>(id: block.true_block);
1638 const auto *merge_block = maybe_get<SPIRBlock>(id: dominator.merge_block);
1639
1640 // If we need to flush Phi in this block, we cannot have a DoWhile loop.
1641 bool flush_phi_to_false = false_block && flush_phi_required(from: block.self, to: block.false_block);
1642 bool flush_phi_to_true = true_block && flush_phi_required(from: block.self, to: block.true_block);
1643 if (flush_phi_to_false || flush_phi_to_true)
1644 return SPIRBlock::ComplexLoop;
1645
1646 bool positive_do_while = block.true_block == dominator.self &&
1647 (block.false_block == dominator.merge_block ||
1648 (false_block && merge_block && execution_is_noop(from: *false_block, to: *merge_block)));
1649
1650 bool negative_do_while = block.false_block == dominator.self &&
1651 (block.true_block == dominator.merge_block ||
1652 (true_block && merge_block && execution_is_noop(from: *true_block, to: *merge_block)));
1653
1654 if (block.merge == SPIRBlock::MergeNone && block.terminator == SPIRBlock::Select &&
1655 (positive_do_while || negative_do_while))
1656 {
1657 return SPIRBlock::DoWhileLoop;
1658 }
1659 else
1660 return SPIRBlock::ComplexLoop;
1661 }
1662}
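
// Example: a "do { body(); } while (keep_going);" loop has a continue block whose
// conditional branch jumps back to the loop header (the dominator) when keep_going is
// true and to the merge block when it is false, which classifies as DoWhileLoop above.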
1663
1664const SmallVector<SPIRBlock::Case> &Compiler::get_case_list(const SPIRBlock &block) const
1665{
1666 uint32_t width = 0;
1667
1668 // First we check if we can get the type directly from the block.condition
1669 // since it can be a SPIRConstant or a SPIRVariable.
1670 if (const auto *constant = maybe_get<SPIRConstant>(id: block.condition))
1671 {
1672 const auto &type = get<SPIRType>(id: constant->constant_type);
1673 width = type.width;
1674 }
1675 else if (const auto *var = maybe_get<SPIRVariable>(id: block.condition))
1676 {
1677 const auto &type = get<SPIRType>(id: var->basetype);
1678 width = type.width;
1679 }
1680 else if (const auto *undef = maybe_get<SPIRUndef>(id: block.condition))
1681 {
1682 const auto &type = get<SPIRType>(id: undef->basetype);
1683 width = type.width;
1684 }
1685 else
1686 {
1687 auto search = ir.load_type_width.find(x: block.condition);
1688 if (search == ir.load_type_width.end())
1689 {
1690 SPIRV_CROSS_THROW("Use of undeclared variable on a switch statement.");
1691 }
1692
1693 width = search->second;
1694 }
1695
1696 if (width > 32)
1697 return block.cases_64bit;
1698
1699 return block.cases_32bit;
1700}
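
// Example: an OpSwitch whose selector was loaded as a 64-bit integer has width > 32,
// so block.cases_64bit is returned; a 32-bit (or narrower) selector uses block.cases_32bit.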
1701
1702bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const
1703{
1704 handler.set_current_block(block);
1705 handler.rearm_current_block(block);
1706
1707 // Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks,
1708 // but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing
1709 // inside dead blocks ...
1710 for (auto &i : block.ops)
1711 {
1712 auto ops = stream(instr: i);
1713 auto op = static_cast<Op>(i.op);
1714
1715 if (!handler.handle(opcode: op, args: ops, length: i.length))
1716 return false;
1717
1718 if (op == OpFunctionCall)
1719 {
1720 auto &func = get<SPIRFunction>(id: ops[2]);
1721 if (handler.follow_function_call(func))
1722 {
1723 if (!handler.begin_function_scope(ops, i.length))
1724 return false;
1725 if (!traverse_all_reachable_opcodes(block: get<SPIRFunction>(id: ops[2]), handler))
1726 return false;
1727 if (!handler.end_function_scope(ops, i.length))
1728 return false;
1729
1730 handler.rearm_current_block(block);
1731 }
1732 }
1733 }
1734
1735 if (!handler.handle_terminator(block))
1736 return false;
1737
1738 return true;
1739}
1740
1741bool Compiler::traverse_all_reachable_opcodes(const SPIRFunction &func, OpcodeHandler &handler) const
1742{
1743 for (auto block : func.blocks)
1744 if (!traverse_all_reachable_opcodes(block: get<SPIRBlock>(id: block), handler))
1745 return false;
1746
1747 return true;
1748}
1749
1750uint32_t Compiler::type_struct_member_offset(const SPIRType &type, uint32_t index) const
1751{
1752 auto *type_meta = ir.find_meta(id: type.self);
1753 if (type_meta)
1754 {
1755 // Decoration must be set in valid SPIR-V, otherwise throw.
1756 auto &dec = type_meta->members[index];
1757 if (dec.decoration_flags.get(bit: DecorationOffset))
1758 return dec.offset;
1759 else
1760 SPIRV_CROSS_THROW("Struct member does not have Offset set.");
1761 }
1762 else
1763 SPIRV_CROSS_THROW("Struct member does not have Offset set.");
1764}
1765
1766uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_t index) const
1767{
1768 auto *type_meta = ir.find_meta(id: type.member_types[index]);
1769 if (type_meta)
1770 {
1771 // Decoration must be set in valid SPIR-V, otherwise throw.
1772 // ArrayStride is part of the array type not OpMemberDecorate.
1773 auto &dec = type_meta->decoration;
1774 if (dec.decoration_flags.get(bit: DecorationArrayStride))
1775 return dec.array_stride;
1776 else
1777 SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");
1778 }
1779 else
1780 SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");
1781}
1782
1783uint32_t Compiler::type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const
1784{
1785 auto *type_meta = ir.find_meta(id: type.self);
1786 if (type_meta)
1787 {
1788 // Decoration must be set in valid SPIR-V, otherwise throw.
1789 // MatrixStride is part of OpMemberDecorate.
1790 auto &dec = type_meta->members[index];
1791 if (dec.decoration_flags.get(bit: DecorationMatrixStride))
1792 return dec.matrix_stride;
1793 else
1794 SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
1795 }
1796 else
1797 SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
1798}
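
// Usage sketch (assumptions noted): the Offset/ArrayStride/MatrixStride helpers above read the
// same decorations that are visible through the generic decoration API, so external reflection
// code can obtain equivalent data. `compiler` and `block_type` are placeholder names for a
// Compiler instance and a buffer block SPIRType.
//
//   for (uint32_t i = 0; i < uint32_t(block_type.member_types.size()); i++)
//   {
//       uint32_t offset = compiler.get_member_decoration(block_type.self, i, spv::DecorationOffset);
//       // ArrayStride lives on the member's array type and MatrixStride on the member itself,
//       // mirroring type_struct_member_array_stride() / type_struct_member_matrix_stride().
//   }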
1799
1800size_t Compiler::get_declared_struct_size(const SPIRType &type) const
1801{
1802 if (type.member_types.empty())
1803 SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");
1804
1805 // Offsets can be declared out of order, so we need to deduce the actual size
1806	// based on the member with the highest offset instead.
1807 uint32_t member_index = 0;
1808 size_t highest_offset = 0;
1809 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
1810 {
1811 size_t offset = type_struct_member_offset(type, index: i);
1812 if (offset > highest_offset)
1813 {
1814 highest_offset = offset;
1815 member_index = i;
1816 }
1817 }
1818
1819 size_t size = get_declared_struct_member_size(struct_type: type, index: member_index);
1820 return highest_offset + size;
1821}
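
// Worked example (illustrative): for a block laid out as
//   layout(std140) uniform UBO { vec3 a; float b; };
// with Offset decorations a = 0 and b = 12, the member with the highest offset is `b`,
// so the declared size is 12 + sizeof(float) = 16 bytes. Offsets appearing out of order in
// the SPIR-V does not change the result, since only the highest offset matters here.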
1822
1823size_t Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, size_t array_size) const
1824{
1825 if (type.member_types.empty())
1826 SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");
1827
1828 size_t size = get_declared_struct_size(type);
1829 auto &last_type = get<SPIRType>(id: type.member_types.back());
1830 if (!last_type.array.empty() && last_type.array_size_literal[0] && last_type.array[0] == 0) // Runtime array
1831 size += array_size * type_struct_member_array_stride(type, index: uint32_t(type.member_types.size() - 1));
1832
1833 return size;
1834}
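
// Worked example (illustrative): for an SSBO declared as
//   layout(std430) buffer SSBO { uint count; float data[]; };
// with Offsets 0 and 4 and ArrayStride 4 on `data`, the runtime array contributes nothing to
// get_declared_struct_size(), so get_declared_struct_size_runtime_array(type, 10) yields
// 4 + 10 * 4 = 44 bytes.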
1835
1836uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const
1837{
1838 auto &result_type = get<SPIRType>(id: spec.basetype);
1839 if (result_type.basetype != SPIRType::UInt && result_type.basetype != SPIRType::Int &&
1840 result_type.basetype != SPIRType::Boolean)
1841 {
1842 SPIRV_CROSS_THROW(
1843 "Only 32-bit integers and booleans are currently supported when evaluating specialization constants.\n");
1844 }
1845
1846 if (!is_scalar(type: result_type))
1847 SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n");
1848
1849 uint32_t value = 0;
1850
1851 const auto eval_u32 = [&](uint32_t id) -> uint32_t {
1852 auto &type = expression_type(id);
1853 if (type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int && type.basetype != SPIRType::Boolean)
1854 {
1855 SPIRV_CROSS_THROW("Only 32-bit integers and booleans are currently supported when evaluating "
1856 "specialization constants.\n");
1857 }
1858
1859 if (!is_scalar(type))
1860 SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n");
1861 if (const auto *c = this->maybe_get<SPIRConstant>(id))
1862 return c->scalar();
1863 else
1864 return evaluate_spec_constant_u32(spec: this->get<SPIRConstantOp>(id));
1865 };
1866
1867#define binary_spec_op(op, binary_op) \
1868 case Op##op: \
1869 value = eval_u32(spec.arguments[0]) binary_op eval_u32(spec.arguments[1]); \
1870 break
1871#define binary_spec_op_cast(op, binary_op, type) \
1872 case Op##op: \
1873 value = uint32_t(type(eval_u32(spec.arguments[0])) binary_op type(eval_u32(spec.arguments[1]))); \
1874 break
1875
1876 // Support the basic opcodes which are typically used when computing array sizes.
1877 switch (spec.opcode)
1878 {
1879 binary_spec_op(IAdd, +);
1880 binary_spec_op(ISub, -);
1881 binary_spec_op(IMul, *);
1882 binary_spec_op(BitwiseAnd, &);
1883 binary_spec_op(BitwiseOr, |);
1884 binary_spec_op(BitwiseXor, ^);
1885 binary_spec_op(LogicalAnd, &);
1886 binary_spec_op(LogicalOr, |);
1887 binary_spec_op(ShiftLeftLogical, <<);
1888 binary_spec_op(ShiftRightLogical, >>);
1889 binary_spec_op_cast(ShiftRightArithmetic, >>, int32_t);
1890 binary_spec_op(LogicalEqual, ==);
1891 binary_spec_op(LogicalNotEqual, !=);
1892 binary_spec_op(IEqual, ==);
1893 binary_spec_op(INotEqual, !=);
1894 binary_spec_op(ULessThan, <);
1895 binary_spec_op(ULessThanEqual, <=);
1896 binary_spec_op(UGreaterThan, >);
1897 binary_spec_op(UGreaterThanEqual, >=);
1898 binary_spec_op_cast(SLessThan, <, int32_t);
1899 binary_spec_op_cast(SLessThanEqual, <=, int32_t);
1900 binary_spec_op_cast(SGreaterThan, >, int32_t);
1901 binary_spec_op_cast(SGreaterThanEqual, >=, int32_t);
1902#undef binary_spec_op
1903#undef binary_spec_op_cast
1904
1905 case OpLogicalNot:
1906 value = uint32_t(!eval_u32(spec.arguments[0]));
1907 break;
1908
1909 case OpNot:
1910 value = ~eval_u32(spec.arguments[0]);
1911 break;
1912
1913 case OpSNegate:
1914 value = uint32_t(-int32_t(eval_u32(spec.arguments[0])));
1915 break;
1916
1917 case OpSelect:
1918 value = eval_u32(spec.arguments[0]) ? eval_u32(spec.arguments[1]) : eval_u32(spec.arguments[2]);
1919 break;
1920
1921 case OpUMod:
1922 {
1923 uint32_t a = eval_u32(spec.arguments[0]);
1924 uint32_t b = eval_u32(spec.arguments[1]);
1925 if (b == 0)
1926 SPIRV_CROSS_THROW("Undefined behavior in UMod, b == 0.\n");
1927 value = a % b;
1928 break;
1929 }
1930
1931 case OpSRem:
1932 {
1933 auto a = int32_t(eval_u32(spec.arguments[0]));
1934 auto b = int32_t(eval_u32(spec.arguments[1]));
1935 if (b == 0)
1936 SPIRV_CROSS_THROW("Undefined behavior in SRem, b == 0.\n");
1937 value = a % b;
1938 break;
1939 }
1940
1941 case OpSMod:
1942 {
1943 auto a = int32_t(eval_u32(spec.arguments[0]));
1944 auto b = int32_t(eval_u32(spec.arguments[1]));
1945 if (b == 0)
1946 SPIRV_CROSS_THROW("Undefined behavior in SMod, b == 0.\n");
1947 auto v = a % b;
1948
1949 // Makes sure we match the sign of b, not a.
1950 if ((b < 0 && v > 0) || (b > 0 && v < 0))
1951 v += b;
1952 value = v;
1953 break;
1954 }
1955
1956 case OpUDiv:
1957 {
1958 uint32_t a = eval_u32(spec.arguments[0]);
1959 uint32_t b = eval_u32(spec.arguments[1]);
1960 if (b == 0)
1961 SPIRV_CROSS_THROW("Undefined behavior in UDiv, b == 0.\n");
1962 value = a / b;
1963 break;
1964 }
1965
1966 case OpSDiv:
1967 {
1968 auto a = int32_t(eval_u32(spec.arguments[0]));
1969 auto b = int32_t(eval_u32(spec.arguments[1]));
1970 if (b == 0)
1971 SPIRV_CROSS_THROW("Undefined behavior in SDiv, b == 0.\n");
1972 value = a / b;
1973 break;
1974 }
1975
1976 default:
1977 SPIRV_CROSS_THROW("Unsupported spec constant opcode for evaluation.\n");
1978 }
1979
1980 return value;
1981}
1982
1983uint32_t Compiler::evaluate_constant_u32(uint32_t id) const
1984{
1985 if (const auto *c = maybe_get<SPIRConstant>(id))
1986 return c->scalar();
1987 else
1988 return evaluate_spec_constant_u32(spec: get<SPIRConstantOp>(id));
1989}
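
// Worked example (illustrative): if an array is sized by
//   %size = OpSpecConstantOp %uint IMul %sc_a %sc_b
// where %sc_a and %sc_b are 32-bit spec constants with default values 4 and 8,
// evaluate_constant_u32(%size) folds the expression to 32 using the operators above.
// Only scalar 32-bit integer/boolean operations are supported, and division or remainder
// by zero throws instead of invoking undefined behavior.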
1990
1991size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
1992{
1993 if (struct_type.member_types.empty())
1994 SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");
1995
1996 auto &flags = get_member_decoration_bitset(id: struct_type.self, index);
1997 auto &type = get<SPIRType>(id: struct_type.member_types[index]);
1998
1999 switch (type.basetype)
2000 {
2001 case SPIRType::Unknown:
2002 case SPIRType::Void:
2003 case SPIRType::Boolean: // Bools are purely logical, and cannot be used for externally visible types.
2004 case SPIRType::AtomicCounter:
2005 case SPIRType::Image:
2006 case SPIRType::SampledImage:
2007 case SPIRType::Sampler:
2008 SPIRV_CROSS_THROW("Querying size for object with opaque size.");
2009
2010 default:
2011 break;
2012 }
2013
2014 if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
2015 {
2016 // Check if this is a top-level pointer type, and not an array of pointers.
2017 if (type.pointer_depth > get<SPIRType>(id: type.parent_type).pointer_depth)
2018 return 8;
2019 }
2020
2021 if (!type.array.empty())
2022 {
2023		// For arrays, we can use ArrayStride to compute the size easily.
2024 bool array_size_literal = type.array_size_literal.back();
2025 uint32_t array_size = array_size_literal ? type.array.back() : evaluate_constant_u32(id: type.array.back());
2026 return type_struct_member_array_stride(type: struct_type, index) * array_size;
2027 }
2028 else if (type.basetype == SPIRType::Struct)
2029 {
2030 return get_declared_struct_size(type);
2031 }
2032 else
2033 {
2034 unsigned vecsize = type.vecsize;
2035 unsigned columns = type.columns;
2036
2037 // Vectors.
2038 if (columns == 1)
2039 {
2040 size_t component_size = type.width / 8;
2041 return vecsize * component_size;
2042 }
2043 else
2044 {
2045 uint32_t matrix_stride = type_struct_member_matrix_stride(type: struct_type, index);
2046
2047 // Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses.
2048 if (flags.get(bit: DecorationRowMajor))
2049 return matrix_stride * vecsize;
2050 else if (flags.get(bit: DecorationColMajor))
2051 return matrix_stride * columns;
2052 else
2053 SPIRV_CROSS_THROW("Either row-major or column-major must be declared for matrices.");
2054 }
2055 }
2056}
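
// Worked examples (illustrative) for get_declared_struct_member_size():
//   - a float member:                            4 bytes   (width / 8 * vecsize)
//   - a vec3 member:                             12 bytes  (3 * 4)
//   - a column-major mat3 with MatrixStride 16:  48 bytes  (stride * columns)
//   - a float[8] member with ArrayStride 16:     128 bytes (stride * array size)
// Physical storage buffer pointers report 8 bytes, and opaque types throw.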
2057
2058bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
2059{
2060 if (opcode != OpAccessChain && opcode != OpInBoundsAccessChain && opcode != OpPtrAccessChain)
2061 return true;
2062
2063 bool ptr_chain = (opcode == OpPtrAccessChain);
2064
2065 // Invalid SPIR-V.
2066 if (length < (ptr_chain ? 5u : 4u))
2067 return false;
2068
2069 if (args[2] != id)
2070 return true;
2071
2072 // Don't bother traversing the entire access chain tree yet.
2073 // If we access a struct member, assume we access the entire member.
2074 uint32_t index = compiler.get<SPIRConstant>(id: args[ptr_chain ? 4 : 3]).scalar();
2075
2076 // Seen this index already.
2077 if (seen.find(x: index) != end(cont&: seen))
2078 return true;
2079 seen.insert(x: index);
2080
2081 auto &type = compiler.expression_type(id);
2082 uint32_t offset = compiler.type_struct_member_offset(type, index);
2083
2084 size_t range;
2085 // If we have another member in the struct, deduce the range by looking at the next member.
2086 // This is okay since structs in SPIR-V can have padding, but Offset decoration must be
2087 // monotonically increasing.
2088 // Of course, this doesn't take into account if the SPIR-V for some reason decided to add
2089 // very large amounts of padding, but that's not really a big deal.
2090 if (index + 1 < type.member_types.size())
2091 {
2092 range = compiler.type_struct_member_offset(type, index: index + 1) - offset;
2093 }
2094 else
2095 {
2096		// This is the last member, so deduce the range directly from its declared size.
2097 range = compiler.get_declared_struct_member_size(struct_type: type, index);
2098 }
2099
2100 ranges.push_back(t: { .index: index, .offset: offset, .range: range });
2101 return true;
2102}
2103
2104SmallVector<BufferRange> Compiler::get_active_buffer_ranges(VariableID id) const
2105{
2106 SmallVector<BufferRange> ranges;
2107 BufferAccessHandler handler(*this, ranges, id);
2108 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
2109 return ranges;
2110}
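
// Usage sketch (assuming the public reflection API; `compiler` is a placeholder name):
// get_active_buffer_ranges() reports which members of a block are statically accessed by the
// default entry point, which is useful for packing push constants or trimming buffer updates.
//
//   auto resources = compiler.get_shader_resources();
//   for (auto &ubo : resources.uniform_buffers)
//       for (auto &range : compiler.get_active_buffer_ranges(ubo.id))
//           printf("member %u: offset %zu, %zu bytes\n", range.index, range.offset, range.range);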
2111
2112bool Compiler::types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const
2113{
2114 if (a.basetype != b.basetype)
2115 return false;
2116 if (a.width != b.width)
2117 return false;
2118 if (a.vecsize != b.vecsize)
2119 return false;
2120 if (a.columns != b.columns)
2121 return false;
2122 if (a.array.size() != b.array.size())
2123 return false;
2124
2125 size_t array_count = a.array.size();
2126 if (array_count && memcmp(s1: a.array.data(), s2: b.array.data(), n: array_count * sizeof(uint32_t)) != 0)
2127 return false;
2128
2129 if (a.basetype == SPIRType::Image || a.basetype == SPIRType::SampledImage)
2130 {
2131 if (memcmp(s1: &a.image, s2: &b.image, n: sizeof(SPIRType::Image)) != 0)
2132 return false;
2133 }
2134
2135 if (a.member_types.size() != b.member_types.size())
2136 return false;
2137
2138 size_t member_types = a.member_types.size();
2139 for (size_t i = 0; i < member_types; i++)
2140 {
2141 if (!types_are_logically_equivalent(a: get<SPIRType>(id: a.member_types[i]), b: get<SPIRType>(id: b.member_types[i])))
2142 return false;
2143 }
2144
2145 return true;
2146}
2147
2148const Bitset &Compiler::get_execution_mode_bitset() const
2149{
2150 return get_entry_point().flags;
2151}
2152
2153void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t arg1, uint32_t arg2)
2154{
2155 auto &execution = get_entry_point();
2156
2157 execution.flags.set(mode);
2158 switch (mode)
2159 {
2160 case ExecutionModeLocalSize:
2161 execution.workgroup_size.x = arg0;
2162 execution.workgroup_size.y = arg1;
2163 execution.workgroup_size.z = arg2;
2164 break;
2165
2166 case ExecutionModeLocalSizeId:
2167 execution.workgroup_size.id_x = arg0;
2168 execution.workgroup_size.id_y = arg1;
2169 execution.workgroup_size.id_z = arg2;
2170 break;
2171
2172 case ExecutionModeInvocations:
2173 execution.invocations = arg0;
2174 break;
2175
2176 case ExecutionModeOutputVertices:
2177 execution.output_vertices = arg0;
2178 break;
2179
2180 default:
2181 break;
2182 }
2183}
2184
2185void Compiler::unset_execution_mode(ExecutionMode mode)
2186{
2187 auto &execution = get_entry_point();
2188 execution.flags.clear(bit: mode);
2189}
2190
2191uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y,
2192 SpecializationConstant &z) const
2193{
2194 auto &execution = get_entry_point();
2195 x = { .id: 0, .constant_id: 0 };
2196 y = { .id: 0, .constant_id: 0 };
2197 z = { .id: 0, .constant_id: 0 };
2198
2199 // WorkgroupSize builtin takes precedence over LocalSize / LocalSizeId.
2200 if (execution.workgroup_size.constant != 0)
2201 {
2202 auto &c = get<SPIRConstant>(id: execution.workgroup_size.constant);
2203
2204 if (c.m.c[0].id[0] != ID(0))
2205 {
2206 x.id = c.m.c[0].id[0];
2207 x.constant_id = get_decoration(id: c.m.c[0].id[0], decoration: DecorationSpecId);
2208 }
2209
2210 if (c.m.c[0].id[1] != ID(0))
2211 {
2212 y.id = c.m.c[0].id[1];
2213 y.constant_id = get_decoration(id: c.m.c[0].id[1], decoration: DecorationSpecId);
2214 }
2215
2216 if (c.m.c[0].id[2] != ID(0))
2217 {
2218 z.id = c.m.c[0].id[2];
2219 z.constant_id = get_decoration(id: c.m.c[0].id[2], decoration: DecorationSpecId);
2220 }
2221 }
2222 else if (execution.flags.get(bit: ExecutionModeLocalSizeId))
2223 {
2224 auto &cx = get<SPIRConstant>(id: execution.workgroup_size.id_x);
2225 if (cx.specialization)
2226 {
2227 x.id = execution.workgroup_size.id_x;
2228 x.constant_id = get_decoration(id: execution.workgroup_size.id_x, decoration: DecorationSpecId);
2229 }
2230
2231 auto &cy = get<SPIRConstant>(id: execution.workgroup_size.id_y);
2232 if (cy.specialization)
2233 {
2234 y.id = execution.workgroup_size.id_y;
2235 y.constant_id = get_decoration(id: execution.workgroup_size.id_y, decoration: DecorationSpecId);
2236 }
2237
2238 auto &cz = get<SPIRConstant>(id: execution.workgroup_size.id_z);
2239 if (cz.specialization)
2240 {
2241 z.id = execution.workgroup_size.id_z;
2242 z.constant_id = get_decoration(id: execution.workgroup_size.id_z, decoration: DecorationSpecId);
2243 }
2244 }
2245
2246 return execution.workgroup_size.constant;
2247}
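
// Usage sketch (names illustrative): callers typically use this to detect whether the compute
// local size is driven by specialization constants rather than literal LocalSize values.
//
//   SpecializationConstant x, y, z;
//   uint32_t builtin_workgroup = compiler.get_work_group_size_specialization_constants(x, y, z);
//   bool spec_sized = builtin_workgroup != 0 || x.id != 0 || y.id != 0 || z.id != 0;
//   // When spec_sized is true, x.constant_id / y.constant_id / z.constant_id identify the
//   // SpecId values the application can override at pipeline creation.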
2248
2249uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index) const
2250{
2251 auto &execution = get_entry_point();
2252 switch (mode)
2253 {
2254 case ExecutionModeLocalSizeId:
2255 if (execution.flags.get(bit: ExecutionModeLocalSizeId))
2256 {
2257 switch (index)
2258 {
2259 case 0:
2260 return execution.workgroup_size.id_x;
2261 case 1:
2262 return execution.workgroup_size.id_y;
2263 case 2:
2264 return execution.workgroup_size.id_z;
2265 default:
2266 return 0;
2267 }
2268 }
2269 else
2270 return 0;
2271
2272 case ExecutionModeLocalSize:
2273 switch (index)
2274 {
2275 case 0:
2276 if (execution.flags.get(bit: ExecutionModeLocalSizeId) && execution.workgroup_size.id_x != 0)
2277 return get<SPIRConstant>(id: execution.workgroup_size.id_x).scalar();
2278 else
2279 return execution.workgroup_size.x;
2280 case 1:
2281 if (execution.flags.get(bit: ExecutionModeLocalSizeId) && execution.workgroup_size.id_y != 0)
2282 return get<SPIRConstant>(id: execution.workgroup_size.id_y).scalar();
2283 else
2284 return execution.workgroup_size.y;
2285 case 2:
2286 if (execution.flags.get(bit: ExecutionModeLocalSizeId) && execution.workgroup_size.id_z != 0)
2287 return get<SPIRConstant>(id: execution.workgroup_size.id_z).scalar();
2288 else
2289 return execution.workgroup_size.z;
2290 default:
2291 return 0;
2292 }
2293
2294 case ExecutionModeInvocations:
2295 return execution.invocations;
2296
2297 case ExecutionModeOutputVertices:
2298 return execution.output_vertices;
2299
2300 default:
2301 return 0;
2302 }
2303}
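
// Usage sketch: querying the effective compute local size. For LocalSize this resolves
// LocalSizeId constants to their literal values when they are present.
//
//   uint32_t local_x = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 0);
//   uint32_t local_y = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 1);
//   uint32_t local_z = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 2);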
2304
2305ExecutionModel Compiler::get_execution_model() const
2306{
2307 auto &execution = get_entry_point();
2308 return execution.model;
2309}
2310
2311bool Compiler::is_tessellation_shader(ExecutionModel model)
2312{
2313 return model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation;
2314}
2315
2316bool Compiler::is_vertex_like_shader() const
2317{
2318 auto model = get_execution_model();
2319 return model == ExecutionModelVertex || model == ExecutionModelGeometry ||
2320 model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation;
2321}
2322
2323bool Compiler::is_tessellation_shader() const
2324{
2325 return is_tessellation_shader(model: get_execution_model());
2326}
2327
2328void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable)
2329{
2330 get<SPIRVariable>(id).remapped_variable = remap_enable;
2331}
2332
2333bool Compiler::get_remapped_variable_state(VariableID id) const
2334{
2335 return get<SPIRVariable>(id).remapped_variable;
2336}
2337
2338void Compiler::set_subpass_input_remapped_components(VariableID id, uint32_t components)
2339{
2340 get<SPIRVariable>(id).remapped_components = components;
2341}
2342
2343uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const
2344{
2345 return get<SPIRVariable>(id).remapped_components;
2346}
2347
2348void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source)
2349{
2350 auto itr = find(first: begin(cont&: e.implied_read_expressions), last: end(cont&: e.implied_read_expressions), val: ID(source));
2351 if (itr == end(cont&: e.implied_read_expressions))
2352 e.implied_read_expressions.push_back(t: source);
2353}
2354
2355void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source)
2356{
2357 auto itr = find(first: begin(cont&: e.implied_read_expressions), last: end(cont&: e.implied_read_expressions), val: ID(source));
2358 if (itr == end(cont&: e.implied_read_expressions))
2359 e.implied_read_expressions.push_back(t: source);
2360}
2361
2362void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression)
2363{
2364 // Don't inherit any expression dependencies if the expression in dst
2365 // is not a forwarded temporary.
2366 if (forwarded_temporaries.find(x: dst) == end(cont&: forwarded_temporaries) ||
2367 forced_temporaries.find(x: dst) != end(cont&: forced_temporaries))
2368 {
2369 return;
2370 }
2371
2372 auto &e = get<SPIRExpression>(id: dst);
2373 auto *phi = maybe_get<SPIRVariable>(id: source_expression);
2374 if (phi && phi->phi_variable)
2375 {
2376 // We have used a phi variable, which can change at the end of the block,
2377 // so make sure we take a dependency on this phi variable.
2378 phi->dependees.push_back(t: dst);
2379 }
2380
2381 auto *s = maybe_get<SPIRExpression>(id: source_expression);
2382 if (!s)
2383 return;
2384
2385 auto &e_deps = e.expression_dependencies;
2386 auto &s_deps = s->expression_dependencies;
2387
2388 // If we depend on a expression, we also depend on all sub-dependencies from source.
2389 e_deps.push_back(t: source_expression);
2390 e_deps.insert(itr: end(cont&: e_deps), insert_begin: begin(cont&: s_deps), insert_end: end(cont&: s_deps));
2391
2392 // Eliminate duplicated dependencies.
2393 sort(first: begin(cont&: e_deps), last: end(cont&: e_deps));
2394 e_deps.erase(start_erase: unique(first: begin(cont&: e_deps), last: end(cont&: e_deps)), end_erase: end(cont&: e_deps));
2395}
2396
2397SmallVector<EntryPoint> Compiler::get_entry_points_and_stages() const
2398{
2399 SmallVector<EntryPoint> entries;
2400 for (auto &entry : ir.entry_points)
2401 entries.push_back(t: { .name: entry.second.orig_name, .execution_model: entry.second.model });
2402 return entries;
2403}
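
// Usage sketch (names illustrative): modules with multiple entry points are normally handled
// by enumerating the entry points and then selecting one before compiling or reflecting.
//
//   auto entry_points = compiler.get_entry_points_and_stages();
//   if (!entry_points.empty())
//       compiler.set_entry_point(entry_points.front().name, entry_points.front().execution_model);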
2404
2405void Compiler::rename_entry_point(const std::string &old_name, const std::string &new_name, spv::ExecutionModel model)
2406{
2407 auto &entry = get_entry_point(name: old_name, execution_model: model);
2408 entry.orig_name = new_name;
2409 entry.name = new_name;
2410}
2411
2412void Compiler::set_entry_point(const std::string &name, spv::ExecutionModel model)
2413{
2414 auto &entry = get_entry_point(name, execution_model: model);
2415 ir.default_entry_point = entry.self;
2416}
2417
2418SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name)
2419{
2420 auto itr = find_if(
2421 first: begin(cont&: ir.entry_points), last: end(cont&: ir.entry_points),
2422 pred: [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; });
2423
2424 if (itr == end(cont&: ir.entry_points))
2425 SPIRV_CROSS_THROW("Entry point does not exist.");
2426
2427 return itr->second;
2428}
2429
2430const SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) const
2431{
2432 auto itr = find_if(
2433 first: begin(cont: ir.entry_points), last: end(cont: ir.entry_points),
2434 pred: [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; });
2435
2436 if (itr == end(cont: ir.entry_points))
2437 SPIRV_CROSS_THROW("Entry point does not exist.");
2438
2439 return itr->second;
2440}
2441
2442SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model)
2443{
2444 auto itr = find_if(first: begin(cont&: ir.entry_points), last: end(cont&: ir.entry_points),
2445 pred: [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
2446 return entry.second.orig_name == name && entry.second.model == model;
2447 });
2448
2449 if (itr == end(cont&: ir.entry_points))
2450 SPIRV_CROSS_THROW("Entry point does not exist.");
2451
2452 return itr->second;
2453}
2454
2455const SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) const
2456{
2457 auto itr = find_if(first: begin(cont: ir.entry_points), last: end(cont: ir.entry_points),
2458 pred: [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
2459 return entry.second.orig_name == name && entry.second.model == model;
2460 });
2461
2462 if (itr == end(cont: ir.entry_points))
2463 SPIRV_CROSS_THROW("Entry point does not exist.");
2464
2465 return itr->second;
2466}
2467
2468const string &Compiler::get_cleansed_entry_point_name(const std::string &name, ExecutionModel model) const
2469{
2470 return get_entry_point(name, model).name;
2471}
2472
2473const SPIREntryPoint &Compiler::get_entry_point() const
2474{
2475 return ir.entry_points.find(x: ir.default_entry_point)->second;
2476}
2477
2478SPIREntryPoint &Compiler::get_entry_point()
2479{
2480 return ir.entry_points.find(x: ir.default_entry_point)->second;
2481}
2482
2483bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const
2484{
2485 auto &var = get<SPIRVariable>(id);
2486
2487 if (ir.get_spirv_version() < 0x10400)
2488 {
2489 if (var.storage != StorageClassInput && var.storage != StorageClassOutput &&
2490 var.storage != StorageClassUniformConstant)
2491 SPIRV_CROSS_THROW("Only Input, Output variables and Uniform constants are part of a shader linking interface.");
2492
2493 // This is to avoid potential problems with very old glslang versions which did
2494 // not emit input/output interfaces properly.
2495 // We can assume they only had a single entry point, and single entry point
2496 // shaders could easily be assumed to use every interface variable anyways.
2497 if (ir.entry_points.size() <= 1)
2498 return true;
2499 }
2500
2501 // In SPIR-V 1.4 and later, all global resource variables must be present.
2502
2503 auto &execution = get_entry_point();
2504 return find(first: begin(cont: execution.interface_variables), last: end(cont: execution.interface_variables), val: VariableID(id)) !=
2505 end(cont: execution.interface_variables);
2506}
2507
2508void Compiler::CombinedImageSamplerHandler::push_remap_parameters(const SPIRFunction &func, const uint32_t *args,
2509 uint32_t length)
2510{
2511 // If possible, pipe through a remapping table so that parameters know
2512 // which variables they actually bind to in this scope.
2513 unordered_map<uint32_t, uint32_t> remapping;
2514 for (uint32_t i = 0; i < length; i++)
2515 remapping[func.arguments[i].id] = remap_parameter(id: args[i]);
2516 parameter_remapping.push(x: std::move(remapping));
2517}
2518
2519void Compiler::CombinedImageSamplerHandler::pop_remap_parameters()
2520{
2521 parameter_remapping.pop();
2522}
2523
2524uint32_t Compiler::CombinedImageSamplerHandler::remap_parameter(uint32_t id)
2525{
2526 auto *var = compiler.maybe_get_backing_variable(chain: id);
2527 if (var)
2528 id = var->self;
2529
2530 if (parameter_remapping.empty())
2531 return id;
2532
2533 auto &remapping = parameter_remapping.top();
2534 auto itr = remapping.find(x: id);
2535 if (itr != end(cont&: remapping))
2536 return itr->second;
2537 else
2538 return id;
2539}
2540
2541bool Compiler::CombinedImageSamplerHandler::begin_function_scope(const uint32_t *args, uint32_t length)
2542{
2543 if (length < 3)
2544 return false;
2545
2546 auto &callee = compiler.get<SPIRFunction>(id: args[2]);
2547 args += 3;
2548 length -= 3;
2549 push_remap_parameters(func: callee, args, length);
2550 functions.push(x: &callee);
2551 return true;
2552}
2553
2554bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *args, uint32_t length)
2555{
2556 if (length < 3)
2557 return false;
2558
2559 auto &callee = compiler.get<SPIRFunction>(id: args[2]);
2560 args += 3;
2561
2562	// There are two types of cases we have to handle:
2563	// a callee might call sampler2D(texture2D, sampler) directly, where
2564	// one or more of the arguments are function parameters.
2565 // Alternatively, we need to provide combined image samplers to our callees,
2566 // and in this case we need to add those as well.
2567
2568 pop_remap_parameters();
2569
2570 // Our callee has now been processed at least once.
2571 // No point in doing it again.
2572 callee.do_combined_parameters = false;
2573
2574 auto &params = functions.top()->combined_parameters;
2575 functions.pop();
2576 if (functions.empty())
2577 return true;
2578
2579 auto &caller = *functions.top();
2580 if (caller.do_combined_parameters)
2581 {
2582 for (auto &param : params)
2583 {
2584 VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]);
2585 VariableID sampler_id = param.global_sampler ? param.sampler_id : VariableID(args[param.sampler_id]);
2586
2587 auto *i = compiler.maybe_get_backing_variable(chain: image_id);
2588 auto *s = compiler.maybe_get_backing_variable(chain: sampler_id);
2589 if (i)
2590 image_id = i->self;
2591 if (s)
2592 sampler_id = s->self;
2593
2594 register_combined_image_sampler(caller, combined_id: 0, texture_id: image_id, sampler_id, depth: param.depth);
2595 }
2596 }
2597
2598 return true;
2599}
2600
2601void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller,
2602 VariableID combined_module_id,
2603 VariableID image_id, VariableID sampler_id,
2604 bool depth)
2605{
2606 // We now have a texture ID and a sampler ID which will either be found as a global
2607 // or a parameter in our own function. If both are global, they will not need a parameter,
2608 // otherwise, add it to our list.
2609 SPIRFunction::CombinedImageSamplerParameter param = {
2610 .id: 0u, .image_id: image_id, .sampler_id: sampler_id, .global_image: true, .global_sampler: true, .depth: depth,
2611 };
2612
2613 auto texture_itr = find_if(first: begin(cont&: caller.arguments), last: end(cont&: caller.arguments),
2614 pred: [image_id](const SPIRFunction::Parameter &p) { return p.id == image_id; });
2615 auto sampler_itr = find_if(first: begin(cont&: caller.arguments), last: end(cont&: caller.arguments),
2616 pred: [sampler_id](const SPIRFunction::Parameter &p) { return p.id == sampler_id; });
2617
2618 if (texture_itr != end(cont&: caller.arguments))
2619 {
2620 param.global_image = false;
2621 param.image_id = uint32_t(texture_itr - begin(cont&: caller.arguments));
2622 }
2623
2624 if (sampler_itr != end(cont&: caller.arguments))
2625 {
2626 param.global_sampler = false;
2627 param.sampler_id = uint32_t(sampler_itr - begin(cont&: caller.arguments));
2628 }
2629
2630 if (param.global_image && param.global_sampler)
2631 return;
2632
2633 auto itr = find_if(first: begin(cont&: caller.combined_parameters), last: end(cont&: caller.combined_parameters),
2634 pred: [&param](const SPIRFunction::CombinedImageSamplerParameter &p) {
2635 return param.image_id == p.image_id && param.sampler_id == p.sampler_id &&
2636 param.global_image == p.global_image && param.global_sampler == p.global_sampler;
2637 });
2638
2639 if (itr == end(cont&: caller.combined_parameters))
2640 {
2641 uint32_t id = compiler.ir.increase_bound_by(count: 3);
2642 auto type_id = id + 0;
2643 auto ptr_type_id = id + 1;
2644 auto combined_id = id + 2;
2645 auto &base = compiler.expression_type(id: image_id);
2646 auto &type = compiler.set<SPIRType>(type_id);
2647 auto &ptr_type = compiler.set<SPIRType>(ptr_type_id);
2648
2649 type = base;
2650 type.self = type_id;
2651 type.basetype = SPIRType::SampledImage;
2652 type.pointer = false;
2653 type.storage = StorageClassGeneric;
2654 type.image.depth = depth;
2655
2656 ptr_type = type;
2657 ptr_type.pointer = true;
2658 ptr_type.storage = StorageClassUniformConstant;
2659 ptr_type.parent_type = type_id;
2660
2661 // Build new variable.
2662 compiler.set<SPIRVariable>(id: combined_id, args&: ptr_type_id, args: StorageClassFunction, args: 0);
2663
2664 // Inherit RelaxedPrecision.
2665 // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
2666 bool relaxed_precision =
2667 compiler.has_decoration(id: sampler_id, decoration: DecorationRelaxedPrecision) ||
2668 compiler.has_decoration(id: image_id, decoration: DecorationRelaxedPrecision) ||
2669 (combined_module_id && compiler.has_decoration(id: combined_module_id, decoration: DecorationRelaxedPrecision));
2670
2671 if (relaxed_precision)
2672 compiler.set_decoration(id: combined_id, decoration: DecorationRelaxedPrecision);
2673
2674 param.id = combined_id;
2675
2676 compiler.set_name(id: combined_id,
2677 name: join(ts: "SPIRV_Cross_Combined", ts: compiler.to_name(id: image_id), ts: compiler.to_name(id: sampler_id)));
2678
2679 caller.combined_parameters.push_back(t: param);
2680 caller.shadow_arguments.push_back(t: { .type: ptr_type_id, .id: combined_id, .read_count: 0u, .write_count: 0u, .alias_global_variable: true });
2681 }
2682}
2683
2684bool Compiler::DummySamplerForCombinedImageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
2685{
2686 if (need_dummy_sampler)
2687 {
2688 // No need to traverse further, we know the result.
2689 return false;
2690 }
2691
2692 switch (opcode)
2693 {
2694 case OpLoad:
2695 {
2696 if (length < 3)
2697 return false;
2698
2699 uint32_t result_type = args[0];
2700
2701 auto &type = compiler.get<SPIRType>(id: result_type);
2702 bool separate_image =
2703 type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer;
2704
2705 // If not separate image, don't bother.
2706 if (!separate_image)
2707 return true;
2708
2709 uint32_t id = args[1];
2710 uint32_t ptr = args[2];
2711 compiler.set<SPIRExpression>(id, args: "", args&: result_type, args: true);
2712 compiler.register_read(expr: id, chain: ptr, forwarded: true);
2713 break;
2714 }
2715
2716 case OpImageFetch:
2717 case OpImageQuerySizeLod:
2718 case OpImageQuerySize:
2719 case OpImageQueryLevels:
2720 case OpImageQuerySamples:
2721 {
2722 // If we are fetching or querying LOD from a plain OpTypeImage, we must pre-combine with our dummy sampler.
2723 auto *var = compiler.maybe_get_backing_variable(chain: args[2]);
2724 if (var)
2725 {
2726 auto &type = compiler.get<SPIRType>(id: var->basetype);
2727 if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
2728 need_dummy_sampler = true;
2729 }
2730
2731 break;
2732 }
2733
2734 case OpInBoundsAccessChain:
2735 case OpAccessChain:
2736 case OpPtrAccessChain:
2737 {
2738 if (length < 3)
2739 return false;
2740
2741 uint32_t result_type = args[0];
2742 auto &type = compiler.get<SPIRType>(id: result_type);
2743 bool separate_image =
2744 type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer;
2745 if (!separate_image)
2746 return true;
2747
2748 uint32_t id = args[1];
2749 uint32_t ptr = args[2];
2750 compiler.set<SPIRExpression>(id, args: "", args&: result_type, args: true);
2751 compiler.register_read(expr: id, chain: ptr, forwarded: true);
2752
2753 // Other backends might use SPIRAccessChain for this later.
2754 compiler.ir.ids[id].set_allow_type_rewrite();
2755 break;
2756 }
2757
2758 default:
2759 break;
2760 }
2761
2762 return true;
2763}
2764
2765bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
2766{
2767 // We need to figure out where samplers and images are loaded from, so do only the bare bones compilation we need.
2768 bool is_fetch = false;
2769
2770 switch (opcode)
2771 {
2772 case OpLoad:
2773 {
2774 if (length < 3)
2775 return false;
2776
2777 uint32_t result_type = args[0];
2778
2779 auto &type = compiler.get<SPIRType>(id: result_type);
2780 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
2781 bool separate_sampler = type.basetype == SPIRType::Sampler;
2782
2783 // If not separate image or sampler, don't bother.
2784 if (!separate_image && !separate_sampler)
2785 return true;
2786
2787 uint32_t id = args[1];
2788 uint32_t ptr = args[2];
2789 compiler.set<SPIRExpression>(id, args: "", args&: result_type, args: true);
2790 compiler.register_read(expr: id, chain: ptr, forwarded: true);
2791 return true;
2792 }
2793
2794 case OpInBoundsAccessChain:
2795 case OpAccessChain:
2796 case OpPtrAccessChain:
2797 {
2798 if (length < 3)
2799 return false;
2800
2801 // Technically, it is possible to have arrays of textures and arrays of samplers and combine them, but this becomes essentially
2802 // impossible to implement, since we don't know which concrete sampler we are accessing.
2803 // One potential way is to create a combinatorial explosion where N textures and M samplers are combined into N * M sampler2Ds,
2804 // but this seems ridiculously complicated for a problem which is easy to work around.
2805 // Checking access chains like this assumes we don't have samplers or textures inside uniform structs, but this makes no sense.
2806
2807 uint32_t result_type = args[0];
2808
2809 auto &type = compiler.get<SPIRType>(id: result_type);
2810 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
2811 bool separate_sampler = type.basetype == SPIRType::Sampler;
2812 if (separate_sampler)
2813 SPIRV_CROSS_THROW(
2814 "Attempting to use arrays or structs of separate samplers. This is not possible to statically "
2815 "remap to plain GLSL.");
2816
2817 if (separate_image)
2818 {
2819 uint32_t id = args[1];
2820 uint32_t ptr = args[2];
2821 compiler.set<SPIRExpression>(id, args: "", args&: result_type, args: true);
2822 compiler.register_read(expr: id, chain: ptr, forwarded: true);
2823 }
2824 return true;
2825 }
2826
2827 case OpImageFetch:
2828 case OpImageQuerySizeLod:
2829 case OpImageQuerySize:
2830 case OpImageQueryLevels:
2831 case OpImageQuerySamples:
2832 {
2833 // If we are fetching from a plain OpTypeImage or querying LOD, we must pre-combine with our dummy sampler.
2834 auto *var = compiler.maybe_get_backing_variable(chain: args[2]);
2835 if (!var)
2836 return true;
2837
2838 auto &type = compiler.get<SPIRType>(id: var->basetype);
2839 if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
2840 {
2841 if (compiler.dummy_sampler_id == 0)
2842 SPIRV_CROSS_THROW("texelFetch without sampler was found, but no dummy sampler has been created with "
2843 "build_dummy_sampler_for_combined_images().");
2844
2845 // Do it outside.
2846 is_fetch = true;
2847 break;
2848 }
2849
2850 return true;
2851 }
2852
2853 case OpSampledImage:
2854 // Do it outside.
2855 break;
2856
2857 default:
2858 return true;
2859 }
2860
2861	// Register sampler2D calls where the image or sampler argument is a function parameter, so
2862	// that callees know which combined image samplers to propagate down the call stack.
2863 if (!functions.empty())
2864 {
2865 auto &callee = *functions.top();
2866 if (callee.do_combined_parameters)
2867 {
2868 uint32_t image_id = args[2];
2869
2870 auto *image = compiler.maybe_get_backing_variable(chain: image_id);
2871 if (image)
2872 image_id = image->self;
2873
2874 uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : args[3];
2875 auto *sampler = compiler.maybe_get_backing_variable(chain: sampler_id);
2876 if (sampler)
2877 sampler_id = sampler->self;
2878
2879 uint32_t combined_id = args[1];
2880
2881 auto &combined_type = compiler.get<SPIRType>(id: args[0]);
2882 register_combined_image_sampler(caller&: callee, combined_module_id: combined_id, image_id, sampler_id, depth: combined_type.image.depth);
2883 }
2884 }
2885
2886 // For function calls, we need to remap IDs which are function parameters into global variables.
2887 // This information is statically known from the current place in the call stack.
2888 // Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know
2889	// which backing variable the image/sampler came from.
2890 VariableID image_id = remap_parameter(id: args[2]);
2891 VariableID sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(id: args[3]);
2892
2893 auto itr = find_if(first: begin(cont&: compiler.combined_image_samplers), last: end(cont&: compiler.combined_image_samplers),
2894 pred: [image_id, sampler_id](const CombinedImageSampler &combined) {
2895 return combined.image_id == image_id && combined.sampler_id == sampler_id;
2896 });
2897
2898 if (itr == end(cont&: compiler.combined_image_samplers))
2899 {
2900 uint32_t sampled_type;
2901 uint32_t combined_module_id;
2902 if (is_fetch)
2903 {
2904 // Have to invent the sampled image type.
2905 sampled_type = compiler.ir.increase_bound_by(count: 1);
2906 auto &type = compiler.set<SPIRType>(sampled_type);
2907 type = compiler.expression_type(id: args[2]);
2908 type.self = sampled_type;
2909 type.basetype = SPIRType::SampledImage;
2910 type.image.depth = false;
2911 combined_module_id = 0;
2912 }
2913 else
2914 {
2915 sampled_type = args[0];
2916 combined_module_id = args[1];
2917 }
2918
2919 auto id = compiler.ir.increase_bound_by(count: 2);
2920 auto type_id = id + 0;
2921 auto combined_id = id + 1;
2922
2923 // Make a new type, pointer to OpTypeSampledImage, so we can make a variable of this type.
2924 // We will probably have this type lying around, but it doesn't hurt to make duplicates for internal purposes.
2925 auto &type = compiler.set<SPIRType>(type_id);
2926 auto &base = compiler.get<SPIRType>(id: sampled_type);
2927 type = base;
2928 type.pointer = true;
2929 type.storage = StorageClassUniformConstant;
2930 type.parent_type = type_id;
2931
2932 // Build new variable.
2933 compiler.set<SPIRVariable>(id: combined_id, args&: type_id, args: StorageClassUniformConstant, args: 0);
2934
2935 // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant).
2936 // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
2937 bool relaxed_precision =
2938 (sampler_id && compiler.has_decoration(id: sampler_id, decoration: DecorationRelaxedPrecision)) ||
2939 (image_id && compiler.has_decoration(id: image_id, decoration: DecorationRelaxedPrecision)) ||
2940 (combined_module_id && compiler.has_decoration(id: combined_module_id, decoration: DecorationRelaxedPrecision));
2941
2942 if (relaxed_precision)
2943 compiler.set_decoration(id: combined_id, decoration: DecorationRelaxedPrecision);
2944
2945 // Propagate the array type for the original image as well.
2946 auto *var = compiler.maybe_get_backing_variable(chain: image_id);
2947 if (var)
2948 {
2949 auto &parent_type = compiler.get<SPIRType>(id: var->basetype);
2950 type.array = parent_type.array;
2951 type.array_size_literal = parent_type.array_size_literal;
2952 }
2953
2954 compiler.combined_image_samplers.push_back(t: { .combined_id: combined_id, .image_id: image_id, .sampler_id: sampler_id });
2955 }
2956
2957 return true;
2958}
2959
2960VariableID Compiler::build_dummy_sampler_for_combined_images()
2961{
2962 DummySamplerForCombinedImageHandler handler(*this);
2963 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
2964 if (handler.need_dummy_sampler)
2965 {
2966 uint32_t offset = ir.increase_bound_by(count: 3);
2967 auto type_id = offset + 0;
2968 auto ptr_type_id = offset + 1;
2969 auto var_id = offset + 2;
2970
2971 SPIRType sampler_type;
2972 auto &sampler = set<SPIRType>(type_id);
2973 sampler.basetype = SPIRType::Sampler;
2974
2975 auto &ptr_sampler = set<SPIRType>(ptr_type_id);
2976 ptr_sampler = sampler;
2977 ptr_sampler.self = type_id;
2978 ptr_sampler.storage = StorageClassUniformConstant;
2979 ptr_sampler.pointer = true;
2980 ptr_sampler.parent_type = type_id;
2981
2982 set<SPIRVariable>(id: var_id, args&: ptr_type_id, args: StorageClassUniformConstant, args: 0);
2983 set_name(id: var_id, name: "SPIRV_Cross_DummySampler");
2984 dummy_sampler_id = var_id;
2985 return var_id;
2986 }
2987 else
2988 return 0;
2989}
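
// Usage sketch (descriptor set and binding values are placeholders): when the module uses
// texelFetch or image queries on separate images, a caller targeting plain GLSL creates the
// dummy sampler first, gives it a binding, and then runs build_combined_image_samplers() below.
//
//   VariableID dummy = compiler.build_dummy_sampler_for_combined_images();
//   if (dummy != 0)
//   {
//       compiler.set_decoration(dummy, spv::DecorationDescriptorSet, 0);
//       compiler.set_decoration(dummy, spv::DecorationBinding, 0);
//   }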
2990
2991void Compiler::build_combined_image_samplers()
2992{
2993 ir.for_each_typed_id<SPIRFunction>(op: [&](uint32_t, SPIRFunction &func) {
2994 func.combined_parameters.clear();
2995 func.shadow_arguments.clear();
2996 func.do_combined_parameters = true;
2997 });
2998
2999 combined_image_samplers.clear();
3000 CombinedImageSamplerHandler handler(*this);
3001 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
3002}
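
// Usage sketch (names illustrative): after build_combined_image_samplers() the application can
// name and bind the synthesized combined variables. This mirrors the intended workflow, though
// the exact naming and binding policy is up to the caller.
//
//   compiler.build_combined_image_samplers();
//   for (auto &remap : compiler.get_combined_image_samplers())
//   {
//       compiler.set_name(remap.combined_id, join("SPIRV_Cross_Combined",
//                                                 compiler.get_name(remap.image_id),
//                                                 compiler.get_name(remap.sampler_id)));
//   }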
3003
3004SmallVector<SpecializationConstant> Compiler::get_specialization_constants() const
3005{
3006 SmallVector<SpecializationConstant> spec_consts;
3007 ir.for_each_typed_id<SPIRConstant>(op: [&](uint32_t, const SPIRConstant &c) {
3008 if (c.specialization && has_decoration(id: c.self, decoration: DecorationSpecId))
3009 spec_consts.push_back(t: { .id: c.self, .constant_id: get_decoration(id: c.self, decoration: DecorationSpecId) });
3010 });
3011 return spec_consts;
3012}
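
// Usage sketch: enumerating specialization constants for reflection. Each entry pairs the
// internal constant ID with the SpecId the application sees; get_constant() below then gives
// access to the default value.
//
//   for (auto &sc : compiler.get_specialization_constants())
//   {
//       uint32_t spec_id = sc.constant_id;                               // SpecId decoration
//       uint32_t default_value = compiler.get_constant(sc.id).scalar();  // assumes a 32-bit scalar
//   }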
3013
3014SPIRConstant &Compiler::get_constant(ConstantID id)
3015{
3016 return get<SPIRConstant>(id);
3017}
3018
3019const SPIRConstant &Compiler::get_constant(ConstantID id) const
3020{
3021 return get<SPIRConstant>(id);
3022}
3023
3024static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set<uint32_t> &blocks,
3025 unordered_set<uint32_t> &visit_cache)
3026{
3027 // This block accesses the variable.
3028 if (blocks.find(x: block) != end(cont: blocks))
3029 return false;
3030
3031 // We are at the end of the CFG.
3032 if (cfg.get_succeeding_edges(block).empty())
3033 return true;
3034
3035 // If any of our successors have a path to the end, there exists a path from block.
3036 for (auto &succ : cfg.get_succeeding_edges(block))
3037 {
3038 if (visit_cache.count(x: succ) == 0)
3039 {
3040 if (exists_unaccessed_path_to_return(cfg, block: succ, blocks, visit_cache))
3041 return true;
3042 visit_cache.insert(x: succ);
3043 }
3044 }
3045
3046 return false;
3047}
3048
3049void Compiler::analyze_parameter_preservation(
3050 SPIRFunction &entry, const CFG &cfg, const unordered_map<uint32_t, unordered_set<uint32_t>> &variable_to_blocks,
3051 const unordered_map<uint32_t, unordered_set<uint32_t>> &complete_write_blocks)
3052{
3053 for (auto &arg : entry.arguments)
3054 {
3055 // Non-pointers are always inputs.
3056 auto &type = get<SPIRType>(id: arg.type);
3057 if (!type.pointer)
3058 continue;
3059
3060		// Opaque argument types are always input-only.
3061 bool potential_preserve;
3062 switch (type.basetype)
3063 {
3064 case SPIRType::Sampler:
3065 case SPIRType::Image:
3066 case SPIRType::SampledImage:
3067 case SPIRType::AtomicCounter:
3068 potential_preserve = false;
3069 break;
3070
3071 default:
3072 potential_preserve = true;
3073 break;
3074 }
3075
3076 if (!potential_preserve)
3077 continue;
3078
3079 auto itr = variable_to_blocks.find(x: arg.id);
3080 if (itr == end(cont: variable_to_blocks))
3081 {
3082 // Variable is never accessed.
3083 continue;
3084 }
3085
3086		// We have accessed a variable, but there were no complete writes to that variable.
3087 // We deduce that we must preserve the argument.
3088 itr = complete_write_blocks.find(x: arg.id);
3089 if (itr == end(cont: complete_write_blocks))
3090 {
3091 arg.read_count++;
3092 continue;
3093 }
3094
3095 // If there is a path through the CFG where no block completely writes to the variable, the variable will be in an undefined state
3096 // when the function returns. We therefore need to implicitly preserve the variable in case there are writers in the function.
3097 // Major case here is if a function is
3098 // void foo(int &var) { if (cond) var = 10; }
3099 // Using read/write counts, we will think it's just an out variable, but it really needs to be inout,
3100		// because if we don't write anything, whatever the caller passed in must be returned back unchanged.
3101 unordered_set<uint32_t> visit_cache;
3102 if (exists_unaccessed_path_to_return(cfg, block: entry.entry_block, blocks: itr->second, visit_cache))
3103 arg.read_count++;
3104 }
3105}
3106
3107Compiler::AnalyzeVariableScopeAccessHandler::AnalyzeVariableScopeAccessHandler(Compiler &compiler_,
3108 SPIRFunction &entry_)
3109 : compiler(compiler_)
3110 , entry(entry_)
3111{
3112}
3113
3114bool Compiler::AnalyzeVariableScopeAccessHandler::follow_function_call(const SPIRFunction &)
3115{
3116 // Only analyze within this function.
3117 return false;
3118}
3119
3120void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBlock &block)
3121{
3122 current_block = &block;
3123
3124 // If we're branching to a block which uses OpPhi, in GLSL
3125 // this will be a variable write when we branch,
3126 // so we need to track access to these variables as well to
3127 // have a complete picture.
3128 const auto test_phi = [this, &block](uint32_t to) {
3129 auto &next = compiler.get<SPIRBlock>(id: to);
3130 for (auto &phi : next.phi_variables)
3131 {
3132 if (phi.parent == block.self)
3133 {
3134 accessed_variables_to_block[phi.function_variable].insert(x: block.self);
3135 // Phi variables are also accessed in our target branch block.
3136 accessed_variables_to_block[phi.function_variable].insert(x: next.self);
3137
3138 notify_variable_access(id: phi.local_variable, block: block.self);
3139 }
3140 }
3141 };
3142
3143 switch (block.terminator)
3144 {
3145 case SPIRBlock::Direct:
3146 notify_variable_access(id: block.condition, block: block.self);
3147 test_phi(block.next_block);
3148 break;
3149
3150 case SPIRBlock::Select:
3151 notify_variable_access(id: block.condition, block: block.self);
3152 test_phi(block.true_block);
3153 test_phi(block.false_block);
3154 break;
3155
3156 case SPIRBlock::MultiSelect:
3157 {
3158 notify_variable_access(id: block.condition, block: block.self);
3159 auto &cases = compiler.get_case_list(block);
3160 for (auto &target : cases)
3161 test_phi(target.block);
3162 if (block.default_block)
3163 test_phi(block.default_block);
3164 break;
3165 }
3166
3167 default:
3168 break;
3169 }
3170}
3171
3172void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block)
3173{
3174 if (id == 0)
3175 return;
3176
3177 // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers.
3178 auto itr = access_chain_children.find(x: id);
3179 if (itr != end(cont&: access_chain_children))
3180 for (auto child_id : itr->second)
3181 notify_variable_access(id: child_id, block);
3182
3183 if (id_is_phi_variable(id))
3184 accessed_variables_to_block[id].insert(x: block);
3185 else if (id_is_potential_temporary(id))
3186 accessed_temporaries_to_block[id].insert(x: block);
3187}
3188
3189bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_phi_variable(uint32_t id) const
3190{
3191 if (id >= compiler.get_current_id_bound())
3192 return false;
3193 auto *var = compiler.maybe_get<SPIRVariable>(id);
3194 return var && var->phi_variable;
3195}
3196
3197bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint32_t id) const
3198{
3199 if (id >= compiler.get_current_id_bound())
3200 return false;
3201
3202 // Temporaries are not created before we start emitting code.
3203 return compiler.ir.ids[id].empty() || (compiler.ir.ids[id].get_type() == TypeExpression);
3204}
3205
3206bool Compiler::AnalyzeVariableScopeAccessHandler::handle_terminator(const SPIRBlock &block)
3207{
3208 switch (block.terminator)
3209 {
3210 case SPIRBlock::Return:
3211 if (block.return_value)
3212 notify_variable_access(id: block.return_value, block: block.self);
3213 break;
3214
3215 case SPIRBlock::Select:
3216 case SPIRBlock::MultiSelect:
3217 notify_variable_access(id: block.condition, block: block.self);
3218 break;
3219
3220 default:
3221 break;
3222 }
3223
3224 return true;
3225}
3226
3227bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length)
3228{
3229 // Keep track of the types of temporaries, so we can hoist them out as necessary.
3230 uint32_t result_type, result_id;
3231 if (compiler.instruction_to_result_type(result_type, result_id, op, args, length))
3232 result_id_to_type[result_id] = result_type;
3233
3234 switch (op)
3235 {
3236 case OpStore:
3237 {
3238 if (length < 2)
3239 return false;
3240
3241 ID ptr = args[0];
3242 auto *var = compiler.maybe_get_backing_variable(chain: ptr);
3243
3244 // If we store through an access chain, we have a partial write.
3245 if (var)
3246 {
3247 accessed_variables_to_block[var->self].insert(x: current_block->self);
3248 if (var->self == ptr)
3249 complete_write_variables_to_block[var->self].insert(x: current_block->self);
3250 else
3251 partial_write_variables_to_block[var->self].insert(x: current_block->self);
3252 }
3253
3254 // args[0] might be an access chain we have to track use of.
3255 notify_variable_access(id: args[0], block: current_block->self);
3256 // Might try to store a Phi variable here.
3257 notify_variable_access(id: args[1], block: current_block->self);
3258 break;
3259 }
3260
3261 case OpAccessChain:
3262 case OpInBoundsAccessChain:
3263 case OpPtrAccessChain:
3264 {
3265 if (length < 3)
3266 return false;
3267
3268 // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers.
3269 uint32_t ptr = args[2];
3270 auto *var = compiler.maybe_get<SPIRVariable>(id: ptr);
3271 if (var)
3272 {
3273 accessed_variables_to_block[var->self].insert(x: current_block->self);
3274 access_chain_children[args[1]].insert(x: var->self);
3275 }
3276
3277 // args[2] might be another access chain we have to track use of.
3278 for (uint32_t i = 2; i < length; i++)
3279 {
3280 notify_variable_access(id: args[i], block: current_block->self);
3281 access_chain_children[args[1]].insert(x: args[i]);
3282 }
3283
3284 // Also keep track of the access chain pointer itself.
3285 // In exceptionally rare cases, we can end up with a case where
3286 // the access chain is generated in the loop body, but is consumed in continue block.
3287 // This means we need complex loop workarounds, and we must detect this via CFG analysis.
3288 notify_variable_access(id: args[1], block: current_block->self);
3289
3290 // The result of an access chain is a fixed expression and is not really considered a temporary.
3291 auto &e = compiler.set<SPIRExpression>(id: args[1], args: "", args: args[0], args: true);
3292 auto *backing_variable = compiler.maybe_get_backing_variable(chain: ptr);
3293 e.loaded_from = backing_variable ? VariableID(backing_variable->self) : VariableID(0);
3294
3295 // Other backends might use SPIRAccessChain for this later.
3296 compiler.ir.ids[args[1]].set_allow_type_rewrite();
3297 access_chain_expressions.insert(x: args[1]);
3298 break;
3299 }
3300
3301 case OpCopyMemory:
3302 {
3303 if (length < 2)
3304 return false;
3305
3306 ID lhs = args[0];
3307 ID rhs = args[1];
3308 auto *var = compiler.maybe_get_backing_variable(chain: lhs);
3309
3310 // If we store through an access chain, we have a partial write.
3311 if (var)
3312 {
3313 accessed_variables_to_block[var->self].insert(x: current_block->self);
3314 if (var->self == lhs)
3315 complete_write_variables_to_block[var->self].insert(x: current_block->self);
3316 else
3317 partial_write_variables_to_block[var->self].insert(x: current_block->self);
3318 }
3319
3320 // args[0:1] might be access chains we have to track use of.
3321 for (uint32_t i = 0; i < 2; i++)
3322 notify_variable_access(id: args[i], block: current_block->self);
3323
3324 var = compiler.maybe_get_backing_variable(chain: rhs);
3325 if (var)
3326 accessed_variables_to_block[var->self].insert(x: current_block->self);
3327 break;
3328 }
3329
3330 case OpCopyObject:
3331 {
3332 if (length < 3)
3333 return false;
3334
3335 auto *var = compiler.maybe_get_backing_variable(chain: args[2]);
3336 if (var)
3337 accessed_variables_to_block[var->self].insert(x: current_block->self);
3338
3339 // Might be an access chain which we have to keep track of.
3340 notify_variable_access(id: args[1], block: current_block->self);
3341 if (access_chain_expressions.count(x: args[2]))
3342 access_chain_expressions.insert(x: args[1]);
3343
3344 // Might try to copy a Phi variable here.
3345 notify_variable_access(id: args[2], block: current_block->self);
3346 break;
3347 }
3348
3349 case OpLoad:
3350 {
3351 if (length < 3)
3352 return false;
3353 uint32_t ptr = args[2];
3354 auto *var = compiler.maybe_get_backing_variable(chain: ptr);
3355 if (var)
3356 accessed_variables_to_block[var->self].insert(x: current_block->self);
3357
3358 // Loaded value is a temporary.
3359 notify_variable_access(id: args[1], block: current_block->self);
3360
3361 // Might be an access chain we have to track use of.
3362 notify_variable_access(id: args[2], block: current_block->self);
3363 break;
3364 }
3365
3366 case OpFunctionCall:
3367 {
3368 if (length < 3)
3369 return false;
3370
3371 // Return value may be a temporary.
3372 if (compiler.get_type(id: args[0]).basetype != SPIRType::Void)
3373 notify_variable_access(id: args[1], block: current_block->self);
3374
3375 length -= 3;
3376 args += 3;
3377
3378 for (uint32_t i = 0; i < length; i++)
3379 {
3380 auto *var = compiler.maybe_get_backing_variable(chain: args[i]);
3381 if (var)
3382 {
3383 accessed_variables_to_block[var->self].insert(x: current_block->self);
3384 // Assume we can get partial writes to this variable.
3385 partial_write_variables_to_block[var->self].insert(x: current_block->self);
3386 }
3387
3388			// Cannot easily prove whether an argument we pass to a function is completely written.
3389			// Usually, functions write to a dummy variable,
3390			// which is then copied in full to the real argument.
3391
3392 // Might try to copy a Phi variable here.
3393 notify_variable_access(id: args[i], block: current_block->self);
3394 }
3395 break;
3396 }
3397
3398 case OpSelect:
3399 {
3400 // In case of variable pointers, we might access a variable here.
3401 // We cannot prove anything about these accesses however.
3402 for (uint32_t i = 1; i < length; i++)
3403 {
3404 if (i >= 3)
3405 {
3406 auto *var = compiler.maybe_get_backing_variable(chain: args[i]);
3407 if (var)
3408 {
3409 accessed_variables_to_block[var->self].insert(x: current_block->self);
3410 // Assume we can get partial writes to this variable.
3411 partial_write_variables_to_block[var->self].insert(x: current_block->self);
3412 }
3413 }
3414
3415 // Might try to copy a Phi variable here.
3416 notify_variable_access(id: args[i], block: current_block->self);
3417 }
3418 break;
3419 }
3420
3421 case OpExtInst:
3422 {
3423 for (uint32_t i = 4; i < length; i++)
3424 notify_variable_access(id: args[i], block: current_block->self);
3425 notify_variable_access(id: args[1], block: current_block->self);
3426
3427 uint32_t extension_set = args[2];
3428 if (compiler.get<SPIRExtension>(id: extension_set).ext == SPIRExtension::GLSL)
3429 {
3430 auto op_450 = static_cast<GLSLstd450>(args[3]);
3431 switch (op_450)
3432 {
3433 case GLSLstd450Modf:
3434 case GLSLstd450Frexp:
3435 {
3436 uint32_t ptr = args[5];
3437 auto *var = compiler.maybe_get_backing_variable(chain: ptr);
3438 if (var)
3439 {
3440 accessed_variables_to_block[var->self].insert(x: current_block->self);
3441 if (var->self == ptr)
3442 complete_write_variables_to_block[var->self].insert(x: current_block->self);
3443 else
3444 partial_write_variables_to_block[var->self].insert(x: current_block->self);
3445 }
3446 break;
3447 }
3448
3449 default:
3450 break;
3451 }
3452 }
3453 break;
3454 }
3455
3456 case OpArrayLength:
3457 // Only result is a temporary.
3458 notify_variable_access(id: args[1], block: current_block->self);
3459 break;
3460
3461 case OpLine:
3462 case OpNoLine:
3463 // Uses literals, but cannot be a phi variable or temporary, so ignore.
3464 break;
3465
3466 // Atomics shouldn't be able to access function-local variables.
3467 // Some GLSL builtins access a pointer.
3468
3469 case OpCompositeInsert:
3470 case OpVectorShuffle:
3471 // Specialize for opcode which contains literals.
3472 for (uint32_t i = 1; i < 4; i++)
3473 notify_variable_access(id: args[i], block: current_block->self);
3474 break;
3475
3476 case OpCompositeExtract:
3477 // Specialize for opcode which contains literals.
3478 for (uint32_t i = 1; i < 3; i++)
3479 notify_variable_access(id: args[i], block: current_block->self);
3480 break;
3481
3482 case OpImageWrite:
3483 for (uint32_t i = 0; i < length; i++)
3484 {
3485 // Argument 3 is a literal.
3486 if (i != 3)
3487 notify_variable_access(id: args[i], block: current_block->self);
3488 }
3489 break;
3490
3491 case OpImageSampleImplicitLod:
3492 case OpImageSampleExplicitLod:
3493 case OpImageSparseSampleImplicitLod:
3494 case OpImageSparseSampleExplicitLod:
3495 case OpImageSampleProjImplicitLod:
3496 case OpImageSampleProjExplicitLod:
3497 case OpImageSparseSampleProjImplicitLod:
3498 case OpImageSparseSampleProjExplicitLod:
3499 case OpImageFetch:
3500 case OpImageSparseFetch:
3501 case OpImageRead:
3502 case OpImageSparseRead:
3503 for (uint32_t i = 1; i < length; i++)
3504 {
3505 // Argument 4 is a literal.
3506 if (i != 4)
3507 notify_variable_access(id: args[i], block: current_block->self);
3508 }
3509 break;
3510
3511 case OpImageSampleDrefImplicitLod:
3512 case OpImageSampleDrefExplicitLod:
3513 case OpImageSparseSampleDrefImplicitLod:
3514 case OpImageSparseSampleDrefExplicitLod:
3515 case OpImageSampleProjDrefImplicitLod:
3516 case OpImageSampleProjDrefExplicitLod:
3517 case OpImageSparseSampleProjDrefImplicitLod:
3518 case OpImageSparseSampleProjDrefExplicitLod:
3519 case OpImageGather:
3520 case OpImageSparseGather:
3521 case OpImageDrefGather:
3522 case OpImageSparseDrefGather:
3523 for (uint32_t i = 1; i < length; i++)
3524 {
3525 // Argument 5 is a literal.
3526 if (i != 5)
3527 notify_variable_access(id: args[i], block: current_block->self);
3528 }
3529 break;
3530
3531 default:
3532 {
3533 // Rather dirty way of figuring out where Phi variables are used.
3534 // As long as only IDs are used, we can scan through instructions and try to find any evidence that
3535 // the ID of a variable has been used.
3536 		// There are potential false positives here where a literal is used in place of an ID,
3537 		// but in the worst case this does not affect the correctness of the compile.
3538 // Exhaustive analysis would be better here, but it's not worth it for now.
3539 for (uint32_t i = 0; i < length; i++)
3540 notify_variable_access(id: args[i], block: current_block->self);
3541 break;
3542 }
3543 }
3544 return true;
3545}
3546
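// Helper used by find_function_local_luts() below: scans opcodes for accesses to a single
// candidate variable, recording the value stored by OpStore and counting the writes to it.
// It bails out (returns false) if the variable is loaded before it has been written, or if
// it is used with an access chain, since either invalidates the single-static-write assumption.
// It does not follow function calls.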
3547Compiler::StaticExpressionAccessHandler::StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_)
3548 : compiler(compiler_)
3549 , variable_id(variable_id_)
3550{
3551}
3552
3553bool Compiler::StaticExpressionAccessHandler::follow_function_call(const SPIRFunction &)
3554{
3555 return false;
3556}
3557
3558bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length)
3559{
3560 switch (op)
3561 {
3562 case OpStore:
3563 if (length < 2)
3564 return false;
3565 if (args[0] == variable_id)
3566 {
3567 static_expression = args[1];
3568 write_count++;
3569 }
3570 break;
3571
3572 case OpLoad:
3573 if (length < 3)
3574 return false;
3575 if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized.
3576 return false;
3577 break;
3578
3579 case OpAccessChain:
3580 case OpInBoundsAccessChain:
3581 case OpPtrAccessChain:
3582 if (length < 3)
3583 return false;
3584 if (args[2] == variable_id) // If we try to access chain our candidate variable before we store to it, bail.
3585 return false;
3586 break;
3587
3588 default:
3589 break;
3590 }
3591
3592 return true;
3593}
3594
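// Detects local arrays which behave as lookup tables (LUTs): arrays that are written exactly
// once with a constant expression (or carry a constant initializer) and are otherwise only read.
// Such variables are marked so backends can emit them as constant arrays,
// e.g. (illustrative GLSL, not taken from any particular shader):
//   const float lut[4] = float[](1.0, 2.0, 3.0, 4.0);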
3595void Compiler::find_function_local_luts(SPIRFunction &entry, const AnalyzeVariableScopeAccessHandler &handler,
3596 bool single_function)
3597{
3598 auto &cfg = *function_cfgs.find(x: entry.self)->second;
3599
3600 // For each variable which is statically accessed.
3601 for (auto &accessed_var : handler.accessed_variables_to_block)
3602 {
3603 auto &blocks = accessed_var.second;
3604 auto &var = get<SPIRVariable>(id: accessed_var.first);
3605 auto &type = expression_type(id: accessed_var.first);
3606
3607 // Only consider function local variables here.
3608 // If we only have a single function in our CFG, private storage is also fine,
3609 // since it behaves like a function local variable.
3610 bool allow_lut = var.storage == StorageClassFunction || (single_function && var.storage == StorageClassPrivate);
3611 if (!allow_lut)
3612 continue;
3613
3614 // We cannot be a phi variable.
3615 if (var.phi_variable)
3616 continue;
3617
3618 // Only consider arrays here.
3619 if (type.array.empty())
3620 continue;
3621
3622 // If the variable has an initializer, make sure it is a constant expression.
3623 uint32_t static_constant_expression = 0;
3624 if (var.initializer)
3625 {
3626 if (ir.ids[var.initializer].get_type() != TypeConstant)
3627 continue;
3628 static_constant_expression = var.initializer;
3629
3630 			// There can be no stores to this variable; only then have we proven this is a LUT.
3631 if (handler.complete_write_variables_to_block.count(x: var.self) != 0 ||
3632 handler.partial_write_variables_to_block.count(x: var.self) != 0)
3633 continue;
3634 }
3635 else
3636 {
3637 // We can have one, and only one write to the variable, and that write needs to be a constant.
3638
3639 // No partial writes allowed.
3640 if (handler.partial_write_variables_to_block.count(x: var.self) != 0)
3641 continue;
3642
3643 auto itr = handler.complete_write_variables_to_block.find(x: var.self);
3644
3645 // No writes?
3646 if (itr == end(cont: handler.complete_write_variables_to_block))
3647 continue;
3648
3649 // We write to the variable in more than one block.
3650 auto &write_blocks = itr->second;
3651 if (write_blocks.size() != 1)
3652 continue;
3653
3654 // The write needs to happen in the dominating block.
3655 DominatorBuilder builder(cfg);
3656 for (auto &block : blocks)
3657 builder.add_block(block);
3658 uint32_t dominator = builder.get_dominator();
3659
3660 // The complete write happened in a branch or similar, cannot deduce static expression.
3661 if (write_blocks.count(x: dominator) == 0)
3662 continue;
3663
3664 // Find the static expression for this variable.
3665 StaticExpressionAccessHandler static_expression_handler(*this, var.self);
3666 traverse_all_reachable_opcodes(block: get<SPIRBlock>(id: dominator), handler&: static_expression_handler);
3667
3668 			// We want one, and exactly one, write.
3669 if (static_expression_handler.write_count != 1 || static_expression_handler.static_expression == 0)
3670 continue;
3671
3672 // Is it a constant expression?
3673 if (ir.ids[static_expression_handler.static_expression].get_type() != TypeConstant)
3674 continue;
3675
3676 // We found a LUT!
3677 static_constant_expression = static_expression_handler.static_expression;
3678 }
3679
3680 get<SPIRConstant>(id: static_constant_expression).is_used_as_lut = true;
3681 var.static_expression = static_constant_expression;
3682 var.statically_assigned = true;
3683 var.remapped_variable = true;
3684 }
3685}
3686
3687void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeAccessHandler &handler)
3688{
3689 // First, we map out all variable access within a function.
3690 // Essentially a map of block -> { variables accessed in the basic block }
3691 traverse_all_reachable_opcodes(func: entry, handler);
3692
3693 auto &cfg = *function_cfgs.find(x: entry.self)->second;
3694
3695 // Analyze if there are parameters which need to be implicitly preserved with an "in" qualifier.
3696 analyze_parameter_preservation(entry, cfg, variable_to_blocks: handler.accessed_variables_to_block,
3697 complete_write_blocks: handler.complete_write_variables_to_block);
3698
3699 unordered_map<uint32_t, uint32_t> potential_loop_variables;
3700
3701 // Find the loop dominator block for each block.
3702 for (auto &block_id : entry.blocks)
3703 {
3704 auto &block = get<SPIRBlock>(id: block_id);
3705
3706 auto itr = ir.continue_block_to_loop_header.find(x: block_id);
3707 if (itr != end(cont&: ir.continue_block_to_loop_header) && itr->second != block_id)
3708 {
3709 			// The continue block might be unreachable in the CFG, but we would still like to know the loop dominator.
3710 			// The edge case is when the continue block is also the loop header; don't set the dominator in this case.
3711 block.loop_dominator = itr->second;
3712 }
3713 else
3714 {
3715 uint32_t loop_dominator = cfg.find_loop_dominator(block: block_id);
3716 if (loop_dominator != block_id)
3717 block.loop_dominator = loop_dominator;
3718 else
3719 block.loop_dominator = SPIRBlock::NoDominator;
3720 }
3721 }
3722
3723 // For each variable which is statically accessed.
3724 for (auto &var : handler.accessed_variables_to_block)
3725 {
3726 // Only deal with variables which are considered local variables in this function.
3727 if (find(first: begin(cont&: entry.local_variables), last: end(cont&: entry.local_variables), val: VariableID(var.first)) ==
3728 end(cont&: entry.local_variables))
3729 continue;
3730
3731 DominatorBuilder builder(cfg);
3732 auto &blocks = var.second;
3733 auto &type = expression_type(id: var.first);
3734
3735 // Figure out which block is dominating all accesses of those variables.
3736 for (auto &block : blocks)
3737 {
3738 // If we're accessing a variable inside a continue block, this variable might be a loop variable.
3739 // We can only use loop variables with scalars, as we cannot track static expressions for vectors.
3740 if (is_continue(next: block))
3741 {
3742 // Potentially awkward case to check for.
3743 // We might have a variable inside a loop, which is touched by the continue block,
3744 // but is not actually a loop variable.
3745 // The continue block is dominated by the inner part of the loop, which does not make sense in high-level
3746 // language output because it will be declared before the body,
3747 // so we will have to lift the dominator up to the relevant loop header instead.
3748 builder.add_block(block: ir.continue_block_to_loop_header[block]);
3749
3750 // Arrays or structs cannot be loop variables.
3751 if (type.vecsize == 1 && type.columns == 1 && type.basetype != SPIRType::Struct && type.array.empty())
3752 {
3753 					// If the variable is used in multiple continue blocks, it is not a loop
3754 					// candidate; signal that by setting the block to ~0u.
3755 auto &potential = potential_loop_variables[var.first];
3756
3757 if (potential == 0)
3758 potential = block;
3759 else
3760 potential = ~(0u);
3761 }
3762 }
3763 builder.add_block(block);
3764 }
3765
3766 builder.lift_continue_block_dominator();
3767
3768 // Add it to a per-block list of variables.
3769 BlockID dominating_block = builder.get_dominator();
3770
3771 // For variables whose dominating block is inside a loop, there is a risk that these variables
3772 // actually need to be preserved across loop iterations. We can express this by adding
3773 // a "read" access to the loop header.
3774 // In the dominating block, we must see an OpStore or equivalent as the first access of an OpVariable.
3775 // Should that fail, we look for the outermost loop header and tack on an access there.
3776 // Phi nodes cannot have this problem.
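		// As an illustrative (hypothetical) example: a variable first assigned inside the loop body
		// but read again on the next iteration must be declared before the loop,
		// otherwise each iteration would observe an undefined value.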
3777 if (dominating_block)
3778 {
3779 auto &variable = get<SPIRVariable>(id: var.first);
3780 if (!variable.phi_variable)
3781 {
3782 auto *block = &get<SPIRBlock>(id: dominating_block);
3783 bool preserve = may_read_undefined_variable_in_block(block: *block, var: var.first);
3784 if (preserve)
3785 {
3786 // Find the outermost loop scope.
3787 while (block->loop_dominator != BlockID(SPIRBlock::NoDominator))
3788 block = &get<SPIRBlock>(id: block->loop_dominator);
3789
3790 if (block->self != dominating_block)
3791 {
3792 builder.add_block(block: block->self);
3793 dominating_block = builder.get_dominator();
3794 }
3795 }
3796 }
3797 }
3798
3799 // If all blocks here are dead code, this will be 0, so the variable in question
3800 // will be completely eliminated.
3801 if (dominating_block)
3802 {
3803 auto &block = get<SPIRBlock>(id: dominating_block);
3804 block.dominated_variables.push_back(t: var.first);
3805 get<SPIRVariable>(id: var.first).dominator = dominating_block;
3806 }
3807 }
3808
3809 for (auto &var : handler.accessed_temporaries_to_block)
3810 {
3811 auto itr = handler.result_id_to_type.find(x: var.first);
3812
3813 if (itr == end(cont&: handler.result_id_to_type))
3814 {
3815 			// We found a false positive ID being used; ignore it.
3816 // This should probably be an assert.
3817 continue;
3818 }
3819
3820 // There is no point in doing domination analysis for opaque types.
3821 auto &type = get<SPIRType>(id: itr->second);
3822 if (type_is_opaque_value(type))
3823 continue;
3824
3825 DominatorBuilder builder(cfg);
3826 bool force_temporary = false;
3827 bool used_in_header_hoisted_continue_block = false;
3828
3829 // Figure out which block is dominating all accesses of those temporaries.
3830 auto &blocks = var.second;
3831 for (auto &block : blocks)
3832 {
3833 builder.add_block(block);
3834
3835 if (blocks.size() != 1 && is_continue(next: block))
3836 {
3837 				// The risk here is that an inner loop can dominate the continue block.
3838 				// Any temporary we access in the continue block must be declared before the loop.
3839 				// This is moot for complex loops, however.
3840 auto &loop_header_block = get<SPIRBlock>(id: ir.continue_block_to_loop_header[block]);
3841 assert(loop_header_block.merge == SPIRBlock::MergeLoop);
3842 builder.add_block(block: loop_header_block.self);
3843 used_in_header_hoisted_continue_block = true;
3844 }
3845 }
3846
3847 uint32_t dominating_block = builder.get_dominator();
3848
3849 if (blocks.size() != 1 && is_single_block_loop(next: dominating_block))
3850 {
3851 // Awkward case, because the loop header is also the continue block,
3852 // so hoisting to loop header does not help.
3853 force_temporary = true;
3854 }
3855
3856 if (dominating_block)
3857 {
3858 // If we touch a variable in the dominating block, this is the expected setup.
3859 // SPIR-V normally mandates this, but we have extra cases for temporary use inside loops.
3860 bool first_use_is_dominator = blocks.count(x: dominating_block) != 0;
3861
3862 if (!first_use_is_dominator || force_temporary)
3863 {
3864 if (handler.access_chain_expressions.count(x: var.first))
3865 {
3866 // Exceptionally rare case.
3867 // We cannot declare temporaries of access chains (except on MSL perhaps with pointers).
3868 // Rather than do that, we force the indexing expressions to be declared in the right scope by
3869 // tracking their usage to that end. There is no temporary to hoist.
3870 // However, we still need to observe declaration order of the access chain.
3871
3872 if (used_in_header_hoisted_continue_block)
3873 {
3874 						// For this scenario, we used an access chain inside a continue block where we also registered an access to the header block.
3875 // This is a problem as we need to declare an access chain properly first with full definition.
3876 // We cannot use temporaries for these expressions,
3877 // so we must make sure the access chain is declared ahead of time.
3878 // Force a complex for loop to deal with this.
3879 // TODO: Out-of-order declaring for loops where continue blocks are emitted last might be another option.
3880 auto &loop_header_block = get<SPIRBlock>(id: dominating_block);
3881 assert(loop_header_block.merge == SPIRBlock::MergeLoop);
3882 loop_header_block.complex_continue = true;
3883 }
3884 }
3885 else
3886 {
3887 // This should be very rare, but if we try to declare a temporary inside a loop,
3888 // and that temporary is used outside the loop as well (spirv-opt inliner likes this)
3889 // we should actually emit the temporary outside the loop.
3890 hoisted_temporaries.insert(x: var.first);
3891 forced_temporaries.insert(x: var.first);
3892
3893 auto &block_temporaries = get<SPIRBlock>(id: dominating_block).declare_temporary;
3894 block_temporaries.emplace_back(ts&: handler.result_id_to_type[var.first], ts: var.first);
3895 }
3896 }
3897 else if (blocks.size() > 1)
3898 {
3899 				// Keep track of the temporary, as we might have to declare it later.
3900 // This can happen if the loop header dominates a temporary, but we have a complex fallback loop.
3901 // In this case, the header is actually inside the for (;;) {} block, and we have problems.
3902 // What we need to do is hoist the temporaries outside the for (;;) {} block in case the header block
3903 // declares the temporary.
3904 auto &block_temporaries = get<SPIRBlock>(id: dominating_block).potential_declare_temporary;
3905 block_temporaries.emplace_back(ts&: handler.result_id_to_type[var.first], ts: var.first);
3906 }
3907 }
3908 }
3909
3910 unordered_set<uint32_t> seen_blocks;
3911
3912 // Now, try to analyze whether or not these variables are actually loop variables.
3913 for (auto &loop_variable : potential_loop_variables)
3914 {
3915 auto &var = get<SPIRVariable>(id: loop_variable.first);
3916 auto dominator = var.dominator;
3917 BlockID block = loop_variable.second;
3918
3919 // The variable was accessed in multiple continue blocks, ignore.
3920 if (block == BlockID(~(0u)) || block == BlockID(0))
3921 continue;
3922
3923 // Dead code.
3924 if (dominator == ID(0))
3925 continue;
3926
3927 BlockID header = 0;
3928
3929 // Find the loop header for this block if we are a continue block.
3930 {
3931 auto itr = ir.continue_block_to_loop_header.find(x: block);
3932 if (itr != end(cont&: ir.continue_block_to_loop_header))
3933 {
3934 header = itr->second;
3935 }
3936 else if (get<SPIRBlock>(id: block).continue_block == block)
3937 {
3938 // Also check for self-referential continue block.
3939 header = block;
3940 }
3941 }
3942
3943 assert(header);
3944 auto &header_block = get<SPIRBlock>(id: header);
3945 auto &blocks = handler.accessed_variables_to_block[loop_variable.first];
3946
3947 // If a loop variable is not used before the loop, it's probably not a loop variable.
3948 bool has_accessed_variable = blocks.count(x: header) != 0;
3949
3950 // Now, there are two conditions we need to meet for the variable to be a loop variable.
3951 // 1. The dominating block must have a branch-free path to the loop header,
3952 // this way we statically know which expression should be part of the loop variable initializer.
3953
3954 // Walk from the dominator, if there is one straight edge connecting
3955 // dominator and loop header, we statically know the loop initializer.
3956 bool static_loop_init = true;
3957 while (dominator != header)
3958 {
3959 if (blocks.count(x: dominator) != 0)
3960 has_accessed_variable = true;
3961
3962 auto &succ = cfg.get_succeeding_edges(block: dominator);
3963 if (succ.size() != 1)
3964 {
3965 static_loop_init = false;
3966 break;
3967 }
3968
3969 auto &pred = cfg.get_preceding_edges(block: succ.front());
3970 if (pred.size() != 1 || pred.front() != dominator)
3971 {
3972 static_loop_init = false;
3973 break;
3974 }
3975
3976 dominator = succ.front();
3977 }
3978
3979 if (!static_loop_init || !has_accessed_variable)
3980 continue;
3981
3982 // The second condition we need to meet is that no access after the loop
3983 // merge can occur. Walk the CFG to see if we find anything.
3984
3985 seen_blocks.clear();
3986 cfg.walk_from(seen_blocks, block: header_block.merge_block, op: [&](uint32_t walk_block) -> bool {
3987 // We found a block which accesses the variable outside the loop.
3988 if (blocks.find(x: walk_block) != end(cont&: blocks))
3989 static_loop_init = false;
3990 return true;
3991 });
3992
3993 if (!static_loop_init)
3994 continue;
3995
3996 // We have a loop variable.
3997 header_block.loop_variables.push_back(t: loop_variable.first);
3998 		// Need to sort here as variables come from an unordered container, and pushing stuff in the wrong order
3999 		// will break reproducibility in regression runs.
4000 sort(first: begin(cont&: header_block.loop_variables), last: end(cont&: header_block.loop_variables));
4001 get<SPIRVariable>(id: loop_variable.first).loop_variable = true;
4002 }
4003}
4004
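// Conservatively determines whether the first access to 'var' in this block could be a read.
// Returns false only when we can prove the block completely writes the variable before any
// potential read; anything ambiguous (access chains, variable pointers, calls, or accesses
// hidden in branches) is conservatively treated as a read.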
4005bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var)
4006{
4007 for (auto &op : block.ops)
4008 {
4009 auto *ops = stream(instr: op);
4010 switch (op.op)
4011 {
4012 case OpStore:
4013 case OpCopyMemory:
4014 if (ops[0] == var)
4015 return false;
4016 break;
4017
4018 case OpAccessChain:
4019 case OpInBoundsAccessChain:
4020 case OpPtrAccessChain:
4021 // Access chains are generally used to partially read and write. It's too hard to analyze
4022 			// whether all constituents are written fully before continuing, so just assume it's preserved.
4023 // This is the same as the parameter preservation analysis.
4024 if (ops[2] == var)
4025 return true;
4026 break;
4027
4028 case OpSelect:
4029 // Variable pointers.
4030 // We might read before writing.
4031 if (ops[3] == var || ops[4] == var)
4032 return true;
4033 break;
4034
4035 case OpPhi:
4036 {
4037 // Variable pointers.
4038 // We might read before writing.
4039 if (op.length < 2)
4040 break;
4041
4042 uint32_t count = op.length - 2;
4043 for (uint32_t i = 0; i < count; i += 2)
4044 if (ops[i + 2] == var)
4045 return true;
4046 break;
4047 }
4048
4049 case OpCopyObject:
4050 case OpLoad:
4051 if (ops[2] == var)
4052 return true;
4053 break;
4054
4055 case OpFunctionCall:
4056 {
4057 if (op.length < 3)
4058 break;
4059
4060 // May read before writing.
4061 uint32_t count = op.length - 3;
4062 for (uint32_t i = 0; i < count; i++)
4063 if (ops[i + 3] == var)
4064 return true;
4065 break;
4066 }
4067
4068 default:
4069 break;
4070 }
4071 }
4072
4073 	// The variable is not accessed in any obvious fashion in this block.
4074 	// It is likely accessed inside a branch, so conservatively assume we must preserve it.
4075 return true;
4076}
4077
4078Bitset Compiler::get_buffer_block_flags(VariableID id) const
4079{
4080 return ir.get_buffer_block_flags(var: get<SPIRVariable>(id));
4081}
4082
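// Resolves the common base type shared by all members of a (possibly nested) struct,
// or simply the base type for non-struct types. Returns false if the members disagree,
// since no common base type exists in that case.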
4083bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type)
4084{
4085 if (type.basetype == SPIRType::Struct)
4086 {
4087 base_type = SPIRType::Unknown;
4088 for (auto &member_type : type.member_types)
4089 {
4090 SPIRType::BaseType member_base;
4091 if (!get_common_basic_type(type: get<SPIRType>(id: member_type), base_type&: member_base))
4092 return false;
4093
4094 if (base_type == SPIRType::Unknown)
4095 base_type = member_base;
4096 else if (base_type != member_base)
4097 return false;
4098 }
4099 return true;
4100 }
4101 else
4102 {
4103 base_type = type.basetype;
4104 return true;
4105 }
4106}
4107
4108void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltIn builtin,
4109 const Bitset &decoration_flags)
4110{
4111 // If used, we will need to explicitly declare a new array size for these builtins.
4112
4113 if (builtin == BuiltInClipDistance)
4114 {
4115 if (!type.array_size_literal[0])
4116 SPIRV_CROSS_THROW("Array size for ClipDistance must be a literal.");
4117 uint32_t array_size = type.array[0];
4118 if (array_size == 0)
4119 SPIRV_CROSS_THROW("Array size for ClipDistance must not be unsized.");
4120 compiler.clip_distance_count = array_size;
4121 }
4122 else if (builtin == BuiltInCullDistance)
4123 {
4124 if (!type.array_size_literal[0])
4125 SPIRV_CROSS_THROW("Array size for CullDistance must be a literal.");
4126 uint32_t array_size = type.array[0];
4127 if (array_size == 0)
4128 SPIRV_CROSS_THROW("Array size for CullDistance must not be unsized.");
4129 compiler.cull_distance_count = array_size;
4130 }
4131 else if (builtin == BuiltInPosition)
4132 {
4133 if (decoration_flags.get(bit: DecorationInvariant))
4134 compiler.position_invariant = true;
4135 }
4136}
4137
4138void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id, bool allow_blocks)
4139{
4140 // Only handle plain variables here.
4141 // Builtins which are part of a block are handled in AccessChain.
4142 	// If allow_blocks is set, however, we are handling initializers of blocks,
4143 	// which implies that all members are written to.
4144
4145 auto *var = compiler.maybe_get<SPIRVariable>(id);
4146 auto *m = compiler.ir.find_meta(id);
4147 if (var && m)
4148 {
4149 auto &type = compiler.get<SPIRType>(id: var->basetype);
4150 auto &decorations = m->decoration;
4151 auto &flags = type.storage == StorageClassInput ?
4152 compiler.active_input_builtins : compiler.active_output_builtins;
4153 if (decorations.builtin)
4154 {
4155 flags.set(decorations.builtin_type);
4156 handle_builtin(type, builtin: decorations.builtin_type, decoration_flags: decorations.decoration_flags);
4157 }
4158 else if (allow_blocks && compiler.has_decoration(id: type.self, decoration: DecorationBlock))
4159 {
4160 uint32_t member_count = uint32_t(type.member_types.size());
4161 for (uint32_t i = 0; i < member_count; i++)
4162 {
4163 if (compiler.has_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn))
4164 {
4165 auto &member_type = compiler.get<SPIRType>(id: type.member_types[i]);
4166 BuiltIn builtin = BuiltIn(compiler.get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn));
4167 flags.set(builtin);
4168 handle_builtin(type: member_type, builtin, decoration_flags: compiler.get_member_decoration_bitset(id: type.self, index: i));
4169 }
4170 }
4171 }
4172 }
4173}
4174
4175void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id)
4176{
4177 add_if_builtin(id, allow_blocks: false);
4178}
4179
4180void Compiler::ActiveBuiltinHandler::add_if_builtin_or_block(uint32_t id)
4181{
4182 add_if_builtin(id, allow_blocks: true);
4183}
4184
4185bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length)
4186{
4187 switch (opcode)
4188 {
4189 case OpStore:
4190 if (length < 1)
4191 return false;
4192
4193 add_if_builtin(id: args[0]);
4194 break;
4195
4196 case OpCopyMemory:
4197 if (length < 2)
4198 return false;
4199
4200 add_if_builtin(id: args[0]);
4201 add_if_builtin(id: args[1]);
4202 break;
4203
4204 case OpCopyObject:
4205 case OpLoad:
4206 if (length < 3)
4207 return false;
4208
4209 add_if_builtin(id: args[2]);
4210 break;
4211
4212 case OpSelect:
4213 if (length < 5)
4214 return false;
4215
4216 add_if_builtin(id: args[3]);
4217 add_if_builtin(id: args[4]);
4218 break;
4219
4220 case OpPhi:
4221 {
4222 if (length < 2)
4223 return false;
4224
4225 uint32_t count = length - 2;
4226 args += 2;
4227 for (uint32_t i = 0; i < count; i += 2)
4228 add_if_builtin(id: args[i]);
4229 break;
4230 }
4231
4232 case OpFunctionCall:
4233 {
4234 if (length < 3)
4235 return false;
4236
4237 uint32_t count = length - 3;
4238 args += 3;
4239 for (uint32_t i = 0; i < count; i++)
4240 add_if_builtin(id: args[i]);
4241 break;
4242 }
4243
4244 case OpAccessChain:
4245 case OpInBoundsAccessChain:
4246 case OpPtrAccessChain:
4247 {
4248 if (length < 4)
4249 return false;
4250
4251 		// Only consider global variables here; function-local variables and other access chains
4252 		// cannot be considered yet, as they have not been created at this point.
4253 auto *var = compiler.maybe_get<SPIRVariable>(id: args[2]);
4254 if (!var)
4255 break;
4256
4257 // Required if we access chain into builtins like gl_GlobalInvocationID.
4258 add_if_builtin(id: args[2]);
4259
4260 // Start traversing type hierarchy at the proper non-pointer types.
4261 auto *type = &compiler.get_variable_data_type(var: *var);
4262
4263 auto &flags =
4264 var->storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins;
4265
4266 uint32_t count = length - 3;
4267 args += 3;
4268 for (uint32_t i = 0; i < count; i++)
4269 {
4270 // Pointers
4271 if (opcode == OpPtrAccessChain && i == 0)
4272 {
4273 type = &compiler.get<SPIRType>(id: type->parent_type);
4274 continue;
4275 }
4276
4277 // Arrays
4278 if (!type->array.empty())
4279 {
4280 type = &compiler.get<SPIRType>(id: type->parent_type);
4281 }
4282 // Structs
4283 else if (type->basetype == SPIRType::Struct)
4284 {
4285 uint32_t index = compiler.get<SPIRConstant>(id: args[i]).scalar();
4286
4287 if (index < uint32_t(compiler.ir.meta[type->self].members.size()))
4288 {
4289 auto &decorations = compiler.ir.meta[type->self].members[index];
4290 if (decorations.builtin)
4291 {
4292 flags.set(decorations.builtin_type);
4293 handle_builtin(type: compiler.get<SPIRType>(id: type->member_types[index]), builtin: decorations.builtin_type,
4294 decoration_flags: decorations.decoration_flags);
4295 }
4296 }
4297
4298 type = &compiler.get<SPIRType>(id: type->member_types[index]);
4299 }
4300 else
4301 {
4302 // No point in traversing further. We won't find any extra builtins.
4303 break;
4304 }
4305 }
4306 break;
4307 }
4308
4309 default:
4310 break;
4311 }
4312
4313 return true;
4314}
4315
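// Traverses every opcode reachable from the default entry point and records which
// input/output builtins are actually used. This also captures the declared array sizes
// for ClipDistance/CullDistance and whether Position is decorated Invariant.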
4316void Compiler::update_active_builtins()
4317{
4318 active_input_builtins.reset();
4319 active_output_builtins.reset();
4320 cull_distance_count = 0;
4321 clip_distance_count = 0;
4322 ActiveBuiltinHandler handler(*this);
4323 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
4324
4325 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
4326 if (var.storage != StorageClassOutput)
4327 return;
4328 if (!interface_variable_exists_in_entry_point(id: var.self))
4329 return;
4330
4331 // Also, make sure we preserve output variables which are only initialized, but never accessed by any code.
4332 if (var.initializer != ID(0))
4333 handler.add_if_builtin_or_block(id: var.self);
4334 });
4335}
4336
4337// Returns whether this shader uses a builtin of the storage class
4338bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) const
4339{
4340 const Bitset *flags;
4341 switch (storage)
4342 {
4343 case StorageClassInput:
4344 flags = &active_input_builtins;
4345 break;
4346 case StorageClassOutput:
4347 flags = &active_output_builtins;
4348 break;
4349
4350 default:
4351 return false;
4352 }
4353 return flags->get(bit: builtin);
4354}
4355
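// Two-stage analysis: first find every sampled image used with a depth-compare (Dref) opcode,
// then propagate that comparison state through loads, access chains and function call arguments
// so that separate images, separate samplers and combined image samplers are tagged consistently.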
4356void Compiler::analyze_image_and_sampler_usage()
4357{
4358 CombinedImageSamplerDrefHandler dref_handler(*this);
4359 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler&: dref_handler);
4360
4361 CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers);
4362 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
4363
4364 	// Need to run this traversal twice. The first time, we propagate any comparison sampler usage from leaf functions
4365 	// down to main().
4366 	// In the second pass, we can propagate forced depth state coming from main() up into leaf functions.
4367 handler.dependency_hierarchy.clear();
4368 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
4369
4370 comparison_ids = std::move(handler.comparison_ids);
4371 need_subpass_input = handler.need_subpass_input;
4372
4373 // Forward information from separate images and samplers into combined image samplers.
4374 for (auto &combined : combined_image_samplers)
4375 if (comparison_ids.count(x: combined.sampler_id))
4376 comparison_ids.insert(x: combined.combined_id);
4377}
4378
4379bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t)
4380{
4381 // Mark all sampled images which are used with Dref.
4382 switch (opcode)
4383 {
4384 case OpImageSampleDrefExplicitLod:
4385 case OpImageSampleDrefImplicitLod:
4386 case OpImageSampleProjDrefExplicitLod:
4387 case OpImageSampleProjDrefImplicitLod:
4388 case OpImageSparseSampleProjDrefImplicitLod:
4389 case OpImageSparseSampleDrefImplicitLod:
4390 case OpImageSparseSampleProjDrefExplicitLod:
4391 case OpImageSparseSampleDrefExplicitLod:
4392 case OpImageDrefGather:
4393 case OpImageSparseDrefGather:
4394 dref_combined_samplers.insert(x: args[2]);
4395 return true;
4396
4397 default:
4398 break;
4399 }
4400
4401 return true;
4402}
4403
4404const CFG &Compiler::get_cfg_for_current_function() const
4405{
4406 assert(current_function);
4407 return get_cfg_for_function(id: current_function->self);
4408}
4409
4410const CFG &Compiler::get_cfg_for_function(uint32_t id) const
4411{
4412 auto cfg_itr = function_cfgs.find(x: id);
4413 assert(cfg_itr != end(function_cfgs));
4414 assert(cfg_itr->second);
4415 return *cfg_itr->second;
4416}
4417
4418void Compiler::build_function_control_flow_graphs_and_analyze()
4419{
4420 CFGBuilder handler(*this);
4421 handler.function_cfgs[ir.default_entry_point].reset(p: new CFG(*this, get<SPIRFunction>(id: ir.default_entry_point)));
4422 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
4423 function_cfgs = std::move(handler.function_cfgs);
4424 bool single_function = function_cfgs.size() <= 1;
4425
4426 for (auto &f : function_cfgs)
4427 {
4428 auto &func = get<SPIRFunction>(id: f.first);
4429 AnalyzeVariableScopeAccessHandler scope_handler(*this, func);
4430 analyze_variable_scope(entry&: func, handler&: scope_handler);
4431 find_function_local_luts(entry&: func, handler: scope_handler, single_function);
4432
4433 // Check if we can actually use the loop variables we found in analyze_variable_scope.
4434 // To use multiple initializers, we need the same type and qualifiers.
4435 for (auto block : func.blocks)
4436 {
4437 auto &b = get<SPIRBlock>(id: block);
4438 if (b.loop_variables.size() < 2)
4439 continue;
4440
4441 auto &flags = get_decoration_bitset(id: b.loop_variables.front());
4442 uint32_t type = get<SPIRVariable>(id: b.loop_variables.front()).basetype;
4443 bool invalid_initializers = false;
4444 for (auto loop_variable : b.loop_variables)
4445 {
4446 if (flags != get_decoration_bitset(id: loop_variable) ||
4447 type != get<SPIRVariable>(id: b.loop_variables.front()).basetype)
4448 {
4449 invalid_initializers = true;
4450 break;
4451 }
4452 }
4453
4454 if (invalid_initializers)
4455 {
4456 for (auto loop_variable : b.loop_variables)
4457 get<SPIRVariable>(id: loop_variable).loop_variable = false;
4458 b.loop_variables.clear();
4459 }
4460 }
4461 }
4462}
4463
4464Compiler::CFGBuilder::CFGBuilder(Compiler &compiler_)
4465 : compiler(compiler_)
4466{
4467}
4468
4469bool Compiler::CFGBuilder::handle(spv::Op, const uint32_t *, uint32_t)
4470{
4471 return true;
4472}
4473
4474bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func)
4475{
4476 if (function_cfgs.find(x: func.self) == end(cont&: function_cfgs))
4477 {
4478 function_cfgs[func.self].reset(p: new CFG(compiler, func));
4479 return true;
4480 }
4481 else
4482 return false;
4483}
4484
4485void Compiler::CombinedImageSamplerUsageHandler::add_dependency(uint32_t dst, uint32_t src)
4486{
4487 dependency_hierarchy[dst].insert(x: src);
4488 // Propagate up any comparison state if we're loading from one such variable.
4489 if (comparison_ids.count(x: src))
4490 comparison_ids.insert(x: dst);
4491}
4492
4493bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length)
4494{
4495 if (length < 3)
4496 return false;
4497
4498 auto &func = compiler.get<SPIRFunction>(id: args[2]);
4499 const auto *arg = &args[3];
4500 length -= 3;
4501
4502 for (uint32_t i = 0; i < length; i++)
4503 {
4504 auto &argument = func.arguments[i];
4505 add_dependency(dst: argument.id, src: arg[i]);
4506 }
4507
4508 return true;
4509}
4510
4511void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids(uint32_t id)
4512{
4513 // Traverse the variable dependency hierarchy and tag everything in its path with comparison ids.
4514 comparison_ids.insert(x: id);
4515
4516 for (auto &dep_id : dependency_hierarchy[id])
4517 add_hierarchy_to_comparison_ids(id: dep_id);
4518}
4519
4520bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
4521{
4522 switch (opcode)
4523 {
4524 case OpAccessChain:
4525 case OpInBoundsAccessChain:
4526 case OpPtrAccessChain:
4527 case OpLoad:
4528 {
4529 if (length < 3)
4530 return false;
4531
4532 add_dependency(dst: args[1], src: args[2]);
4533
4534 // Ideally defer this to OpImageRead, but then we'd need to track loaded IDs.
4535 // If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord.
4536 auto &type = compiler.get<SPIRType>(id: args[0]);
4537 if (type.image.dim == DimSubpassData)
4538 need_subpass_input = true;
4539
4540 // If we load a SampledImage and it will be used with Dref, propagate the state up.
4541 if (dref_combined_samplers.count(x: args[1]) != 0)
4542 add_hierarchy_to_comparison_ids(id: args[1]);
4543 break;
4544 }
4545
4546 case OpSampledImage:
4547 {
4548 if (length < 4)
4549 return false;
4550
4551 		// If the underlying resource has been used for comparison, then duplicate loads of that resource must be too.
4552 // This image must be a depth image.
4553 uint32_t result_id = args[1];
4554 uint32_t image = args[2];
4555 uint32_t sampler = args[3];
4556
4557 if (dref_combined_samplers.count(x: result_id) != 0)
4558 {
4559 add_hierarchy_to_comparison_ids(id: image);
4560
4561 // This sampler must be a SamplerComparisonState, and not a regular SamplerState.
4562 add_hierarchy_to_comparison_ids(id: sampler);
4563
4564 // Mark the OpSampledImage itself as being comparison state.
4565 comparison_ids.insert(x: result_id);
4566 }
4567 return true;
4568 }
4569
4570 default:
4571 break;
4572 }
4573
4574 return true;
4575}
4576
4577bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const
4578{
4579 auto *m = ir.find_meta(id);
4580 return m && m->hlsl_is_magic_counter_buffer;
4581}
4582
4583bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const
4584{
4585 auto *m = ir.find_meta(id);
4586
4587 // First, check for the proper decoration.
4588 if (m && m->hlsl_magic_counter_buffer != 0)
4589 {
4590 counter_id = m->hlsl_magic_counter_buffer;
4591 return true;
4592 }
4593 else
4594 return false;
4595}
4596
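// Builds a null constant for an arbitrary type by recursively synthesizing null constants
// for array elements and struct members and composing them into the final constant.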
4597void Compiler::make_constant_null(uint32_t id, uint32_t type)
4598{
4599 auto &constant_type = get<SPIRType>(id: type);
4600
4601 if (constant_type.pointer)
4602 {
4603 auto &constant = set<SPIRConstant>(id, args&: type);
4604 constant.make_null(constant_type_: constant_type);
4605 }
4606 else if (!constant_type.array.empty())
4607 {
4608 assert(constant_type.parent_type);
4609 uint32_t parent_id = ir.increase_bound_by(count: 1);
4610 make_constant_null(id: parent_id, type: constant_type.parent_type);
4611
4612 if (!constant_type.array_size_literal.back())
4613 SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal.");
4614
4615 SmallVector<uint32_t> elements(constant_type.array.back());
4616 for (uint32_t i = 0; i < constant_type.array.back(); i++)
4617 elements[i] = parent_id;
4618 set<SPIRConstant>(id, args&: type, args: elements.data(), args: uint32_t(elements.size()), args: false);
4619 }
4620 else if (!constant_type.member_types.empty())
4621 {
4622 uint32_t member_ids = ir.increase_bound_by(count: uint32_t(constant_type.member_types.size()));
4623 SmallVector<uint32_t> elements(constant_type.member_types.size());
4624 for (uint32_t i = 0; i < constant_type.member_types.size(); i++)
4625 {
4626 make_constant_null(id: member_ids + i, type: constant_type.member_types[i]);
4627 elements[i] = member_ids + i;
4628 }
4629 set<SPIRConstant>(id, args&: type, args: elements.data(), args: uint32_t(elements.size()), args: false);
4630 }
4631 else
4632 {
4633 auto &constant = set<SPIRConstant>(id, args&: type);
4634 constant.make_null(constant_type_: constant_type);
4635 }
4636}
4637
4638const SmallVector<spv::Capability> &Compiler::get_declared_capabilities() const
4639{
4640 return ir.declared_capabilities;
4641}
4642
4643const SmallVector<std::string> &Compiler::get_declared_extensions() const
4644{
4645 return ir.declared_extensions;
4646}
4647
4648std::string Compiler::get_remapped_declared_block_name(VariableID id) const
4649{
4650 return get_remapped_declared_block_name(id, fallback_prefer_instance_name: false);
4651}
4652
4653std::string Compiler::get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const
4654{
4655 auto itr = declared_block_names.find(x: id);
4656 if (itr != end(cont: declared_block_names))
4657 {
4658 return itr->second;
4659 }
4660 else
4661 {
4662 auto &var = get<SPIRVariable>(id);
4663
4664 if (fallback_prefer_instance_name)
4665 {
4666 return to_name(id: var.self);
4667 }
4668 else
4669 {
4670 auto &type = get<SPIRType>(id: var.basetype);
4671 auto *type_meta = ir.find_meta(id: type.self);
4672 auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr;
4673 return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name;
4674 }
4675 }
4676}
4677
4678bool Compiler::reflection_ssbo_instance_name_is_significant() const
4679{
4680 if (ir.source.known)
4681 {
4682 // UAVs from HLSL source tend to be declared in a way where the type is reused
4683 // but the instance name is significant, and that's the name we should report.
4684 // For GLSL, SSBOs each have their own block type as that's how GLSL is written.
4685 return ir.source.hlsl;
4686 }
4687
4688 unordered_set<uint32_t> ssbo_type_ids;
4689 bool aliased_ssbo_types = false;
4690
4691 // If we don't have any OpSource information, we need to perform some shaky heuristics.
4692 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
4693 auto &type = this->get<SPIRType>(id: var.basetype);
4694 if (!type.pointer || var.storage == StorageClassFunction)
4695 return;
4696
4697 bool ssbo = var.storage == StorageClassStorageBuffer ||
4698 (var.storage == StorageClassUniform && has_decoration(id: type.self, decoration: DecorationBufferBlock));
4699
4700 if (ssbo)
4701 {
4702 if (ssbo_type_ids.count(x: type.self))
4703 aliased_ssbo_types = true;
4704 else
4705 ssbo_type_ids.insert(x: type.self);
4706 }
4707 });
4708
4709 // If the block name is aliased, assume we have HLSL-style UAV declarations.
4710 return aliased_ssbo_types;
4711}
4712
4713bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op,
4714 const uint32_t *args, uint32_t length)
4715{
4716 if (length < 2)
4717 return false;
4718
4719 bool has_result_id = false, has_result_type = false;
4720 HasResultAndType(opcode: op, hasResult: &has_result_id, hasResultType: &has_result_type);
4721 if (has_result_id && has_result_type)
4722 {
4723 result_type = args[0];
4724 result_id = args[1];
4725 return true;
4726 }
4727 else
4728 return false;
4729}
4730
4731Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t index) const
4732{
4733 Bitset flags;
4734 auto *type_meta = ir.find_meta(id: type.self);
4735
4736 if (type_meta)
4737 {
4738 auto &members = type_meta->members;
4739 if (index >= members.size())
4740 return flags;
4741 auto &dec = members[index];
4742
4743 flags.merge_or(other: dec.decoration_flags);
4744
4745 auto &member_type = get<SPIRType>(id: type.member_types[index]);
4746
4747 // If our member type is a struct, traverse all the child members as well recursively.
4748 auto &member_childs = member_type.member_types;
4749 for (uint32_t i = 0; i < member_childs.size(); i++)
4750 {
4751 auto &child_member_type = get<SPIRType>(id: member_childs[i]);
4752 if (!child_member_type.pointer)
4753 flags.merge_or(other: combined_decoration_for_member(type: member_type, index: i));
4754 }
4755 }
4756
4757 return flags;
4758}
4759
4760bool Compiler::is_desktop_only_format(spv::ImageFormat format)
4761{
4762 switch (format)
4763 {
4764 // Desktop-only formats
4765 case ImageFormatR11fG11fB10f:
4766 case ImageFormatR16f:
4767 case ImageFormatRgb10A2:
4768 case ImageFormatR8:
4769 case ImageFormatRg8:
4770 case ImageFormatR16:
4771 case ImageFormatRg16:
4772 case ImageFormatRgba16:
4773 case ImageFormatR16Snorm:
4774 case ImageFormatRg16Snorm:
4775 case ImageFormatRgba16Snorm:
4776 case ImageFormatR8Snorm:
4777 case ImageFormatRg8Snorm:
4778 case ImageFormatR8ui:
4779 case ImageFormatRg8ui:
4780 case ImageFormatR16ui:
4781 case ImageFormatRgb10a2ui:
4782 case ImageFormatR8i:
4783 case ImageFormatRg8i:
4784 case ImageFormatR16i:
4785 return true;
4786 default:
4787 break;
4788 }
4789
4790 return false;
4791}
4792
4793// An image is determined to be a depth image if it is marked as a depth image and is not also
4794// explicitly marked with a color format, or if there are any sample/gather compare operations on it.
4795bool Compiler::is_depth_image(const SPIRType &type, uint32_t id) const
4796{
4797 return (type.image.depth && type.image.format == ImageFormatUnknown) || comparison_ids.count(x: id);
4798}
4799
4800bool Compiler::type_is_opaque_value(const SPIRType &type) const
4801{
4802 return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image ||
4803 type.basetype == SPIRType::Sampler);
4804}
4805
4806// Make these member functions so we can easily break on any force_recompile events.
4807void Compiler::force_recompile()
4808{
4809 is_force_recompile = true;
4810}
4811
4812void Compiler::force_recompile_guarantee_forward_progress()
4813{
4814 force_recompile();
4815 is_force_recompile_forward_progress = true;
4816}
4817
4818bool Compiler::is_forcing_recompilation() const
4819{
4820 return is_force_recompile;
4821}
4822
4823void Compiler::clear_force_recompile()
4824{
4825 is_force_recompile = false;
4826 is_force_recompile_forward_progress = false;
4827}
4828
4829Compiler::PhysicalStorageBufferPointerHandler::PhysicalStorageBufferPointerHandler(Compiler &compiler_)
4830 : compiler(compiler_)
4831{
4832}
4833
4834Compiler::PhysicalBlockMeta *Compiler::PhysicalStorageBufferPointerHandler::find_block_meta(uint32_t id) const
4835{
4836 auto chain_itr = access_chain_to_physical_block.find(x: id);
4837 if (chain_itr != access_chain_to_physical_block.end())
4838 return chain_itr->second;
4839 else
4840 return nullptr;
4841}
4842
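// Parses the optional MemoryAccess operands of a load/store and, when an Aligned operand is
// present, raises the recorded alignment requirement for the physical block the pointer refers to.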
4843void Compiler::PhysicalStorageBufferPointerHandler::mark_aligned_access(uint32_t id, const uint32_t *args, uint32_t length)
4844{
4845 uint32_t mask = *args;
4846 args++;
4847 length--;
4848 if (length && (mask & MemoryAccessVolatileMask) != 0)
4849 {
4850 args++;
4851 length--;
4852 }
4853
4854 if (length && (mask & MemoryAccessAlignedMask) != 0)
4855 {
4856 uint32_t alignment = *args;
4857 auto *meta = find_block_meta(id);
4858
4859 // This makes the assumption that the application does not rely on insane edge cases like:
4860 // Bind buffer with ADDR = 8, use block offset of 8 bytes, load/store with 16 byte alignment.
4861 // If we emit the buffer with alignment = 16 here, the first element at offset = 0 should
4862 // actually have alignment of 8 bytes, but this is too theoretical and awkward to support.
4863 		// We could potentially keep track of any offset in the access chain, but it's
4864 		// practically impossible for high-level compilers to emit code like that,
4865 		// so deducing the overall alignment requirement from the maximum observed Alignment value is probably fine.
4866 if (meta && alignment > meta->alignment)
4867 meta->alignment = alignment;
4868 }
4869}
4870
4871bool Compiler::PhysicalStorageBufferPointerHandler::type_is_bda_block_entry(uint32_t type_id) const
4872{
4873 auto &type = compiler.get<SPIRType>(id: type_id);
4874 return type.storage == StorageClassPhysicalStorageBufferEXT && type.pointer &&
4875 type.pointer_depth == 1 && !compiler.type_is_array_of_pointers(type);
4876}
4877
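// Minimum scalar alignment of a type: physical storage buffer pointers are 8 bytes,
// structs take the maximum over their members, and everything else aligns to its width in bytes.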
4878uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_minimum_scalar_alignment(const SPIRType &type) const
4879{
4880 if (type.storage == spv::StorageClassPhysicalStorageBufferEXT)
4881 return 8;
4882 else if (type.basetype == SPIRType::Struct)
4883 {
4884 uint32_t alignment = 0;
4885 for (auto &member_type : type.member_types)
4886 {
4887 uint32_t member_align = get_minimum_scalar_alignment(type: compiler.get<SPIRType>(id: member_type));
4888 if (member_align > alignment)
4889 alignment = member_align;
4890 }
4891 return alignment;
4892 }
4893 else
4894 return type.width / 8;
4895}
4896
4897void Compiler::PhysicalStorageBufferPointerHandler::setup_meta_chain(uint32_t type_id, uint32_t var_id)
4898{
4899 if (type_is_bda_block_entry(type_id))
4900 {
4901 auto &meta = physical_block_type_meta[type_id];
4902 access_chain_to_physical_block[var_id] = &meta;
4903
4904 auto &type = compiler.get<SPIRType>(id: type_id);
4905 if (type.basetype != SPIRType::Struct)
4906 non_block_types.insert(x: type_id);
4907
4908 if (meta.alignment == 0)
4909 meta.alignment = get_minimum_scalar_alignment(type: compiler.get_pointee_type(type));
4910 }
4911}
4912
4913bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t *args, uint32_t length)
4914{
4915 // When a BDA pointer comes to life, we need to keep a mapping of SSA ID -> type ID for the pointer type.
4916 // For every load and store, we'll need to be able to look up the type ID being accessed and mark any alignment
4917 // requirements.
4918 switch (op)
4919 {
4920 case OpConvertUToPtr:
4921 case OpBitcast:
4922 case OpCompositeExtract:
4923 // Extract can begin a new chain if we had a struct or array of pointers as input.
4924 // We don't begin chains before we have a pure scalar pointer.
4925 setup_meta_chain(type_id: args[0], var_id: args[1]);
4926 break;
4927
4928 case OpAccessChain:
4929 case OpInBoundsAccessChain:
4930 case OpPtrAccessChain:
4931 case OpCopyObject:
4932 {
4933 auto itr = access_chain_to_physical_block.find(x: args[2]);
4934 if (itr != access_chain_to_physical_block.end())
4935 access_chain_to_physical_block[args[1]] = itr->second;
4936 break;
4937 }
4938
4939 case OpLoad:
4940 {
4941 setup_meta_chain(type_id: args[0], var_id: args[1]);
4942 if (length >= 4)
4943 mark_aligned_access(id: args[2], args: args + 3, length: length - 3);
4944 break;
4945 }
4946
4947 case OpStore:
4948 {
4949 if (length >= 3)
4950 mark_aligned_access(id: args[0], args: args + 2, length: length - 2);
4951 break;
4952 }
4953
4954 default:
4955 break;
4956 }
4957
4958 return true;
4959}
4960
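// Walks up the parent pointer chain until we reach the pointer type which acts as the
// declared BDA block entry, i.e. a single-level PhysicalStorageBuffer pointer.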
4961uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_base_non_block_type_id(uint32_t type_id) const
4962{
4963 auto *type = &compiler.get<SPIRType>(id: type_id);
4964 while (type->pointer &&
4965 type->storage == StorageClassPhysicalStorageBufferEXT &&
4966 !type_is_bda_block_entry(type_id))
4967 {
4968 type_id = type->parent_type;
4969 type = &compiler.get<SPIRType>(id: type_id);
4970 }
4971
4972 assert(type_is_bda_block_entry(type_id));
4973 return type_id;
4974}
4975
4976void Compiler::PhysicalStorageBufferPointerHandler::analyze_non_block_types_from_block(const SPIRType &type)
4977{
4978 for (auto &member : type.member_types)
4979 {
4980 auto &subtype = compiler.get<SPIRType>(id: member);
4981 if (subtype.basetype != SPIRType::Struct && subtype.pointer &&
4982 subtype.storage == spv::StorageClassPhysicalStorageBufferEXT)
4983 {
4984 non_block_types.insert(x: get_base_non_block_type_id(type_id: member));
4985 }
4986 else if (subtype.basetype == SPIRType::Struct && !subtype.pointer)
4987 analyze_non_block_types_from_block(type: subtype);
4988 }
4989}
4990
4991void Compiler::analyze_non_block_pointer_types()
4992{
4993 PhysicalStorageBufferPointerHandler handler(*this);
4994 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
4995
4996 // Analyze any block declaration we have to make. It might contain
4997 // physical pointers to POD types which we never used, and thus never added to the list.
4998 // We'll need to add those pointer types to the set of types we declare.
4999 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t, SPIRType &type) {
5000 if (has_decoration(id: type.self, decoration: DecorationBlock) || has_decoration(id: type.self, decoration: DecorationBufferBlock))
5001 handler.analyze_non_block_types_from_block(type);
5002 });
5003
5004 physical_storage_non_block_pointer_types.reserve(count: handler.non_block_types.size());
5005 for (auto type : handler.non_block_types)
5006 physical_storage_non_block_pointer_types.push_back(t: type);
5007 sort(first: begin(cont&: physical_storage_non_block_pointer_types), last: end(cont&: physical_storage_non_block_pointer_types));
5008 physical_storage_type_to_alignment = std::move(handler.physical_block_type_meta);
5009}
5010
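// Prepass which records where OpBegin/EndInvocationInterlockEXT is called: it flags the case
// where the interlock spans multiple functions (split_function_case) and the case where it
// occurs inside control flow (control_flow_interlock), so the access handler below can decide
// how conservative it needs to be when marking interlocked resources.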
5011bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t)
5012{
5013 if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT)
5014 {
5015 if (interlock_function_id != 0 && interlock_function_id != call_stack.back())
5016 {
5017 			// Most complex case: we have no sensible way of dealing with this
5018 			// other than taking the fully conservative approach, so exit early.
5019 split_function_case = true;
5020 return false;
5021 }
5022 else
5023 {
5024 interlock_function_id = call_stack.back();
5025 // If this call is performed inside control flow we have a problem.
5026 auto &cfg = compiler.get_cfg_for_function(id: interlock_function_id);
5027
5028 uint32_t from_block_id = compiler.get<SPIRFunction>(id: interlock_function_id).entry_block;
5029 bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from: from_block_id, to: current_block_id);
5030 if (!outside_control_flow)
5031 control_flow_interlock = true;
5032 }
5033 }
5034 return true;
5035}
5036
5037void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block)
5038{
5039 current_block_id = block.self;
5040}
5041
5042bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length)
5043{
5044 if (length < 3)
5045 return false;
5046 call_stack.push_back(t: args[2]);
5047 return true;
5048}
5049
5050bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t)
5051{
5052 call_stack.pop_back();
5053 return true;
5054}
5055
5056bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length)
5057{
5058 if (length < 3)
5059 return false;
5060
5061 if (args[2] == interlock_function_id)
5062 call_stack_is_interlocked = true;
5063
5064 call_stack.push_back(t: args[2]);
5065 return true;
5066}
5067
5068bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t)
5069{
5070 if (call_stack.back() == interlock_function_id)
5071 call_stack_is_interlocked = false;
5072
5073 call_stack.pop_back();
5074 return true;
5075}
5076
5077void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id)
5078{
5079 if ((use_critical_section && in_crit_sec) || (control_flow_interlock && call_stack_is_interlocked) ||
5080 split_function_case)
5081 {
5082 compiler.interlocked_resources.insert(x: id);
5083 }
5084}
5085
5086bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
5087{
5088 	// Only care about critical section analysis if we have the simple case.
5089 if (use_critical_section)
5090 {
5091 if (opcode == OpBeginInvocationInterlockEXT)
5092 {
5093 in_crit_sec = true;
5094 return true;
5095 }
5096
5097 if (opcode == OpEndInvocationInterlockEXT)
5098 {
5099 // End critical section--nothing more to do.
5100 return false;
5101 }
5102 }
5103
5104 // We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need.
5105 switch (opcode)
5106 {
5107 case OpLoad:
5108 {
5109 if (length < 3)
5110 return false;
5111
5112 uint32_t ptr = args[2];
5113 auto *var = compiler.maybe_get_backing_variable(chain: ptr);
5114
5115 // We're only concerned with buffer and image memory here.
5116 if (!var)
5117 break;
5118
5119 switch (var->storage)
5120 {
5121 default:
5122 break;
5123
5124 case StorageClassUniformConstant:
5125 {
5126 uint32_t result_type = args[0];
5127 uint32_t id = args[1];
5128 compiler.set<SPIRExpression>(id, args: "", args&: result_type, args: true);
5129 compiler.register_read(expr: id, chain: ptr, forwarded: true);
5130 break;
5131 }
5132
5133 case StorageClassUniform:
5134 // Must have BufferBlock; we only care about SSBOs.
5135 if (!compiler.has_decoration(id: compiler.get<SPIRType>(id: var->basetype).self, decoration: DecorationBufferBlock))
5136 break;
5137 // fallthrough
5138 case StorageClassStorageBuffer:
5139 access_potential_resource(id: var->self);
5140 break;
5141 }
5142 break;
5143 }
5144
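	// Pointer-forming ops such as access chains are registered as bare SPIRExpressions here only
	// so that maybe_get_backing_variable() can trace a pointer back to its originating variable
	// when the pointer is eventually loaded from or stored to; this handler never emits real code.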
	case OpInBoundsAccessChain:
	case OpAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];

		auto &type = compiler.get<SPIRType>(result_type);
		if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
		    type.storage == StorageClassStorageBuffer)
		{
			uint32_t id = args[1];
			uint32_t ptr = args[2];
			compiler.set<SPIRExpression>(id, "", result_type, true);
			compiler.register_read(id, ptr, true);
			compiler.ir.ids[id].set_allow_type_rewrite();
		}
		break;
	}

	case OpImageTexelPointer:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];
		uint32_t id = args[1];
		uint32_t ptr = args[2];
		auto &e = compiler.set<SPIRExpression>(id, "", result_type, true);
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var)
			e.loaded_from = var->self;
		break;
	}

	case OpStore:
	case OpImageWrite:
	case OpAtomicStore:
	{
		if (length < 1)
			return false;

		uint32_t ptr = args[0];
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
		            var->storage == StorageClassStorageBuffer))
		{
			access_potential_resource(var->self);
		}

		break;
	}

	case OpCopyMemory:
	{
		if (length < 2)
			return false;

		uint32_t dst = args[0];
		uint32_t src = args[1];
		auto *dst_var = compiler.maybe_get_backing_variable(dst);
		auto *src_var = compiler.maybe_get_backing_variable(src);

		if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer))
			access_potential_resource(dst_var->self);

		if (src_var)
		{
			if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer)
				break;

			if (src_var->storage == StorageClassUniform &&
			    !compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock))
			{
				break;
			}

			access_potential_resource(src_var->self);
		}

		break;
	}

	case OpImageRead:
	case OpAtomicLoad:
	{
		if (length < 3)
			return false;

		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);

		// We're only concerned with buffer and image memory here.
		if (!var)
			break;

		switch (var->storage)
		{
		default:
			break;

		case StorageClassUniform:
			// Must have BufferBlock; we only care about SSBOs.
			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
				break;
			// fallthrough
		case StorageClassUniformConstant:
		case StorageClassStorageBuffer:
			access_potential_resource(var->self);
			break;
		}
		break;
	}

	case OpAtomicExchange:
	case OpAtomicCompareExchange:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	case OpAtomicIAdd:
	case OpAtomicISub:
	case OpAtomicSMin:
	case OpAtomicUMin:
	case OpAtomicSMax:
	case OpAtomicUMax:
	case OpAtomicAnd:
	case OpAtomicOr:
	case OpAtomicXor:
	{
		if (length < 3)
			return false;

		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
		            var->storage == StorageClassStorageBuffer))
		{
			access_potential_resource(var->self);
		}

		break;
	}

	default:
		break;
	}

	return true;
}

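// Interlocked resource usage is only analyzed for fragment shaders which declare one of the
// pixel/sample interlock execution modes. Illustrative GLSL for the kind of shader this
// targets (a sketch for orientation, not taken from this code base):
//
//   #extension GL_ARB_fragment_shader_interlock : require
//   layout(pixel_interlock_ordered) in;
//   layout(binding = 0, r32ui) uniform uimage2D counters;
//   void main()
//   {
//       beginInvocationInterlockARB();
//       imageAtomicAdd(counters, ivec2(gl_FragCoord.xy), 1u); // "counters" ends up in interlocked_resources.
//       endInvocationInterlockARB();
//   }
//
// The prepass locates the interlock calls; the second pass then records every buffer/image
// resource accessed in the relevant region so backends know what is touched under the interlock.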
void Compiler::analyze_interlocked_resource_usage()
{
	if (get_execution_model() == ExecutionModelFragment &&
	    (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) ||
	     get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
	     get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
	     get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT)))
	{
		InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point);
		traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), prepass_handler);

		InterlockedResourceAccessHandler handler(*this, ir.default_entry_point);
		handler.interlock_function_id = prepass_handler.interlock_function_id;
		handler.split_function_case = prepass_handler.split_function_case;
		handler.control_flow_interlock = prepass_handler.control_flow_interlock;
		handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock;

		traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

		// For GLSL. If we hit any of these cases, we have to fall back to the conservative approach.
		interlocked_is_complex =
		    !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point;
	}
}

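// Distinguishes an array of pointers from a pointer to an array. As an illustrative example
// (assumed GL_EXT_buffer_reference usage, not taken from this code base), a buffer_reference
// member declared as an array, e.g. "Ref refs[4];", shows up in SPIR-V as an array whose
// element type is a physical pointer, which is the situation this check is after.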
bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
{
	if (!type.pointer)
		return false;

	// If the parent type has the same pointer depth, we must have an array of pointers.
	return type.pointer_depth == get<SPIRType>(type.parent_type).pointer_depth;
}

bool Compiler::type_is_top_level_physical_pointer(const SPIRType &type) const
{
	return type.pointer && type.storage == StorageClassPhysicalStorageBuffer &&
	       type.pointer_depth > get<SPIRType>(type.parent_type).pointer_depth;
}

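// When a branch from block 'from' to block 'to' is emitted, any PHI variable in 'to' whose
// incoming value comes from 'from' must be written out before the branch. This helper simply
// checks whether 'to' contains such a PHI node.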
bool Compiler::flush_phi_required(BlockID from, BlockID to) const
{
	auto &child = get<SPIRBlock>(to);
	for (auto &phi : child.phi_variables)
		if (phi.parent == from)
			return true;
	return false;
}

void Compiler::add_loop_level()
{
	current_loop_level++;
}
