/*
 * Copyright 2015-2021 Arm Limited
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_cross.hpp"
#include "GLSL.std.450.h"
#include "spirv_cfg.hpp"
#include "spirv_common.hpp"
#include "spirv_parser.hpp"
#include <algorithm>
#include <cstring>
#include <utility>

using namespace std;
using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
36
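// The constructors below all funnel their input into set_ir().
// A minimal, hypothetical example of driving this API from client code
// (load_spirv_words() is an assumed helper, not part of SPIRV-Cross):
//
//   std::vector<uint32_t> words = load_spirv_words("shader.spv");
//   spirv_cross::Compiler compiler(std::move(words));
//   spirv_cross::ShaderResources resources = compiler.get_shader_resources();
//
// Note that the base Compiler::compile() returns an empty string;
// concrete backends override it to emit actual shader source.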
37Compiler::Compiler(vector<uint32_t> ir_)
38{
39 Parser parser(std::move(ir_));
40 parser.parse();
41 set_ir(std::move(parser.get_parsed_ir()));
42}
43
44Compiler::Compiler(const uint32_t *ir_, size_t word_count)
45{
46 Parser parser(ir_, word_count);
47 parser.parse();
48 set_ir(std::move(parser.get_parsed_ir()));
49}
50
51Compiler::Compiler(const ParsedIR &ir_)
52{
53 set_ir(ir_);
54}
55
56Compiler::Compiler(ParsedIR &&ir_)
57{
58 set_ir(std::move(ir_));
59}
60
61void Compiler::set_ir(ParsedIR &&ir_)
62{
63 ir = std::move(ir_);
64 parse_fixup();
65}
66
67void Compiler::set_ir(const ParsedIR &ir_)
68{
69 ir = ir_;
70 parse_fixup();
71}
72
73string Compiler::compile()
74{
75 return "";
76}
77
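// A variable's storage is considered aliased if writes through it may be observable
// through other names: SSBOs, storage images, atomic counters and physical storage
// buffer pointers, unless the variable is marked Restrict.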
78bool Compiler::variable_storage_is_aliased(const SPIRVariable &v)
79{
80 auto &type = get<SPIRType>(id: v.basetype);
81 bool ssbo = v.storage == StorageClassStorageBuffer ||
82 ir.meta[type.self].decoration.decoration_flags.get(bit: DecorationBufferBlock);
83 bool image = type.basetype == SPIRType::Image;
84 bool counter = type.basetype == SPIRType::AtomicCounter;
85 bool buffer_reference = type.storage == StorageClassPhysicalStorageBufferEXT;
86
87 bool is_restrict;
88 if (ssbo)
89 is_restrict = ir.get_buffer_block_flags(var: v).get(bit: DecorationRestrict);
90 else
91 is_restrict = has_decoration(id: v.self, decoration: DecorationRestrict);
92
93 return !is_restrict && (ssbo || image || counter || buffer_reference);
94}
95
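// A block is control dependent if it executes any operation whose result depends on
// which other invocations are active: derivatives, implicit-LOD sampling and gathers,
// subgroup operations, or control barriers (directly or via a function call).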
96bool Compiler::block_is_control_dependent(const SPIRBlock &block)
97{
98 for (auto &i : block.ops)
99 {
100 auto ops = stream(instr: i);
101 auto op = static_cast<Op>(i.op);
102
103 switch (op)
104 {
105 case OpFunctionCall:
106 {
107 uint32_t func = ops[2];
108 if (function_is_control_dependent(func: get<SPIRFunction>(id: func)))
109 return true;
110 break;
111 }
112
113 // Derivatives
114 case OpDPdx:
115 case OpDPdxCoarse:
116 case OpDPdxFine:
117 case OpDPdy:
118 case OpDPdyCoarse:
119 case OpDPdyFine:
120 case OpFwidth:
121 case OpFwidthCoarse:
122 case OpFwidthFine:
123
124 // Anything implicit LOD
125 case OpImageSampleImplicitLod:
126 case OpImageSampleDrefImplicitLod:
127 case OpImageSampleProjImplicitLod:
128 case OpImageSampleProjDrefImplicitLod:
129 case OpImageSparseSampleImplicitLod:
130 case OpImageSparseSampleDrefImplicitLod:
131 case OpImageSparseSampleProjImplicitLod:
132 case OpImageSparseSampleProjDrefImplicitLod:
133 case OpImageQueryLod:
134 case OpImageDrefGather:
135 case OpImageGather:
136 case OpImageSparseDrefGather:
137 case OpImageSparseGather:
138
139 // Anything subgroups
140 case OpGroupNonUniformElect:
141 case OpGroupNonUniformAll:
142 case OpGroupNonUniformAny:
143 case OpGroupNonUniformAllEqual:
144 case OpGroupNonUniformBroadcast:
145 case OpGroupNonUniformBroadcastFirst:
146 case OpGroupNonUniformBallot:
147 case OpGroupNonUniformInverseBallot:
148 case OpGroupNonUniformBallotBitExtract:
149 case OpGroupNonUniformBallotBitCount:
150 case OpGroupNonUniformBallotFindLSB:
151 case OpGroupNonUniformBallotFindMSB:
152 case OpGroupNonUniformShuffle:
153 case OpGroupNonUniformShuffleXor:
154 case OpGroupNonUniformShuffleUp:
155 case OpGroupNonUniformShuffleDown:
156 case OpGroupNonUniformIAdd:
157 case OpGroupNonUniformFAdd:
158 case OpGroupNonUniformIMul:
159 case OpGroupNonUniformFMul:
160 case OpGroupNonUniformSMin:
161 case OpGroupNonUniformUMin:
162 case OpGroupNonUniformFMin:
163 case OpGroupNonUniformSMax:
164 case OpGroupNonUniformUMax:
165 case OpGroupNonUniformFMax:
166 case OpGroupNonUniformBitwiseAnd:
167 case OpGroupNonUniformBitwiseOr:
168 case OpGroupNonUniformBitwiseXor:
169 case OpGroupNonUniformLogicalAnd:
170 case OpGroupNonUniformLogicalOr:
171 case OpGroupNonUniformLogicalXor:
172 case OpGroupNonUniformQuadBroadcast:
173 case OpGroupNonUniformQuadSwap:
174
175 // Control barriers
176 case OpControlBarrier:
177 return true;
178
179 default:
180 break;
181 }
182 }
183
184 return false;
185}
186
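// A block is "pure" if executing it has no side effects visible outside the function:
// no stores to non-function storage, image writes, atomics, barriers,
// geometry/mesh/ray-tracing outputs, demote, or calls to impure functions.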
187bool Compiler::block_is_pure(const SPIRBlock &block)
188{
189 // This is a global side effect of the function.
190 if (block.terminator == SPIRBlock::Kill ||
191 block.terminator == SPIRBlock::TerminateRay ||
192 block.terminator == SPIRBlock::IgnoreIntersection ||
193 block.terminator == SPIRBlock::EmitMeshTasks)
194 return false;
195
196 for (auto &i : block.ops)
197 {
198 auto ops = stream(instr: i);
199 auto op = static_cast<Op>(i.op);
200
201 switch (op)
202 {
203 case OpFunctionCall:
204 {
205 uint32_t func = ops[2];
206 if (!function_is_pure(func: get<SPIRFunction>(id: func)))
207 return false;
208 break;
209 }
210
211 case OpCopyMemory:
212 case OpStore:
213 {
214 auto &type = expression_type(id: ops[0]);
215 if (type.storage != StorageClassFunction)
216 return false;
217 break;
218 }
219
220 case OpImageWrite:
221 return false;
222
223 // Atomics are impure.
224 case OpAtomicLoad:
225 case OpAtomicStore:
226 case OpAtomicExchange:
227 case OpAtomicCompareExchange:
228 case OpAtomicCompareExchangeWeak:
229 case OpAtomicIIncrement:
230 case OpAtomicIDecrement:
231 case OpAtomicIAdd:
232 case OpAtomicISub:
233 case OpAtomicSMin:
234 case OpAtomicUMin:
235 case OpAtomicSMax:
236 case OpAtomicUMax:
237 case OpAtomicAnd:
238 case OpAtomicOr:
239 case OpAtomicXor:
240 return false;
241
242 // Geometry shader builtins modify global state.
243 case OpEndPrimitive:
244 case OpEmitStreamVertex:
245 case OpEndStreamPrimitive:
246 case OpEmitVertex:
247 return false;
248
249 // Mesh shader functions modify global state.
250 // (EmitMeshTasks is a terminator).
251 case OpSetMeshOutputsEXT:
252 return false;
253
		// Barriers disallow any reordering, so we should treat blocks with barriers as writing.
255 case OpControlBarrier:
256 case OpMemoryBarrier:
257 return false;
258
259 // Ray tracing builtins are impure.
260 case OpReportIntersectionKHR:
261 case OpIgnoreIntersectionNV:
262 case OpTerminateRayNV:
263 case OpTraceNV:
264 case OpTraceRayKHR:
265 case OpExecuteCallableNV:
266 case OpExecuteCallableKHR:
267 case OpRayQueryInitializeKHR:
268 case OpRayQueryTerminateKHR:
269 case OpRayQueryGenerateIntersectionKHR:
270 case OpRayQueryConfirmIntersectionKHR:
271 case OpRayQueryProceedKHR:
272 // There are various getters in ray query, but they are considered pure.
273 return false;
274
275 // OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure.
276
277 case OpDemoteToHelperInvocationEXT:
278 // This is a global side effect of the function.
279 return false;
280
281 case OpExtInst:
282 {
283 uint32_t extension_set = ops[2];
284 if (get<SPIRExtension>(id: extension_set).ext == SPIRExtension::GLSL)
285 {
286 auto op_450 = static_cast<GLSLstd450>(ops[3]);
287 switch (op_450)
288 {
289 case GLSLstd450Modf:
290 case GLSLstd450Frexp:
291 {
292 auto &type = expression_type(id: ops[5]);
293 if (type.storage != StorageClassFunction)
294 return false;
295 break;
296 }
297
298 default:
299 break;
300 }
301 }
302 break;
303 }
304
305 default:
306 break;
307 }
308 }
309
310 return true;
311}
312
313string Compiler::to_name(uint32_t id, bool allow_alias) const
314{
315 if (allow_alias && ir.ids[id].get_type() == TypeType)
316 {
317 // If this type is a simple alias, emit the
318 // name of the original type instead.
319 // We don't want to override the meta alias
320 // as that can be overridden by the reflection APIs after parse.
321 auto &type = get<SPIRType>(id);
322 if (type.type_alias)
323 {
324 // If the alias master has been specially packed, we will have emitted a clean variant as well,
325 // so skip the name aliasing here.
326 if (!has_extended_decoration(id: type.type_alias, decoration: SPIRVCrossDecorationBufferBlockRepacked))
327 return to_name(id: type.type_alias);
328 }
329 }
330
331 auto &alias = ir.get_name(id);
332 if (alias.empty())
333 return join(ts: "_", ts&: id);
334 else
335 return alias;
336}
337
338bool Compiler::function_is_pure(const SPIRFunction &func)
339{
340 for (auto block : func.blocks)
341 if (!block_is_pure(block: get<SPIRBlock>(id: block)))
342 return false;
343
344 return true;
345}
346
347bool Compiler::function_is_control_dependent(const SPIRFunction &func)
348{
349 for (auto block : func.blocks)
350 if (block_is_control_dependent(block: get<SPIRBlock>(id: block)))
351 return true;
352
353 return false;
354}
355
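// Registers expression `id` as a dependee of every non-function-storage variable loaded
// in this block or in the functions it calls, so the expression can be invalidated
// if those variables are later written to.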
356void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_t id)
357{
358 for (auto &i : block.ops)
359 {
360 auto ops = stream(instr: i);
361 auto op = static_cast<Op>(i.op);
362
363 switch (op)
364 {
365 case OpFunctionCall:
366 {
367 uint32_t func = ops[2];
368 register_global_read_dependencies(func: get<SPIRFunction>(id: func), id);
369 break;
370 }
371
372 case OpLoad:
373 case OpImageRead:
374 {
375 // If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
376 auto *var = maybe_get_backing_variable(chain: ops[2]);
377 if (var && var->storage != StorageClassFunction)
378 {
379 auto &type = get<SPIRType>(id: var->basetype);
380
				// Input attachments are immutable.
382 if (type.basetype != SPIRType::Image && type.image.dim != DimSubpassData)
383 var->dependees.push_back(t: id);
384 }
385 break;
386 }
387
388 default:
389 break;
390 }
391 }
392}
393
394void Compiler::register_global_read_dependencies(const SPIRFunction &func, uint32_t id)
395{
396 for (auto block : func.blocks)
397 register_global_read_dependencies(block: get<SPIRBlock>(id: block), id);
398}
399
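// Resolves a load/store target back to the SPIRVariable backing it, if any,
// looking through expressions and access chains via their loaded_from fields.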
400SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain)
401{
402 auto *var = maybe_get<SPIRVariable>(id: chain);
403 if (!var)
404 {
405 auto *cexpr = maybe_get<SPIRExpression>(id: chain);
406 if (cexpr)
407 var = maybe_get<SPIRVariable>(id: cexpr->loaded_from);
408
409 auto *access_chain = maybe_get<SPIRAccessChain>(id: chain);
410 if (access_chain)
411 var = maybe_get<SPIRVariable>(id: access_chain->loaded_from);
412 }
413
414 return var;
415}
416
417void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded)
418{
419 auto &e = get<SPIRExpression>(id: expr);
420 auto *var = maybe_get_backing_variable(chain);
421
422 if (var)
423 {
424 e.loaded_from = var->self;
425
426 // If the backing variable is immutable, we do not need to depend on the variable.
427 if (forwarded && !is_immutable(id: var->self))
428 var->dependees.push_back(t: e.self);
429
430 // If we load from a parameter, make sure we create "inout" if we also write to the parameter.
431 // The default is "in" however, so we never invalidate our compilation by reading.
432 if (var && var->parameter)
433 var->parameter->read_count++;
434 }
435}
436
437void Compiler::register_write(uint32_t chain)
438{
439 auto *var = maybe_get<SPIRVariable>(id: chain);
440 if (!var)
441 {
442 // If we're storing through an access chain, invalidate the backing variable instead.
443 auto *expr = maybe_get<SPIRExpression>(id: chain);
444 if (expr && expr->loaded_from)
445 var = maybe_get<SPIRVariable>(id: expr->loaded_from);
446
447 auto *access_chain = maybe_get<SPIRAccessChain>(id: chain);
448 if (access_chain && access_chain->loaded_from)
449 var = maybe_get<SPIRVariable>(id: access_chain->loaded_from);
450 }
451
452 auto &chain_type = expression_type(id: chain);
453
454 if (var)
455 {
456 bool check_argument_storage_qualifier = true;
457 auto &type = expression_type(id: chain);
458
459 // If our variable is in a storage class which can alias with other buffers,
460 // invalidate all variables which depend on aliased variables. And if this is a
461 // variable pointer, then invalidate all variables regardless.
462 if (get_variable_data_type(var: *var).pointer)
463 {
464 flush_all_active_variables();
465
466 if (type.pointer_depth == 1)
467 {
468 // We have a backing variable which is a pointer-to-pointer type.
469 // We are storing some data through a pointer acquired through that variable,
470 // but we are not writing to the value of the variable itself,
471 // i.e., we are not modifying the pointer directly.
472 // If we are storing a non-pointer type (pointer_depth == 1),
473 // we know that we are storing some unrelated data.
474 // A case here would be
475 // void foo(Foo * const *arg) {
476 // Foo *bar = *arg;
477 // bar->unrelated = 42;
478 // }
479 // arg, the argument is constant.
480 check_argument_storage_qualifier = false;
481 }
482 }
483
484 if (type.storage == StorageClassPhysicalStorageBufferEXT || variable_storage_is_aliased(v: *var))
485 flush_all_aliased_variables();
486 else if (var)
487 flush_dependees(var&: *var);
488
		// We tried to write to a parameter which is not marked with the out qualifier; force a recompile.
490 if (check_argument_storage_qualifier && var->parameter && var->parameter->write_count == 0)
491 {
492 var->parameter->write_count++;
493 force_recompile();
494 }
495 }
496 else if (chain_type.pointer)
497 {
498 // If we stored through a variable pointer, then we don't know which
499 // variable we stored to. So *all* expressions after this point need to
500 // be invalidated.
501 // FIXME: If we can prove that the variable pointer will point to
502 // only certain variables, we can invalidate only those.
503 flush_all_active_variables();
504 }
505
506 // If chain_type.pointer is false, we're not writing to memory backed variables, but temporaries instead.
507 // This can happen in copy_logical_type where we unroll complex reads and writes to temporaries.
508}
509
510void Compiler::flush_dependees(SPIRVariable &var)
511{
512 for (auto expr : var.dependees)
513 invalid_expressions.insert(x: expr);
514 var.dependees.clear();
515}
516
517void Compiler::flush_all_aliased_variables()
518{
519 for (auto aliased : aliased_variables)
520 flush_dependees(var&: get<SPIRVariable>(id: aliased));
521}
522
523void Compiler::flush_all_atomic_capable_variables()
524{
525 for (auto global : global_variables)
526 flush_dependees(var&: get<SPIRVariable>(id: global));
527 flush_all_aliased_variables();
528}
529
530void Compiler::flush_control_dependent_expressions(uint32_t block_id)
531{
532 auto &block = get<SPIRBlock>(id: block_id);
533 for (auto &expr : block.invalidate_expressions)
534 invalid_expressions.insert(x: expr);
535 block.invalidate_expressions.clear();
536}
537
538void Compiler::flush_all_active_variables()
539{
540 // Invalidate all temporaries we read from variables in this block since they were forwarded.
541 // Invalidate all temporaries we read from globals.
542 for (auto &v : current_function->local_variables)
543 flush_dependees(var&: get<SPIRVariable>(id: v));
544 for (auto &arg : current_function->arguments)
545 flush_dependees(var&: get<SPIRVariable>(id: arg.id));
546 for (auto global : global_variables)
547 flush_dependees(var&: get<SPIRVariable>(id: global));
548
549 flush_all_aliased_variables();
550}
551
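// Maps an arbitrary id (variable, expression, constant, undef, etc.) to the id of its type.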
552uint32_t Compiler::expression_type_id(uint32_t id) const
553{
554 switch (ir.ids[id].get_type())
555 {
556 case TypeVariable:
557 return get<SPIRVariable>(id).basetype;
558
559 case TypeExpression:
560 return get<SPIRExpression>(id).expression_type;
561
562 case TypeConstant:
563 return get<SPIRConstant>(id).constant_type;
564
565 case TypeConstantOp:
566 return get<SPIRConstantOp>(id).basetype;
567
568 case TypeUndef:
569 return get<SPIRUndef>(id).basetype;
570
571 case TypeCombinedImageSampler:
572 return get<SPIRCombinedImageSampler>(id).combined_type;
573
574 case TypeAccessChain:
575 return get<SPIRAccessChain>(id).basetype;
576
577 default:
578 SPIRV_CROSS_THROW("Cannot resolve expression type.");
579 }
580}
581
582const SPIRType &Compiler::expression_type(uint32_t id) const
583{
584 return get<SPIRType>(id: expression_type_id(id));
585}
586
587bool Compiler::expression_is_lvalue(uint32_t id) const
588{
589 auto &type = expression_type(id);
590 switch (type.basetype)
591 {
592 case SPIRType::SampledImage:
593 case SPIRType::Image:
594 case SPIRType::Sampler:
595 return false;
596
597 default:
598 return true;
599 }
600}
601
602bool Compiler::is_immutable(uint32_t id) const
603{
604 if (ir.ids[id].get_type() == TypeVariable)
605 {
606 auto &var = get<SPIRVariable>(id);
607
608 // Anything we load from the UniformConstant address space is guaranteed to be immutable.
609 bool pointer_to_const = var.storage == StorageClassUniformConstant;
610 return pointer_to_const || var.phi_variable || !expression_is_lvalue(id);
611 }
612 else if (ir.ids[id].get_type() == TypeAccessChain)
613 return get<SPIRAccessChain>(id).immutable;
614 else if (ir.ids[id].get_type() == TypeExpression)
615 return get<SPIRExpression>(id).immutable;
616 else if (ir.ids[id].get_type() == TypeConstant || ir.ids[id].get_type() == TypeConstantOp ||
617 ir.ids[id].get_type() == TypeUndef)
618 return true;
619 else
620 return false;
621}
622
623static inline bool storage_class_is_interface(spv::StorageClass storage)
624{
625 switch (storage)
626 {
627 case StorageClassInput:
628 case StorageClassOutput:
629 case StorageClassUniform:
630 case StorageClassUniformConstant:
631 case StorageClassAtomicCounter:
632 case StorageClassPushConstant:
633 case StorageClassStorageBuffer:
634 return true;
635
636 default:
637 return false;
638 }
639}
640
641bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins) const
642{
643 if ((is_builtin_variable(var) && !include_builtins) || var.remapped_variable)
644 return true;
645
646 // Combined image samplers are always considered active as they are "magic" variables.
647 if (find_if(first: begin(cont: combined_image_samplers), last: end(cont: combined_image_samplers), pred: [&var](const CombinedImageSampler &samp) {
648 return samp.combined_id == var.self;
649 }) != end(cont: combined_image_samplers))
650 {
651 return false;
652 }
653
654 // In SPIR-V 1.4 and up we must also use the active variable interface to disable global variables
655 // which are not part of the entry point.
656 if (ir.get_spirv_version() >= 0x10400 && var.storage != spv::StorageClassGeneric &&
657 var.storage != spv::StorageClassFunction && !interface_variable_exists_in_entry_point(id: var.self))
658 {
659 return true;
660 }
661
662 return check_active_interface_variables && storage_class_is_interface(storage: var.storage) &&
663 active_interface_variables.find(x: var.self) == end(cont: active_interface_variables);
664}
665
666bool Compiler::is_builtin_type(const SPIRType &type) const
667{
668 auto *type_meta = ir.find_meta(id: type.self);
669
670 // We can have builtin structs as well. If one member of a struct is builtin, the struct must also be builtin.
671 if (type_meta)
672 for (auto &m : type_meta->members)
673 if (m.builtin)
674 return true;
675
676 return false;
677}
678
679bool Compiler::is_builtin_variable(const SPIRVariable &var) const
680{
681 auto *m = ir.find_meta(id: var.self);
682
683 if (var.compat_builtin || (m && m->decoration.builtin))
684 return true;
685 else
686 return is_builtin_type(type: get<SPIRType>(id: var.basetype));
687}
688
689bool Compiler::is_member_builtin(const SPIRType &type, uint32_t index, BuiltIn *builtin) const
690{
691 auto *type_meta = ir.find_meta(id: type.self);
692
693 if (type_meta)
694 {
695 auto &memb = type_meta->members;
696 if (index < memb.size() && memb[index].builtin)
697 {
698 if (builtin)
699 *builtin = memb[index].builtin_type;
700 return true;
701 }
702 }
703
704 return false;
705}
706
707bool Compiler::is_scalar(const SPIRType &type) const
708{
709 return type.basetype != SPIRType::Struct && type.vecsize == 1 && type.columns == 1;
710}
711
712bool Compiler::is_vector(const SPIRType &type) const
713{
714 return type.vecsize > 1 && type.columns == 1;
715}
716
717bool Compiler::is_matrix(const SPIRType &type) const
718{
719 return type.vecsize > 1 && type.columns > 1;
720}
721
722bool Compiler::is_array(const SPIRType &type) const
723{
724 return type.op == OpTypeArray || type.op == OpTypeRuntimeArray;
725}
726
727bool Compiler::is_pointer(const SPIRType &type) const
728{
729 return type.op == OpTypePointer && type.basetype != SPIRType::Unknown; // Ignore function pointers.
730}
731
732bool Compiler::is_physical_pointer(const SPIRType &type) const
733{
734 return type.op == OpTypePointer && type.storage == StorageClassPhysicalStorageBuffer;
735}
736
737bool Compiler::is_physical_pointer_to_buffer_block(const SPIRType &type) const
738{
739 return is_physical_pointer(type) && get_pointee_type(type).self == type.parent_type &&
740 (has_decoration(id: type.self, decoration: DecorationBlock) ||
741 has_decoration(id: type.self, decoration: DecorationBufferBlock));
742}
743
744bool Compiler::is_runtime_size_array(const SPIRType &type)
745{
746 return type.op == OpTypeRuntimeArray;
747}
748
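// A minimal, hypothetical reflection loop over the returned resources
// (descriptor set / binding decorations are only meaningful for Vulkan-style modules):
//
//   spirv_cross::ShaderResources res = compiler.get_shader_resources();
//   for (auto &ubo : res.uniform_buffers)
//   {
//       uint32_t set = compiler.get_decoration(ubo.id, spv::DecorationDescriptorSet);
//       uint32_t binding = compiler.get_decoration(ubo.id, spv::DecorationBinding);
//       // set/binding/ubo.name can now be used to build a pipeline layout.
//   }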
749ShaderResources Compiler::get_shader_resources() const
750{
751 return get_shader_resources(active_variables: nullptr);
752}
753
754ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> &active_variables) const
755{
756 return get_shader_resources(active_variables: &active_variables);
757}
758
759bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
760{
761 uint32_t variable = 0;
762 switch (opcode)
763 {
764 // Need this first, otherwise, GCC complains about unhandled switch statements.
765 default:
766 break;
767
768 case OpFunctionCall:
769 {
770 // Invalid SPIR-V.
771 if (length < 3)
772 return false;
773
774 uint32_t count = length - 3;
775 args += 3;
776 for (uint32_t i = 0; i < count; i++)
777 {
778 auto *var = compiler.maybe_get<SPIRVariable>(id: args[i]);
779 if (var && storage_class_is_interface(storage: var->storage))
780 variables.insert(x: args[i]);
781 }
782 break;
783 }
784
785 case OpSelect:
786 {
787 // Invalid SPIR-V.
788 if (length < 5)
789 return false;
790
791 uint32_t count = length - 3;
792 args += 3;
793 for (uint32_t i = 0; i < count; i++)
794 {
795 auto *var = compiler.maybe_get<SPIRVariable>(id: args[i]);
796 if (var && storage_class_is_interface(storage: var->storage))
797 variables.insert(x: args[i]);
798 }
799 break;
800 }
801
802 case OpPhi:
803 {
804 // Invalid SPIR-V.
805 if (length < 2)
806 return false;
807
808 uint32_t count = length - 2;
809 args += 2;
810 for (uint32_t i = 0; i < count; i += 2)
811 {
812 auto *var = compiler.maybe_get<SPIRVariable>(id: args[i]);
813 if (var && storage_class_is_interface(storage: var->storage))
814 variables.insert(x: args[i]);
815 }
816 break;
817 }
818
819 case OpAtomicStore:
820 case OpStore:
821 // Invalid SPIR-V.
822 if (length < 1)
823 return false;
824 variable = args[0];
825 break;
826
827 case OpCopyMemory:
828 {
829 if (length < 2)
830 return false;
831
832 auto *var = compiler.maybe_get<SPIRVariable>(id: args[0]);
833 if (var && storage_class_is_interface(storage: var->storage))
834 variables.insert(x: args[0]);
835
836 var = compiler.maybe_get<SPIRVariable>(id: args[1]);
837 if (var && storage_class_is_interface(storage: var->storage))
838 variables.insert(x: args[1]);
839 break;
840 }
841
842 case OpExtInst:
843 {
844 if (length < 3)
845 return false;
846 auto &extension_set = compiler.get<SPIRExtension>(id: args[2]);
847 switch (extension_set.ext)
848 {
849 case SPIRExtension::GLSL:
850 {
851 auto op = static_cast<GLSLstd450>(args[3]);
852
853 switch (op)
854 {
855 case GLSLstd450InterpolateAtCentroid:
856 case GLSLstd450InterpolateAtSample:
857 case GLSLstd450InterpolateAtOffset:
858 {
859 auto *var = compiler.maybe_get<SPIRVariable>(id: args[4]);
860 if (var && storage_class_is_interface(storage: var->storage))
861 variables.insert(x: args[4]);
862 break;
863 }
864
865 case GLSLstd450Modf:
			case GLSLstd450Frexp:
867 {
868 auto *var = compiler.maybe_get<SPIRVariable>(id: args[5]);
869 if (var && storage_class_is_interface(storage: var->storage))
870 variables.insert(x: args[5]);
871 break;
872 }
873
874 default:
875 break;
876 }
877 break;
878 }
879 case SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter:
880 {
881 enum AMDShaderExplicitVertexParameter
882 {
883 InterpolateAtVertexAMD = 1
884 };
885
886 auto op = static_cast<AMDShaderExplicitVertexParameter>(args[3]);
887
888 switch (op)
889 {
890 case InterpolateAtVertexAMD:
891 {
892 auto *var = compiler.maybe_get<SPIRVariable>(id: args[4]);
893 if (var && storage_class_is_interface(storage: var->storage))
894 variables.insert(x: args[4]);
895 break;
896 }
897
898 default:
899 break;
900 }
901 break;
902 }
903 default:
904 break;
905 }
906 break;
907 }
908
909 case OpAccessChain:
910 case OpInBoundsAccessChain:
911 case OpPtrAccessChain:
912 case OpLoad:
913 case OpCopyObject:
914 case OpImageTexelPointer:
915 case OpAtomicLoad:
916 case OpAtomicExchange:
917 case OpAtomicCompareExchange:
918 case OpAtomicCompareExchangeWeak:
919 case OpAtomicIIncrement:
920 case OpAtomicIDecrement:
921 case OpAtomicIAdd:
922 case OpAtomicISub:
923 case OpAtomicSMin:
924 case OpAtomicUMin:
925 case OpAtomicSMax:
926 case OpAtomicUMax:
927 case OpAtomicAnd:
928 case OpAtomicOr:
929 case OpAtomicXor:
930 case OpArrayLength:
931 // Invalid SPIR-V.
932 if (length < 3)
933 return false;
934 variable = args[2];
935 break;
936 }
937
938 if (variable)
939 {
940 auto *var = compiler.maybe_get<SPIRVariable>(id: variable);
941 if (var && storage_class_is_interface(storage: var->storage))
942 variables.insert(x: variable);
943 }
944 return true;
945}
946
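// Walks every opcode reachable from the default entry point and collects the interface
// variables which are actually referenced, plus output variables that must be preserved
// for subsequent stages.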
947unordered_set<VariableID> Compiler::get_active_interface_variables() const
948{
949 // Traverse the call graph and find all interface variables which are in use.
950 unordered_set<VariableID> variables;
951 InterfaceVariableAccessHandler handler(*this, variables);
952 traverse_all_reachable_opcodes(block: get<SPIRFunction>(id: ir.default_entry_point), handler);
953
954 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
955 if (var.storage != StorageClassOutput)
956 return;
957 if (!interface_variable_exists_in_entry_point(id: var.self))
958 return;
959
960 // An output variable which is just declared (but uninitialized) might be read by subsequent stages
961 // so we should force-enable these outputs,
962 // since compilation will fail if a subsequent stage attempts to read from the variable in question.
963 // Also, make sure we preserve output variables which are only initialized, but never accessed by any code.
964 if (var.initializer != ID(0) || get_execution_model() != ExecutionModelFragment)
965 variables.insert(x: var.self);
966 });
967
968 // If we needed to create one, we'll need it.
969 if (dummy_sampler_id)
970 variables.insert(x: dummy_sampler_id);
971
972 return variables;
973}
974
975void Compiler::set_enabled_interface_variables(std::unordered_set<VariableID> active_variables)
976{
977 active_interface_variables = std::move(active_variables);
978 check_active_interface_variables = true;
979}
980
981ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *active_variables) const
982{
983 ShaderResources res;
984
985 bool ssbo_instance_name = reflection_ssbo_instance_name_is_significant();
986
987 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
988 auto &type = this->get<SPIRType>(id: var.basetype);
989
		// It is possible for variables in uniform storage classes to be passed as function parameters, so detect
		// that. To detect function parameters, check whether the StorageClass of the variable is Function scope.
992 if (var.storage == StorageClassFunction || !type.pointer)
993 return;
994
995 if (active_variables && active_variables->find(x: var.self) == end(cont: *active_variables))
996 return;
997
998 // In SPIR-V 1.4 and up, every global must be present in the entry point interface list,
999 // not just IO variables.
1000 bool active_in_entry_point = true;
1001 if (ir.get_spirv_version() < 0x10400)
1002 {
1003 if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
1004 active_in_entry_point = interface_variable_exists_in_entry_point(id: var.self);
1005 }
1006 else
1007 active_in_entry_point = interface_variable_exists_in_entry_point(id: var.self);
1008
1009 if (!active_in_entry_point)
1010 return;
1011
1012 bool is_builtin = is_builtin_variable(var);
1013
1014 if (is_builtin)
1015 {
1016 if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
1017 return;
1018
1019 auto &list = var.storage == StorageClassInput ? res.builtin_inputs : res.builtin_outputs;
1020 BuiltInResource resource;
1021
1022 if (has_decoration(id: type.self, decoration: DecorationBlock))
1023 {
1024 resource.resource = { .id: var.self, .type_id: var.basetype, .base_type_id: type.self,
1025 .name: get_remapped_declared_block_name(id: var.self, fallback_prefer_instance_name: false) };
1026
1027 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
1028 {
1029 resource.value_type_id = type.member_types[i];
1030 resource.builtin = BuiltIn(get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn));
1031 list.push_back(t: resource);
1032 }
1033 }
1034 else
1035 {
1036 bool strip_array =
1037 !has_decoration(id: var.self, decoration: DecorationPatch) && (
1038 get_execution_model() == ExecutionModelTessellationControl ||
1039 (get_execution_model() == ExecutionModelTessellationEvaluation &&
1040 var.storage == StorageClassInput));
1041
1042 resource.resource = { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) };
1043
1044 if (strip_array && !type.array.empty())
1045 resource.value_type_id = get_variable_data_type(var).parent_type;
1046 else
1047 resource.value_type_id = get_variable_data_type_id(var);
1048
1049 assert(resource.value_type_id);
1050
1051 resource.builtin = BuiltIn(get_decoration(id: var.self, decoration: DecorationBuiltIn));
1052 list.push_back(t: std::move(resource));
1053 }
1054 return;
1055 }
1056
1057 // Input
1058 if (var.storage == StorageClassInput)
1059 {
1060 if (has_decoration(id: type.self, decoration: DecorationBlock))
1061 {
1062 res.stage_inputs.push_back(
1063 t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self,
1064 .name: get_remapped_declared_block_name(id: var.self, fallback_prefer_instance_name: false) });
1065 }
1066 else
1067 res.stage_inputs.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1068 }
1069 // Subpass inputs
1070 else if (var.storage == StorageClassUniformConstant && type.image.dim == DimSubpassData)
1071 {
1072 res.subpass_inputs.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1073 }
1074 // Outputs
1075 else if (var.storage == StorageClassOutput)
1076 {
1077 if (has_decoration(id: type.self, decoration: DecorationBlock))
1078 {
1079 res.stage_outputs.push_back(
1080 t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_remapped_declared_block_name(id: var.self, fallback_prefer_instance_name: false) });
1081 }
1082 else
1083 res.stage_outputs.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1084 }
1085 // UBOs
1086 else if (type.storage == StorageClassUniform && has_decoration(id: type.self, decoration: DecorationBlock))
1087 {
1088 res.uniform_buffers.push_back(
1089 t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_remapped_declared_block_name(id: var.self, fallback_prefer_instance_name: false) });
1090 }
1091 // Old way to declare SSBOs.
1092 else if (type.storage == StorageClassUniform && has_decoration(id: type.self, decoration: DecorationBufferBlock))
1093 {
1094 res.storage_buffers.push_back(
1095 t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_remapped_declared_block_name(id: var.self, fallback_prefer_instance_name: ssbo_instance_name) });
1096 }
1097 // Modern way to declare SSBOs.
1098 else if (type.storage == StorageClassStorageBuffer)
1099 {
1100 res.storage_buffers.push_back(
1101 t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_remapped_declared_block_name(id: var.self, fallback_prefer_instance_name: ssbo_instance_name) });
1102 }
1103 // Push constant blocks
1104 else if (type.storage == StorageClassPushConstant)
1105 {
1106 // There can only be one push constant block, but keep the vector in case this restriction is lifted
1107 // in the future.
1108 res.push_constant_buffers.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1109 }
1110 else if (type.storage == StorageClassShaderRecordBufferKHR)
1111 {
1112 res.shader_record_buffers.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_remapped_declared_block_name(id: var.self, fallback_prefer_instance_name: ssbo_instance_name) });
1113 }
1114 // Atomic counters
1115 else if (type.storage == StorageClassAtomicCounter)
1116 {
1117 res.atomic_counters.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1118 }
1119 else if (type.storage == StorageClassUniformConstant)
1120 {
1121 if (type.basetype == SPIRType::Image)
1122 {
1123 // Images
1124 if (type.image.sampled == 2)
1125 {
1126 res.storage_images.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1127 }
1128 // Separate images
1129 else if (type.image.sampled == 1)
1130 {
1131 res.separate_images.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1132 }
1133 }
1134 // Separate samplers
1135 else if (type.basetype == SPIRType::Sampler)
1136 {
1137 res.separate_samplers.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1138 }
1139 // Textures
1140 else if (type.basetype == SPIRType::SampledImage)
1141 {
1142 res.sampled_images.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1143 }
1144 // Acceleration structures
1145 else if (type.basetype == SPIRType::AccelerationStructure)
1146 {
1147 res.acceleration_structures.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1148 }
1149 else
1150 {
1151 res.gl_plain_uniforms.push_back(t: { .id: var.self, .type_id: var.basetype, .base_type_id: type.self, .name: get_name(id: var.self) });
1152 }
1153 }
1154 });
1155
1156 return res;
1157}
1158
1159bool Compiler::type_is_top_level_block(const SPIRType &type) const
1160{
1161 if (type.basetype != SPIRType::Struct)
1162 return false;
1163 return has_decoration(id: type.self, decoration: DecorationBlock) || has_decoration(id: type.self, decoration: DecorationBufferBlock);
1164}
1165
1166bool Compiler::type_is_block_like(const SPIRType &type) const
1167{
1168 if (type_is_top_level_block(type))
1169 return true;
1170
1171 if (type.basetype == SPIRType::Struct)
1172 {
1173 // Block-like types may have Offset decorations.
1174 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
1175 if (has_member_decoration(id: type.self, index: i, decoration: DecorationOffset))
1176 return true;
1177 }
1178
1179 return false;
1180}
1181
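// Post-parse fixups: pick up the WorkgroupSize builtin constant for all entry points,
// and build the lists of global and aliased variables used for invalidation tracking.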
1182void Compiler::parse_fixup()
1183{
1184 // Figure out specialization constants for work group sizes.
1185 for (auto id_ : ir.ids_for_constant_or_variable)
1186 {
1187 auto &id = ir.ids[id_];
1188
1189 if (id.get_type() == TypeConstant)
1190 {
1191 auto &c = id.get<SPIRConstant>();
1192 if (has_decoration(id: c.self, decoration: DecorationBuiltIn) &&
1193 BuiltIn(get_decoration(id: c.self, decoration: DecorationBuiltIn)) == BuiltInWorkgroupSize)
1194 {
1195 // In current SPIR-V, there can be just one constant like this.
1196 // All entry points will receive the constant value.
				// WorkgroupSize takes precedence over LocalSizeId.
1198 for (auto &entry : ir.entry_points)
1199 {
1200 entry.second.workgroup_size.constant = c.self;
1201 entry.second.workgroup_size.x = c.scalar(col: 0, row: 0);
1202 entry.second.workgroup_size.y = c.scalar(col: 0, row: 1);
1203 entry.second.workgroup_size.z = c.scalar(col: 0, row: 2);
1204 }
1205 }
1206 }
1207 else if (id.get_type() == TypeVariable)
1208 {
1209 auto &var = id.get<SPIRVariable>();
1210 if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup ||
1211 var.storage == StorageClassTaskPayloadWorkgroupEXT ||
1212 var.storage == StorageClassOutput)
1213 {
1214 global_variables.push_back(t: var.self);
1215 }
1216 if (variable_storage_is_aliased(v: var))
1217 aliased_variables.push_back(t: var.self);
1218 }
1219 }
1220}
1221
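// Ensures `name` is unique with respect to the caches, appending "_<counter>"
// (falling back to "_0_<counter>" when the name is just "_") until no collision remains.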
1222void Compiler::update_name_cache(unordered_set<string> &cache_primary, const unordered_set<string> &cache_secondary,
1223 string &name)
1224{
1225 if (name.empty())
1226 return;
1227
1228 const auto find_name = [&](const string &n) -> bool {
1229 if (cache_primary.find(x: n) != end(cont&: cache_primary))
1230 return true;
1231
1232 if (&cache_primary != &cache_secondary)
1233 if (cache_secondary.find(x: n) != end(cont: cache_secondary))
1234 return true;
1235
1236 return false;
1237 };
1238
1239 const auto insert_name = [&](const string &n) { cache_primary.insert(x: n); };
1240
1241 if (!find_name(name))
1242 {
1243 insert_name(name);
1244 return;
1245 }
1246
1247 uint32_t counter = 0;
1248 auto tmpname = name;
1249
1250 bool use_linked_underscore = true;
1251
1252 if (tmpname == "_")
1253 {
1254 // We cannot just append numbers, as we will end up creating internally reserved names.
1255 // Make it like _0_<counter> instead.
1256 tmpname += "0";
1257 }
1258 else if (tmpname.back() == '_')
1259 {
		// The last character is already an underscore, so we don't need to link in another one.
		// Doing so would violate double-underscore rules.
1262 use_linked_underscore = false;
1263 }
1264
1265 // If there is a collision (very rare),
1266 // keep tacking on extra identifier until it's unique.
1267 do
1268 {
1269 counter++;
1270 name = tmpname + (use_linked_underscore ? "_" : "") + convert_to_string(t: counter);
1271 } while (find_name(name));
1272 insert_name(name);
1273}
1274
1275void Compiler::update_name_cache(unordered_set<string> &cache, string &name)
1276{
1277 update_name_cache(cache_primary&: cache, cache_secondary: cache, name);
1278}
1279
1280void Compiler::set_name(ID id, const std::string &name)
1281{
1282 ir.set_name(id, name);
1283}
1284
1285const SPIRType &Compiler::get_type(TypeID id) const
1286{
1287 return get<SPIRType>(id);
1288}
1289
1290const SPIRType &Compiler::get_type_from_variable(VariableID id) const
1291{
1292 return get<SPIRType>(id: get<SPIRVariable>(id).basetype);
1293}
1294
1295uint32_t Compiler::get_pointee_type_id(uint32_t type_id) const
1296{
1297 auto *p_type = &get<SPIRType>(id: type_id);
1298 if (p_type->pointer)
1299 {
1300 assert(p_type->parent_type);
1301 type_id = p_type->parent_type;
1302 }
1303 return type_id;
1304}
1305
1306const SPIRType &Compiler::get_pointee_type(const SPIRType &type) const
1307{
1308 auto *p_type = &type;
1309 if (p_type->pointer)
1310 {
1311 assert(p_type->parent_type);
1312 p_type = &get<SPIRType>(id: p_type->parent_type);
1313 }
1314 return *p_type;
1315}
1316
1317const SPIRType &Compiler::get_pointee_type(uint32_t type_id) const
1318{
1319 return get_pointee_type(type: get<SPIRType>(id: type_id));
1320}
1321
1322uint32_t Compiler::get_variable_data_type_id(const SPIRVariable &var) const
1323{
1324 if (var.phi_variable || var.storage == spv::StorageClass::StorageClassAtomicCounter)
1325 return var.basetype;
1326 return get_pointee_type_id(type_id: var.basetype);
1327}
1328
1329SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var)
1330{
1331 return get<SPIRType>(id: get_variable_data_type_id(var));
1332}
1333
1334const SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) const
1335{
1336 return get<SPIRType>(id: get_variable_data_type_id(var));
1337}
1338
1339SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var)
1340{
1341 SPIRType *type = &get_variable_data_type(var);
1342 if (is_array(type: *type))
1343 type = &get<SPIRType>(id: type->parent_type);
1344 return *type;
1345}
1346
1347const SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) const
1348{
1349 const SPIRType *type = &get_variable_data_type(var);
1350 if (is_array(type: *type))
1351 type = &get<SPIRType>(id: type->parent_type);
1352 return *type;
1353}
1354
1355bool Compiler::is_sampled_image_type(const SPIRType &type)
1356{
1357 return (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage) && type.image.sampled == 1 &&
1358 type.image.dim != DimBuffer;
1359}
1360
1361void Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration,
1362 const std::string &argument)
1363{
1364 ir.set_member_decoration_string(id, index, decoration, argument);
1365}
1366
1367void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument)
1368{
1369 ir.set_member_decoration(id, index, decoration, argument);
1370}
1371
1372void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name)
1373{
1374 ir.set_member_name(id, index, name);
1375}
1376
1377const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const
1378{
1379 return ir.get_member_name(id, index);
1380}
1381
1382void Compiler::set_qualified_name(uint32_t id, const string &name)
1383{
1384 ir.meta[id].decoration.qualified_alias = name;
1385}
1386
1387void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const std::string &name)
1388{
1389 ir.meta[type_id].members.resize(new_size: max(a: ir.meta[type_id].members.size(), b: size_t(index) + 1));
1390 ir.meta[type_id].members[index].qualified_alias = name;
1391}
1392
1393const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const
1394{
1395 auto *m = ir.find_meta(id: type_id);
1396 if (m && index < m->members.size())
1397 return m->members[index].qualified_alias;
1398 else
1399 return ir.get_empty_string();
1400}
1401
1402uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
1403{
1404 return ir.get_member_decoration(id, index, decoration);
1405}
1406
1407const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const
1408{
1409 return ir.get_member_decoration_bitset(id, index);
1410}
1411
1412bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
1413{
1414 return ir.has_member_decoration(id, index, decoration);
1415}
1416
1417void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration)
1418{
1419 ir.unset_member_decoration(id, index, decoration);
1420}
1421
1422void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument)
1423{
1424 ir.set_decoration_string(id, decoration, argument);
1425}
1426
1427void Compiler::set_decoration(ID id, Decoration decoration, uint32_t argument)
1428{
1429 ir.set_decoration(id, decoration, argument);
1430}
1431
1432void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value)
1433{
1434 auto &dec = ir.meta[id].decoration;
1435 dec.extended.flags.set(decoration);
1436 dec.extended.values[decoration] = value;
1437}
1438
1439void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration,
1440 uint32_t value)
1441{
1442 ir.meta[type].members.resize(new_size: max(a: ir.meta[type].members.size(), b: size_t(index) + 1));
1443 auto &dec = ir.meta[type].members[index];
1444 dec.extended.flags.set(decoration);
1445 dec.extended.values[decoration] = value;
1446}
1447
1448static uint32_t get_default_extended_decoration(ExtendedDecorations decoration)
1449{
1450 switch (decoration)
1451 {
1452 case SPIRVCrossDecorationResourceIndexPrimary:
1453 case SPIRVCrossDecorationResourceIndexSecondary:
1454 case SPIRVCrossDecorationResourceIndexTertiary:
1455 case SPIRVCrossDecorationResourceIndexQuaternary:
1456 case SPIRVCrossDecorationInterfaceMemberIndex:
1457 return ~(0u);
1458
1459 default:
1460 return 0;
1461 }
1462}
1463
1464uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
1465{
1466 auto *m = ir.find_meta(id);
1467 if (!m)
1468 return 0;
1469
1470 auto &dec = m->decoration;
1471
1472 if (!dec.extended.flags.get(bit: decoration))
1473 return get_default_extended_decoration(decoration);
1474
1475 return dec.extended.values[decoration];
1476}
1477
1478uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
1479{
1480 auto *m = ir.find_meta(id: type);
1481 if (!m)
1482 return 0;
1483
1484 if (index >= m->members.size())
1485 return 0;
1486
1487 auto &dec = m->members[index];
1488 if (!dec.extended.flags.get(bit: decoration))
1489 return get_default_extended_decoration(decoration);
1490 return dec.extended.values[decoration];
1491}
1492
1493bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
1494{
1495 auto *m = ir.find_meta(id);
1496 if (!m)
1497 return false;
1498
1499 auto &dec = m->decoration;
1500 return dec.extended.flags.get(bit: decoration);
1501}
1502
1503bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
1504{
1505 auto *m = ir.find_meta(id: type);
1506 if (!m)
1507 return false;
1508
1509 if (index >= m->members.size())
1510 return false;
1511
1512 auto &dec = m->members[index];
1513 return dec.extended.flags.get(bit: decoration);
1514}
1515
1516void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration)
1517{
1518 auto &dec = ir.meta[id].decoration;
1519 dec.extended.flags.clear(bit: decoration);
1520 dec.extended.values[decoration] = 0;
1521}
1522
1523void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration)
1524{
1525 ir.meta[type].members.resize(new_size: max(a: ir.meta[type].members.size(), b: size_t(index) + 1));
1526 auto &dec = ir.meta[type].members[index];
1527 dec.extended.flags.clear(bit: decoration);
1528 dec.extended.values[decoration] = 0;
1529}
1530
1531StorageClass Compiler::get_storage_class(VariableID id) const
1532{
1533 return get<SPIRVariable>(id).storage;
1534}
1535
1536const std::string &Compiler::get_name(ID id) const
1537{
1538 return ir.get_name(id);
1539}
1540
1541const std::string Compiler::get_fallback_name(ID id) const
1542{
1543 return join(ts: "_", ts&: id);
1544}
1545
1546const std::string Compiler::get_block_fallback_name(VariableID id) const
1547{
1548 auto &var = get<SPIRVariable>(id);
1549 if (get_name(id).empty())
1550 return join(ts: "_", ts: get<SPIRType>(id: var.basetype).self, ts: "_", ts&: id);
1551 else
1552 return get_name(id);
1553}
1554
1555const Bitset &Compiler::get_decoration_bitset(ID id) const
1556{
1557 return ir.get_decoration_bitset(id);
1558}
1559
1560bool Compiler::has_decoration(ID id, Decoration decoration) const
1561{
1562 return ir.has_decoration(id, decoration);
1563}
1564
1565const string &Compiler::get_decoration_string(ID id, Decoration decoration) const
1566{
1567 return ir.get_decoration_string(id, decoration);
1568}
1569
1570const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const
1571{
1572 return ir.get_member_decoration_string(id, index, decoration);
1573}
1574
1575uint32_t Compiler::get_decoration(ID id, Decoration decoration) const
1576{
1577 return ir.get_decoration(id, decoration);
1578}
1579
1580void Compiler::unset_decoration(ID id, Decoration decoration)
1581{
1582 ir.unset_decoration(id, decoration);
1583}
1584
1585bool Compiler::get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const
1586{
1587 auto *m = ir.find_meta(id);
1588 if (!m)
1589 return false;
1590
1591 auto &word_offsets = m->decoration_word_offset;
1592 auto itr = word_offsets.find(x: decoration);
1593 if (itr == end(cont: word_offsets))
1594 return false;
1595
1596 word_offset = itr->second;
1597 return true;
1598}
1599
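// A block is a no-op if it falls straight through to its successor, takes part in no PHI,
// and contains only non-semantic instructions (OpLine/OpNoLine and debug-info OpExtInst).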
1600bool Compiler::block_is_noop(const SPIRBlock &block) const
1601{
1602 if (block.terminator != SPIRBlock::Direct)
1603 return false;
1604
1605 auto &child = get<SPIRBlock>(id: block.next_block);
1606
1607 // If this block participates in PHI, the block isn't really noop.
1608 for (auto &phi : block.phi_variables)
1609 if (phi.parent == block.self || phi.parent == child.self)
1610 return false;
1611
1612 for (auto &phi : child.phi_variables)
1613 if (phi.parent == block.self)
1614 return false;
1615
1616 // Verify all instructions have no semantic impact.
1617 for (auto &i : block.ops)
1618 {
1619 auto op = static_cast<Op>(i.op);
1620
1621 switch (op)
1622 {
1623 // Non-Semantic instructions.
1624 case OpLine:
1625 case OpNoLine:
1626 break;
1627
1628 case OpExtInst:
1629 {
1630 auto *ops = stream(instr: i);
1631 auto ext = get<SPIRExtension>(id: ops[2]).ext;
1632
1633 bool ext_is_nonsemantic_only =
1634 ext == SPIRExtension::NonSemanticShaderDebugInfo ||
1635 ext == SPIRExtension::SPV_debug_info ||
1636 ext == SPIRExtension::NonSemanticGeneric;
1637
1638 if (!ext_is_nonsemantic_only)
1639 return false;
1640
1641 break;
1642 }
1643
1644 default:
1645 return false;
1646 }
1647 }
1648
1649 return true;
1650}
1651
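// Heuristics for detecting whether a loop header can be emitted as a tidy for/while loop
// in the output language, depending on which merge-to-select pattern is requested.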
1652bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const
1653{
1654 // Tried and failed.
1655 if (block.disable_block_optimization || block.complex_continue)
1656 return false;
1657
1658 if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
1659 {
1660 // Try to detect common for loop pattern
1661 // which the code backend can use to create cleaner code.
1662 // for(;;) { if (cond) { some_body; } else { break; } }
1663 // is the pattern we're looking for.
1664 const auto *false_block = maybe_get<SPIRBlock>(id: block.false_block);
1665 const auto *true_block = maybe_get<SPIRBlock>(id: block.true_block);
1666 const auto *merge_block = maybe_get<SPIRBlock>(id: block.merge_block);
1667
1668 bool false_block_is_merge = block.false_block == block.merge_block ||
1669 (false_block && merge_block && execution_is_noop(from: *false_block, to: *merge_block));
1670
1671 bool true_block_is_merge = block.true_block == block.merge_block ||
1672 (true_block && merge_block && execution_is_noop(from: *true_block, to: *merge_block));
1673
1674 bool positive_candidate =
1675 block.true_block != block.merge_block && block.true_block != block.self && false_block_is_merge;
1676
1677 bool negative_candidate =
1678 block.false_block != block.merge_block && block.false_block != block.self && true_block_is_merge;
1679
1680 bool ret = block.terminator == SPIRBlock::Select && block.merge == SPIRBlock::MergeLoop &&
1681 (positive_candidate || negative_candidate);
1682
1683 if (ret && positive_candidate && method == SPIRBlock::MergeToSelectContinueForLoop)
1684 ret = block.true_block == block.continue_block;
1685 else if (ret && negative_candidate && method == SPIRBlock::MergeToSelectContinueForLoop)
1686 ret = block.false_block == block.continue_block;
1687
1688 // If we have OpPhi which depends on branches which came from our own block,
1689 // we need to flush phi variables in else block instead of a trivial break,
1690 // so we cannot assume this is a for loop candidate.
1691 if (ret)
1692 {
1693 for (auto &phi : block.phi_variables)
1694 if (phi.parent == block.self)
1695 return false;
1696
1697 auto *merge = maybe_get<SPIRBlock>(id: block.merge_block);
1698 if (merge)
1699 for (auto &phi : merge->phi_variables)
1700 if (phi.parent == block.self)
1701 return false;
1702 }
1703 return ret;
1704 }
1705 else if (method == SPIRBlock::MergeToDirectForLoop)
1706 {
1707 // Empty loop header that just sets up merge target
1708 // and branches to loop body.
1709 bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block_is_noop(block);
1710
1711 if (!ret)
1712 return false;
1713
1714 auto &child = get<SPIRBlock>(id: block.next_block);
1715
1716 const auto *false_block = maybe_get<SPIRBlock>(id: child.false_block);
1717 const auto *true_block = maybe_get<SPIRBlock>(id: child.true_block);
1718 const auto *merge_block = maybe_get<SPIRBlock>(id: block.merge_block);
1719
1720 bool false_block_is_merge = child.false_block == block.merge_block ||
1721 (false_block && merge_block && execution_is_noop(from: *false_block, to: *merge_block));
1722
1723 bool true_block_is_merge = child.true_block == block.merge_block ||
1724 (true_block && merge_block && execution_is_noop(from: *true_block, to: *merge_block));
1725
1726 bool positive_candidate =
1727 child.true_block != block.merge_block && child.true_block != block.self && false_block_is_merge;
1728
1729 bool negative_candidate =
1730 child.false_block != block.merge_block && child.false_block != block.self && true_block_is_merge;
1731
1732 ret = child.terminator == SPIRBlock::Select && child.merge == SPIRBlock::MergeNone &&
1733 (positive_candidate || negative_candidate);
1734
1735 if (ret)
1736 {
1737 auto *merge = maybe_get<SPIRBlock>(id: block.merge_block);
1738 if (merge)
1739 for (auto &phi : merge->phi_variables)
1740 if (phi.parent == block.self || phi.parent == child.false_block)
1741 return false;
1742 }
1743
1744 return ret;
1745 }
1746 else
1747 return false;
1748}
1749
1750bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const
1751{
1752 if (!execution_is_branchless(from, to))
1753 return false;
1754
1755 auto *start = &from;
1756 for (;;)
1757 {
1758 if (start->self == to.self)
1759 return true;
1760
1761 if (!block_is_noop(block: *start))
1762 return false;
1763
1764 auto &next = get<SPIRBlock>(id: start->next_block);
1765 start = &next;
1766 }
1767}
1768
1769bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const
1770{
1771 auto *start = &from;
1772 for (;;)
1773 {
1774 if (start->self == to.self)
1775 return true;
1776
1777 if (start->terminator == SPIRBlock::Direct && start->merge == SPIRBlock::MergeNone)
1778 start = &get<SPIRBlock>(id: start->next_block);
1779 else
1780 return false;
1781 }
1782}
1783
1784bool Compiler::execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const
1785{
1786 return from.terminator == SPIRBlock::Direct && from.merge == SPIRBlock::MergeNone && from.next_block == to.self;
1787}
1788
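// Classifies the continue block of a loop so the backend can decide between
// while, for, do-while and the conservative "complex" loop form.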
1789SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &block) const
1790{
1791 // The block was deemed too complex during code emit, pick conservative fallback paths.
1792 if (block.complex_continue)
1793 return SPIRBlock::ComplexLoop;
1794
	// In older glslang output, the continue block can be equal to the loop header.
1796 // In this case, execution is clearly branchless, so just assume a while loop header here.
1797 if (block.merge == SPIRBlock::MergeLoop)
1798 return SPIRBlock::WhileLoop;
1799
1800 if (block.loop_dominator == BlockID(SPIRBlock::NoDominator))
1801 {
1802 // Continue block is never reached from CFG.
1803 return SPIRBlock::ComplexLoop;
1804 }
1805
1806 auto &dominator = get<SPIRBlock>(id: block.loop_dominator);
1807
1808 if (execution_is_noop(from: block, to: dominator))
1809 return SPIRBlock::WhileLoop;
1810 else if (execution_is_branchless(from: block, to: dominator))
1811 return SPIRBlock::ForLoop;
1812 else
1813 {
1814 const auto *false_block = maybe_get<SPIRBlock>(id: block.false_block);
1815 const auto *true_block = maybe_get<SPIRBlock>(id: block.true_block);
1816 const auto *merge_block = maybe_get<SPIRBlock>(id: dominator.merge_block);
1817
1818 // If we need to flush Phi in this block, we cannot have a DoWhile loop.
1819 bool flush_phi_to_false = false_block && flush_phi_required(from: block.self, to: block.false_block);
1820 bool flush_phi_to_true = true_block && flush_phi_required(from: block.self, to: block.true_block);
1821 if (flush_phi_to_false || flush_phi_to_true)
1822 return SPIRBlock::ComplexLoop;
1823
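		// Illustrative sketch (hypothetical source, not taken from the module): for
		//     do { body(); } while (cond);
		// the continue block tests cond, branches back to the loop header (the dominator) when true,
		// and to the merge block (or a no-op chain leading to it) when false. That is the
		// positive_do_while pattern below; negative_do_while is the same shape with the branch
		// targets swapped, i.e. a negated condition.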
1824 bool positive_do_while = block.true_block == dominator.self &&
1825 (block.false_block == dominator.merge_block ||
1826 (false_block && merge_block && execution_is_noop(from: *false_block, to: *merge_block)));
1827
1828 bool negative_do_while = block.false_block == dominator.self &&
1829 (block.true_block == dominator.merge_block ||
1830 (true_block && merge_block && execution_is_noop(from: *true_block, to: *merge_block)));
1831
1832 if (block.merge == SPIRBlock::MergeNone && block.terminator == SPIRBlock::Select &&
1833 (positive_do_while || negative_do_while))
1834 {
1835 return SPIRBlock::DoWhileLoop;
1836 }
1837 else
1838 return SPIRBlock::ComplexLoop;
1839 }
1840}
1841
1842const SmallVector<SPIRBlock::Case> &Compiler::get_case_list(const SPIRBlock &block) const
1843{
1844 uint32_t width = 0;
1845
1846 // First we check if we can get the type directly from the block.condition
1847 // since it can be a SPIRConstant or a SPIRVariable.
1848 if (const auto *constant = maybe_get<SPIRConstant>(id: block.condition))
1849 {
1850 const auto &type = get<SPIRType>(id: constant->constant_type);
1851 width = type.width;
1852 }
1853 else if (const auto *var = maybe_get<SPIRVariable>(id: block.condition))
1854 {
1855 const auto &type = get<SPIRType>(id: var->basetype);
1856 width = type.width;
1857 }
1858 else if (const auto *undef = maybe_get<SPIRUndef>(id: block.condition))
1859 {
1860 const auto &type = get<SPIRType>(id: undef->basetype);
1861 width = type.width;
1862 }
1863 else
1864 {
1865 auto search = ir.load_type_width.find(x: block.condition);
1866 if (search == ir.load_type_width.end())
1867 {
1868 SPIRV_CROSS_THROW("Use of undeclared variable on a switch statement.");
1869 }
1870
1871 width = search->second;
1872 }
1873
1874 if (width > 32)
1875 return block.cases_64bit;
1876
1877 return block.cases_32bit;
1878}
1879
1880bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const
1881{
1882 handler.set_current_block(block);
1883 handler.rearm_current_block(block);
1884
1885	// Ideally we would traverse the CFG here instead of all blocks, in order to eliminate dead blocks,
1886	// but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing
1887	// inside dead blocks ...
1888 for (auto &i : block.ops)
1889 {
1890 auto ops = stream(instr: i);
1891 auto op = static_cast<Op>(i.op);
1892
1893 if (!handler.handle(opcode: op, args: ops, length: i.length))
1894 return false;
1895
1896 if (op == OpFunctionCall)
1897 {
1898 auto &func = get<SPIRFunction>(id: ops[2]);
1899 if (handler.follow_function_call(func))
1900 {
1901 if (!handler.begin_function_scope(ops, i.length))
1902 return false;
1903 if (!traverse_all_reachable_opcodes(block: get<SPIRFunction>(id: ops[2]), handler))
1904 return false;
1905 if (!handler.end_function_scope(ops, i.length))
1906 return false;
1907
1908 handler.rearm_current_block(block);
1909 }
1910 }
1911 }
1912
1913 if (!handler.handle_terminator(block))
1914 return false;
1915
1916 return true;
1917}
1918
1919bool Compiler::traverse_all_reachable_opcodes(const SPIRFunction &func, OpcodeHandler &handler) const
1920{
1921 for (auto block : func.blocks)
1922 if (!traverse_all_reachable_opcodes(block: get<SPIRBlock>(id: block), handler))
1923 return false;
1924
1925 return true;
1926}
1927
1928uint32_t Compiler::type_struct_member_offset(const SPIRType &type, uint32_t index) const
1929{
1930 auto *type_meta = ir.find_meta(id: type.self);
1931 if (type_meta)
1932 {
1933 // Decoration must be set in valid SPIR-V, otherwise throw.
1934 auto &dec = type_meta->members[index];
1935 if (dec.decoration_flags.get(bit: DecorationOffset))
1936 return dec.offset;
1937 else
1938 SPIRV_CROSS_THROW("Struct member does not have Offset set.");
1939 }
1940 else
1941 SPIRV_CROSS_THROW("Struct member does not have Offset set.");
1942}
1943
1944uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_t index) const
1945{
1946 auto *type_meta = ir.find_meta(id: type.member_types[index]);
1947 if (type_meta)
1948 {
1949 // Decoration must be set in valid SPIR-V, otherwise throw.
1950		// ArrayStride is part of the array type, not OpMemberDecorate.
1951 auto &dec = type_meta->decoration;
1952 if (dec.decoration_flags.get(bit: DecorationArrayStride))
1953 return dec.array_stride;
1954 else
1955 SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");
1956 }
1957 else
1958 SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");
1959}
1960
1961uint32_t Compiler::type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const
1962{
1963 auto *type_meta = ir.find_meta(id: type.self);
1964 if (type_meta)
1965 {
1966 // Decoration must be set in valid SPIR-V, otherwise throw.
1967 // MatrixStride is part of OpMemberDecorate.
1968 auto &dec = type_meta->members[index];
1969 if (dec.decoration_flags.get(bit: DecorationMatrixStride))
1970 return dec.matrix_stride;
1971 else
1972 SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
1973 }
1974 else
1975 SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
1976}
1977
1978size_t Compiler::get_declared_struct_size(const SPIRType &type) const
1979{
1980 if (type.member_types.empty())
1981 SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");
1982
1983	// Offsets can be declared out of order, so we need to deduce the actual size
1984	// based on the member with the highest offset instead.
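	// Worked example (hypothetical layout): a block with member 0 = float at Offset 16 and
	// member 1 = vec4 at Offset 0 has its highest offset at member 0, so the declared size
	// is 16 + sizeof(float) = 20 bytes, even though member 1 ends at offset 16.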
1985 uint32_t member_index = 0;
1986 size_t highest_offset = 0;
1987 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
1988 {
1989 size_t offset = type_struct_member_offset(type, index: i);
1990 if (offset > highest_offset)
1991 {
1992 highest_offset = offset;
1993 member_index = i;
1994 }
1995 }
1996
1997 size_t size = get_declared_struct_member_size(struct_type: type, index: member_index);
1998 return highest_offset + size;
1999}
2000
2001size_t Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, size_t array_size) const
2002{
2003 if (type.member_types.empty())
2004 SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");
2005
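	// The fixed part of the size comes from get_declared_struct_size(); each requested element of
	// a trailing runtime array then adds one ArrayStride. Illustrative example (hypothetical
	// layout): { uint count; uint data[]; } with data at Offset 4 and ArrayStride 4 yields
	// 4 + array_size * 4 bytes.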
2006 size_t size = get_declared_struct_size(type);
2007 auto &last_type = get<SPIRType>(id: type.member_types.back());
2008 if (!last_type.array.empty() && last_type.array_size_literal[0] && last_type.array[0] == 0) // Runtime array
2009 size += array_size * type_struct_member_array_stride(type, index: uint32_t(type.member_types.size() - 1));
2010
2011 return size;
2012}
2013
2014uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const
2015{
2016 auto &result_type = get<SPIRType>(id: spec.basetype);
2017 if (result_type.basetype != SPIRType::UInt && result_type.basetype != SPIRType::Int &&
2018 result_type.basetype != SPIRType::Boolean)
2019 {
2020 SPIRV_CROSS_THROW(
2021 "Only 32-bit integers and booleans are currently supported when evaluating specialization constants.\n");
2022 }
2023
2024 if (!is_scalar(type: result_type))
2025 SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n");
2026
2027 uint32_t value = 0;
2028
2029 const auto eval_u32 = [&](uint32_t id) -> uint32_t {
2030 auto &type = expression_type(id);
2031 if (type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int && type.basetype != SPIRType::Boolean)
2032 {
2033 SPIRV_CROSS_THROW("Only 32-bit integers and booleans are currently supported when evaluating "
2034 "specialization constants.\n");
2035 }
2036
2037 if (!is_scalar(type))
2038 SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n");
2039 if (const auto *c = this->maybe_get<SPIRConstant>(id))
2040 return c->scalar();
2041 else
2042 return evaluate_spec_constant_u32(spec: this->get<SPIRConstantOp>(id));
2043 };
2044
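	// Illustrative example (hypothetical SPIR-V, not from the current module): an array sized by
	//     %four = OpConstant %uint 4
	//     %n    = OpSpecConstant %uint 16
	//     %len  = OpSpecConstantOp %uint IMul %n %four
	// ends up here with spec.opcode == OpIMul; each argument is resolved through the eval_u32
	// lambda above and the result is 16 * 4 = 64, unless the client overrides %n.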
2045#define binary_spec_op(op, binary_op) \
2046 case Op##op: \
2047 value = eval_u32(spec.arguments[0]) binary_op eval_u32(spec.arguments[1]); \
2048 break
2049#define binary_spec_op_cast(op, binary_op, type) \
2050 case Op##op: \
2051 value = uint32_t(type(eval_u32(spec.arguments[0])) binary_op type(eval_u32(spec.arguments[1]))); \
2052 break
2053
2054 // Support the basic opcodes which are typically used when computing array sizes.
2055 switch (spec.opcode)
2056 {
2057 binary_spec_op(IAdd, +);
2058 binary_spec_op(ISub, -);
2059 binary_spec_op(IMul, *);
2060 binary_spec_op(BitwiseAnd, &);
2061 binary_spec_op(BitwiseOr, |);
2062 binary_spec_op(BitwiseXor, ^);
2063 binary_spec_op(LogicalAnd, &);
2064 binary_spec_op(LogicalOr, |);
2065 binary_spec_op(ShiftLeftLogical, <<);
2066 binary_spec_op(ShiftRightLogical, >>);
2067 binary_spec_op_cast(ShiftRightArithmetic, >>, int32_t);
2068 binary_spec_op(LogicalEqual, ==);
2069 binary_spec_op(LogicalNotEqual, !=);
2070 binary_spec_op(IEqual, ==);
2071 binary_spec_op(INotEqual, !=);
2072 binary_spec_op(ULessThan, <);
2073 binary_spec_op(ULessThanEqual, <=);
2074 binary_spec_op(UGreaterThan, >);
2075 binary_spec_op(UGreaterThanEqual, >=);
2076 binary_spec_op_cast(SLessThan, <, int32_t);
2077 binary_spec_op_cast(SLessThanEqual, <=, int32_t);
2078 binary_spec_op_cast(SGreaterThan, >, int32_t);
2079 binary_spec_op_cast(SGreaterThanEqual, >=, int32_t);
2080#undef binary_spec_op
2081#undef binary_spec_op_cast
2082
2083 case OpLogicalNot:
2084 value = uint32_t(!eval_u32(spec.arguments[0]));
2085 break;
2086
2087 case OpNot:
2088 value = ~eval_u32(spec.arguments[0]);
2089 break;
2090
2091 case OpSNegate:
2092 value = uint32_t(-int32_t(eval_u32(spec.arguments[0])));
2093 break;
2094
2095 case OpSelect:
2096 value = eval_u32(spec.arguments[0]) ? eval_u32(spec.arguments[1]) : eval_u32(spec.arguments[2]);
2097 break;
2098
2099 case OpUMod:
2100 {
2101 uint32_t a = eval_u32(spec.arguments[0]);
2102 uint32_t b = eval_u32(spec.arguments[1]);
2103 if (b == 0)
2104 SPIRV_CROSS_THROW("Undefined behavior in UMod, b == 0.\n");
2105 value = a % b;
2106 break;
2107 }
2108
2109 case OpSRem:
2110 {
2111 auto a = int32_t(eval_u32(spec.arguments[0]));
2112 auto b = int32_t(eval_u32(spec.arguments[1]));
2113 if (b == 0)
2114 SPIRV_CROSS_THROW("Undefined behavior in SRem, b == 0.\n");
2115 value = a % b;
2116 break;
2117 }
2118
2119 case OpSMod:
2120 {
2121 auto a = int32_t(eval_u32(spec.arguments[0]));
2122 auto b = int32_t(eval_u32(spec.arguments[1]));
2123 if (b == 0)
2124 SPIRV_CROSS_THROW("Undefined behavior in SMod, b == 0.\n");
2125 auto v = a % b;
2126
2127 // Makes sure we match the sign of b, not a.
2128 if ((b < 0 && v > 0) || (b > 0 && v < 0))
2129 v += b;
2130 value = v;
2131 break;
2132 }
2133
2134 case OpUDiv:
2135 {
2136 uint32_t a = eval_u32(spec.arguments[0]);
2137 uint32_t b = eval_u32(spec.arguments[1]);
2138 if (b == 0)
2139 SPIRV_CROSS_THROW("Undefined behavior in UDiv, b == 0.\n");
2140 value = a / b;
2141 break;
2142 }
2143
2144 case OpSDiv:
2145 {
2146 auto a = int32_t(eval_u32(spec.arguments[0]));
2147 auto b = int32_t(eval_u32(spec.arguments[1]));
2148 if (b == 0)
2149 SPIRV_CROSS_THROW("Undefined behavior in SDiv, b == 0.\n");
2150 value = a / b;
2151 break;
2152 }
2153
2154 default:
2155 SPIRV_CROSS_THROW("Unsupported spec constant opcode for evaluation.\n");
2156 }
2157
2158 return value;
2159}
2160
2161uint32_t Compiler::evaluate_constant_u32(uint32_t id) const
2162{
2163 if (const auto *c = maybe_get<SPIRConstant>(id))
2164 return c->scalar();
2165 else
2166 return evaluate_spec_constant_u32(spec: get<SPIRConstantOp>(id));
2167}
2168
2169size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
2170{
2171 if (struct_type.member_types.empty())
2172 SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");
2173
2174 auto &flags = get_member_decoration_bitset(id: struct_type.self, index);
2175 auto &type = get<SPIRType>(id: struct_type.member_types[index]);
2176
2177 switch (type.basetype)
2178 {
2179 case SPIRType::Unknown:
2180 case SPIRType::Void:
2181 case SPIRType::Boolean: // Bools are purely logical, and cannot be used for externally visible types.
2182 case SPIRType::AtomicCounter:
2183 case SPIRType::Image:
2184 case SPIRType::SampledImage:
2185 case SPIRType::Sampler:
2186 SPIRV_CROSS_THROW("Querying size for object with opaque size.");
2187
2188 default:
2189 break;
2190 }
2191
2192 if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
2193 {
2194 // Check if this is a top-level pointer type, and not an array of pointers.
2195 if (type.pointer_depth > get<SPIRType>(id: type.parent_type).pointer_depth)
2196 return 8;
2197 }
2198
2199 if (!type.array.empty())
2200 {
2201		// For arrays, we can use ArrayStride to compute the size directly.
2202 bool array_size_literal = type.array_size_literal.back();
2203 uint32_t array_size = array_size_literal ? type.array.back() : evaluate_constant_u32(id: type.array.back());
2204 return type_struct_member_array_stride(type: struct_type, index) * array_size;
2205 }
2206 else if (type.basetype == SPIRType::Struct)
2207 {
2208 return get_declared_struct_size(type);
2209 }
2210 else
2211 {
2212 unsigned vecsize = type.vecsize;
2213 unsigned columns = type.columns;
2214
2215 // Vectors.
2216 if (columns == 1)
2217 {
2218 size_t component_size = type.width / 8;
2219 return vecsize * component_size;
2220 }
2221 else
2222 {
2223 uint32_t matrix_stride = type_struct_member_matrix_stride(type: struct_type, index);
2224
2225 // Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses.
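			// Illustrative example (hypothetical strides): a column-major mat2x4 (columns = 2,
			// vecsize = 4) with MatrixStride 16 occupies 2 * 16 = 32 bytes, while a row-major
			// matrix with vecsize 4 and MatrixStride 8 occupies 4 * 8 = 32 bytes.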
2226 if (flags.get(bit: DecorationRowMajor))
2227 return matrix_stride * vecsize;
2228 else if (flags.get(bit: DecorationColMajor))
2229 return matrix_stride * columns;
2230 else
2231 SPIRV_CROSS_THROW("Either row-major or column-major must be declared for matrices.");
2232 }
2233 }
2234}
2235
2236bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
2237{
2238 if (opcode != OpAccessChain && opcode != OpInBoundsAccessChain && opcode != OpPtrAccessChain)
2239 return true;
2240
2241 bool ptr_chain = (opcode == OpPtrAccessChain);
2242
2243 // Invalid SPIR-V.
2244 if (length < (ptr_chain ? 5u : 4u))
2245 return false;
2246
2247 if (args[2] != id)
2248 return true;
2249
2250 // Don't bother traversing the entire access chain tree yet.
2251 // If we access a struct member, assume we access the entire member.
2252 uint32_t index = compiler.get<SPIRConstant>(id: args[ptr_chain ? 4 : 3]).scalar();
2253
2254 // Seen this index already.
2255 if (seen.find(x: index) != end(cont&: seen))
2256 return true;
2257 seen.insert(x: index);
2258
2259 auto &type = compiler.expression_type(id);
2260 uint32_t offset = compiler.type_struct_member_offset(type, index);
2261
2262 size_t range;
2263 // If we have another member in the struct, deduce the range by looking at the next member.
2264 // This is okay since structs in SPIR-V can have padding, but Offset decoration must be
2265 // monotonically increasing.
2266 // Of course, this doesn't take into account if the SPIR-V for some reason decided to add
2267 // very large amounts of padding, but that's not really a big deal.
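	// Illustrative example (hypothetical block): with member 1 at Offset 16 and member 2 at
	// Offset 32, an access to member 1 is recorded as { index = 1, offset = 16, range = 16 },
	// regardless of how much of those 16 bytes the member actually fills.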
2268 if (index + 1 < type.member_types.size())
2269 {
2270 range = compiler.type_struct_member_offset(type, index: index + 1) - offset;
2271 }
2272 else
2273 {
2274			// Last member in the struct, so deduce the range from its declared size directly.
2275 range = compiler.get_declared_struct_member_size(struct_type: type, index);
2276 }
2277
2278 ranges.push_back(t: { .index: index, .offset: offset, .range: range });
2279 return true;
2280}
2281
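// Minimal usage sketch for the function below (illustrative; assumes "compiler" is a valid
// Compiler-derived instance and "ubo_id" was obtained from get_shader_resources()):
//
//     auto ranges = compiler.get_active_buffer_ranges(ubo_id);
//     for (auto &range : ranges)
//         printf("member #%u: offset %zu, size %zu\n", range.index, range.offset, range.range);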
2282SmallVector<BufferRange> Compiler::get_active_buffer_ranges(VariableID id) const
2283{
2284 SmallVector<BufferRange> ranges;
2285 BufferAccessHandler handler(*this, ranges, id);
2286 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
2287 return ranges;
2288}
2289
2290bool Compiler::types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const
2291{
2292 if (a.basetype != b.basetype)
2293 return false;
2294 if (a.width != b.width)
2295 return false;
2296 if (a.vecsize != b.vecsize)
2297 return false;
2298 if (a.columns != b.columns)
2299 return false;
2300 if (a.array.size() != b.array.size())
2301 return false;
2302
2303 size_t array_count = a.array.size();
2304 if (array_count && memcmp(s1: a.array.data(), s2: b.array.data(), n: array_count * sizeof(uint32_t)) != 0)
2305 return false;
2306
2307 if (a.basetype == SPIRType::Image || a.basetype == SPIRType::SampledImage)
2308 {
2309 if (memcmp(s1: &a.image, s2: &b.image, n: sizeof(SPIRType::Image)) != 0)
2310 return false;
2311 }
2312
2313 if (a.member_types.size() != b.member_types.size())
2314 return false;
2315
2316 size_t member_types = a.member_types.size();
2317 for (size_t i = 0; i < member_types; i++)
2318 {
2319 if (!types_are_logically_equivalent(a: get<SPIRType>(id: a.member_types[i]), b: get<SPIRType>(id: b.member_types[i])))
2320 return false;
2321 }
2322
2323 return true;
2324}
2325
2326const Bitset &Compiler::get_execution_mode_bitset() const
2327{
2328 return get_entry_point().flags;
2329}
2330
2331void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t arg1, uint32_t arg2)
2332{
2333 auto &execution = get_entry_point();
2334
2335 execution.flags.set(mode);
2336 switch (mode)
2337 {
2338 case ExecutionModeLocalSize:
2339 execution.workgroup_size.x = arg0;
2340 execution.workgroup_size.y = arg1;
2341 execution.workgroup_size.z = arg2;
2342 break;
2343
2344 case ExecutionModeLocalSizeId:
2345 execution.workgroup_size.id_x = arg0;
2346 execution.workgroup_size.id_y = arg1;
2347 execution.workgroup_size.id_z = arg2;
2348 break;
2349
2350 case ExecutionModeInvocations:
2351 execution.invocations = arg0;
2352 break;
2353
2354 case ExecutionModeOutputVertices:
2355 execution.output_vertices = arg0;
2356 break;
2357
2358 case ExecutionModeOutputPrimitivesEXT:
2359 execution.output_primitives = arg0;
2360 break;
2361
2362 default:
2363 break;
2364 }
2365}
2366
2367void Compiler::unset_execution_mode(ExecutionMode mode)
2368{
2369 auto &execution = get_entry_point();
2370 execution.flags.clear(bit: mode);
2371}
2372
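// Minimal usage sketch for the function below (illustrative; "compiler" is assumed to be a
// valid Compiler-derived instance for a compute-like entry point):
//
//     SpecializationConstant wg_x, wg_y, wg_z;
//     uint32_t builtin_id = compiler.get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
//     // builtin_id != 0 means gl_WorkGroupSize itself is backed by a constant composite;
//     // wg_x.id == 0 means the X dimension is not driven by a specialization constant.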
2373uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y,
2374 SpecializationConstant &z) const
2375{
2376 auto &execution = get_entry_point();
2377 x = { .id: 0, .constant_id: 0 };
2378 y = { .id: 0, .constant_id: 0 };
2379 z = { .id: 0, .constant_id: 0 };
2380
2381 // WorkgroupSize builtin takes precedence over LocalSize / LocalSizeId.
2382 if (execution.workgroup_size.constant != 0)
2383 {
2384 auto &c = get<SPIRConstant>(id: execution.workgroup_size.constant);
2385
2386 if (c.m.c[0].id[0] != ID(0))
2387 {
2388 x.id = c.m.c[0].id[0];
2389 x.constant_id = get_decoration(id: c.m.c[0].id[0], decoration: DecorationSpecId);
2390 }
2391
2392 if (c.m.c[0].id[1] != ID(0))
2393 {
2394 y.id = c.m.c[0].id[1];
2395 y.constant_id = get_decoration(id: c.m.c[0].id[1], decoration: DecorationSpecId);
2396 }
2397
2398 if (c.m.c[0].id[2] != ID(0))
2399 {
2400 z.id = c.m.c[0].id[2];
2401 z.constant_id = get_decoration(id: c.m.c[0].id[2], decoration: DecorationSpecId);
2402 }
2403 }
2404 else if (execution.flags.get(bit: ExecutionModeLocalSizeId))
2405 {
2406 auto &cx = get<SPIRConstant>(id: execution.workgroup_size.id_x);
2407 if (cx.specialization)
2408 {
2409 x.id = execution.workgroup_size.id_x;
2410 x.constant_id = get_decoration(id: execution.workgroup_size.id_x, decoration: DecorationSpecId);
2411 }
2412
2413 auto &cy = get<SPIRConstant>(id: execution.workgroup_size.id_y);
2414 if (cy.specialization)
2415 {
2416 y.id = execution.workgroup_size.id_y;
2417 y.constant_id = get_decoration(id: execution.workgroup_size.id_y, decoration: DecorationSpecId);
2418 }
2419
2420 auto &cz = get<SPIRConstant>(id: execution.workgroup_size.id_z);
2421 if (cz.specialization)
2422 {
2423 z.id = execution.workgroup_size.id_z;
2424 z.constant_id = get_decoration(id: execution.workgroup_size.id_z, decoration: DecorationSpecId);
2425 }
2426 }
2427
2428 return execution.workgroup_size.constant;
2429}
2430
2431uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index) const
2432{
2433 auto &execution = get_entry_point();
2434 switch (mode)
2435 {
2436 case ExecutionModeLocalSizeId:
2437 if (execution.flags.get(bit: ExecutionModeLocalSizeId))
2438 {
2439 switch (index)
2440 {
2441 case 0:
2442 return execution.workgroup_size.id_x;
2443 case 1:
2444 return execution.workgroup_size.id_y;
2445 case 2:
2446 return execution.workgroup_size.id_z;
2447 default:
2448 return 0;
2449 }
2450 }
2451 else
2452 return 0;
2453
2454 case ExecutionModeLocalSize:
2455 switch (index)
2456 {
2457 case 0:
2458 if (execution.flags.get(bit: ExecutionModeLocalSizeId) && execution.workgroup_size.id_x != 0)
2459 return get<SPIRConstant>(id: execution.workgroup_size.id_x).scalar();
2460 else
2461 return execution.workgroup_size.x;
2462 case 1:
2463 if (execution.flags.get(bit: ExecutionModeLocalSizeId) && execution.workgroup_size.id_y != 0)
2464 return get<SPIRConstant>(id: execution.workgroup_size.id_y).scalar();
2465 else
2466 return execution.workgroup_size.y;
2467 case 2:
2468 if (execution.flags.get(bit: ExecutionModeLocalSizeId) && execution.workgroup_size.id_z != 0)
2469 return get<SPIRConstant>(id: execution.workgroup_size.id_z).scalar();
2470 else
2471 return execution.workgroup_size.z;
2472 default:
2473 return 0;
2474 }
2475
2476 case ExecutionModeInvocations:
2477 return execution.invocations;
2478
2479 case ExecutionModeOutputVertices:
2480 return execution.output_vertices;
2481
2482 case ExecutionModeOutputPrimitivesEXT:
2483 return execution.output_primitives;
2484
2485 default:
2486 return 0;
2487 }
2488}
2489
2490ExecutionModel Compiler::get_execution_model() const
2491{
2492 auto &execution = get_entry_point();
2493 return execution.model;
2494}
2495
2496bool Compiler::is_tessellation_shader(ExecutionModel model)
2497{
2498 return model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation;
2499}
2500
2501bool Compiler::is_vertex_like_shader() const
2502{
2503 auto model = get_execution_model();
2504 return model == ExecutionModelVertex || model == ExecutionModelGeometry ||
2505 model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation;
2506}
2507
2508bool Compiler::is_tessellation_shader() const
2509{
2510 return is_tessellation_shader(model: get_execution_model());
2511}
2512
2513bool Compiler::is_tessellating_triangles() const
2514{
2515 return get_execution_mode_bitset().get(bit: ExecutionModeTriangles);
2516}
2517
2518void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable)
2519{
2520 get<SPIRVariable>(id).remapped_variable = remap_enable;
2521}
2522
2523bool Compiler::get_remapped_variable_state(VariableID id) const
2524{
2525 return get<SPIRVariable>(id).remapped_variable;
2526}
2527
2528void Compiler::set_subpass_input_remapped_components(VariableID id, uint32_t components)
2529{
2530 get<SPIRVariable>(id).remapped_components = components;
2531}
2532
2533uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const
2534{
2535 return get<SPIRVariable>(id).remapped_components;
2536}
2537
2538void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source)
2539{
2540 auto itr = find(first: begin(cont&: e.implied_read_expressions), last: end(cont&: e.implied_read_expressions), val: ID(source));
2541 if (itr == end(cont&: e.implied_read_expressions))
2542 e.implied_read_expressions.push_back(t: source);
2543}
2544
2545void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source)
2546{
2547 auto itr = find(first: begin(cont&: e.implied_read_expressions), last: end(cont&: e.implied_read_expressions), val: ID(source));
2548 if (itr == end(cont&: e.implied_read_expressions))
2549 e.implied_read_expressions.push_back(t: source);
2550}
2551
2552void Compiler::add_active_interface_variable(uint32_t var_id)
2553{
2554 active_interface_variables.insert(x: var_id);
2555
2556 // In SPIR-V 1.4 and up we must also track the interface variable in the entry point.
2557 if (ir.get_spirv_version() >= 0x10400)
2558 {
2559 auto &vars = get_entry_point().interface_variables;
2560 if (find(first: begin(cont&: vars), last: end(cont&: vars), val: VariableID(var_id)) == end(cont&: vars))
2561 vars.push_back(t: var_id);
2562 }
2563}
2564
2565void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression)
2566{
2567 // Don't inherit any expression dependencies if the expression in dst
2568 // is not a forwarded temporary.
2569 if (forwarded_temporaries.find(x: dst) == end(cont&: forwarded_temporaries) ||
2570 forced_temporaries.find(x: dst) != end(cont&: forced_temporaries))
2571 {
2572 return;
2573 }
2574
2575 auto &e = get<SPIRExpression>(id: dst);
2576 auto *phi = maybe_get<SPIRVariable>(id: source_expression);
2577 if (phi && phi->phi_variable)
2578 {
2579 // We have used a phi variable, which can change at the end of the block,
2580 // so make sure we take a dependency on this phi variable.
2581 phi->dependees.push_back(t: dst);
2582 }
2583
2584 auto *s = maybe_get<SPIRExpression>(id: source_expression);
2585 if (!s)
2586 return;
2587
2588 auto &e_deps = e.expression_dependencies;
2589 auto &s_deps = s->expression_dependencies;
2590
2591	// If we depend on an expression, we also depend on all sub-dependencies from source.
2592 e_deps.push_back(t: source_expression);
2593 e_deps.insert(itr: end(cont&: e_deps), insert_begin: begin(cont&: s_deps), insert_end: end(cont&: s_deps));
2594
2595 // Eliminate duplicated dependencies.
2596 sort(first: begin(cont&: e_deps), last: end(cont&: e_deps));
2597 e_deps.erase(start_erase: unique(first: begin(cont&: e_deps), last: end(cont&: e_deps)), end_erase: end(cont&: e_deps));
2598}
2599
2600SmallVector<EntryPoint> Compiler::get_entry_points_and_stages() const
2601{
2602 SmallVector<EntryPoint> entries;
2603 for (auto &entry : ir.entry_points)
2604 entries.push_back(t: { .name: entry.second.orig_name, .execution_model: entry.second.model });
2605 return entries;
2606}
2607
2608void Compiler::rename_entry_point(const std::string &old_name, const std::string &new_name, spv::ExecutionModel model)
2609{
2610 auto &entry = get_entry_point(name: old_name, execution_model: model);
2611 entry.orig_name = new_name;
2612 entry.name = new_name;
2613}
2614
2615void Compiler::set_entry_point(const std::string &name, spv::ExecutionModel model)
2616{
2617 auto &entry = get_entry_point(name, execution_model: model);
2618 ir.default_entry_point = entry.self;
2619}
2620
2621SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name)
2622{
2623 auto itr = find_if(
2624 first: begin(cont&: ir.entry_points), last: end(cont&: ir.entry_points),
2625 pred: [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; });
2626
2627 if (itr == end(cont&: ir.entry_points))
2628 SPIRV_CROSS_THROW("Entry point does not exist.");
2629
2630 return itr->second;
2631}
2632
2633const SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) const
2634{
2635 auto itr = find_if(
2636 first: begin(cont: ir.entry_points), last: end(cont: ir.entry_points),
2637 pred: [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; });
2638
2639 if (itr == end(cont: ir.entry_points))
2640 SPIRV_CROSS_THROW("Entry point does not exist.");
2641
2642 return itr->second;
2643}
2644
2645SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model)
2646{
2647 auto itr = find_if(first: begin(cont&: ir.entry_points), last: end(cont&: ir.entry_points),
2648 pred: [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
2649 return entry.second.orig_name == name && entry.second.model == model;
2650 });
2651
2652 if (itr == end(cont&: ir.entry_points))
2653 SPIRV_CROSS_THROW("Entry point does not exist.");
2654
2655 return itr->second;
2656}
2657
2658const SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) const
2659{
2660 auto itr = find_if(first: begin(cont: ir.entry_points), last: end(cont: ir.entry_points),
2661 pred: [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
2662 return entry.second.orig_name == name && entry.second.model == model;
2663 });
2664
2665 if (itr == end(cont: ir.entry_points))
2666 SPIRV_CROSS_THROW("Entry point does not exist.");
2667
2668 return itr->second;
2669}
2670
2671const string &Compiler::get_cleansed_entry_point_name(const std::string &name, ExecutionModel model) const
2672{
2673 return get_entry_point(name, model).name;
2674}
2675
2676const SPIREntryPoint &Compiler::get_entry_point() const
2677{
2678 return ir.entry_points.find(x: ir.default_entry_point)->second;
2679}
2680
2681SPIREntryPoint &Compiler::get_entry_point()
2682{
2683 return ir.entry_points.find(x: ir.default_entry_point)->second;
2684}
2685
2686bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const
2687{
2688 auto &var = get<SPIRVariable>(id);
2689
2690 if (ir.get_spirv_version() < 0x10400)
2691 {
2692 if (var.storage != StorageClassInput && var.storage != StorageClassOutput &&
2693 var.storage != StorageClassUniformConstant)
2694 SPIRV_CROSS_THROW("Only Input, Output variables and Uniform constants are part of a shader linking interface.");
2695
2696		// This is to avoid potential problems with very old glslang versions which did
2697		// not emit input/output interfaces properly.
2698		// We can assume they only had a single entry point, and single-entry-point shaders
2699		// can safely be assumed to use every interface variable anyway.
2700 if (ir.entry_points.size() <= 1)
2701 return true;
2702 }
2703
2704 // In SPIR-V 1.4 and later, all global resource variables must be present.
2705
2706 auto &execution = get_entry_point();
2707 return find(first: begin(cont: execution.interface_variables), last: end(cont: execution.interface_variables), val: VariableID(id)) !=
2708 end(cont: execution.interface_variables);
2709}
2710
2711void Compiler::CombinedImageSamplerHandler::push_remap_parameters(const SPIRFunction &func, const uint32_t *args,
2712 uint32_t length)
2713{
2714 // If possible, pipe through a remapping table so that parameters know
2715 // which variables they actually bind to in this scope.
2716 unordered_map<uint32_t, uint32_t> remapping;
2717 for (uint32_t i = 0; i < length; i++)
2718 remapping[func.arguments[i].id] = remap_parameter(id: args[i]);
2719 parameter_remapping.push(x: std::move(remapping));
2720}
2721
2722void Compiler::CombinedImageSamplerHandler::pop_remap_parameters()
2723{
2724 parameter_remapping.pop();
2725}
2726
2727uint32_t Compiler::CombinedImageSamplerHandler::remap_parameter(uint32_t id)
2728{
2729 auto *var = compiler.maybe_get_backing_variable(chain: id);
2730 if (var)
2731 id = var->self;
2732
2733 if (parameter_remapping.empty())
2734 return id;
2735
2736 auto &remapping = parameter_remapping.top();
2737 auto itr = remapping.find(x: id);
2738 if (itr != end(cont&: remapping))
2739 return itr->second;
2740 else
2741 return id;
2742}
2743
2744bool Compiler::CombinedImageSamplerHandler::begin_function_scope(const uint32_t *args, uint32_t length)
2745{
2746 if (length < 3)
2747 return false;
2748
2749 auto &callee = compiler.get<SPIRFunction>(id: args[2]);
2750 args += 3;
2751 length -= 3;
2752 push_remap_parameters(func: callee, args, length);
2753 functions.push(x: &callee);
2754 return true;
2755}
2756
2757bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *args, uint32_t length)
2758{
2759 if (length < 3)
2760 return false;
2761
2762 auto &callee = compiler.get<SPIRFunction>(id: args[2]);
2763 args += 3;
2764
2765	// There are two cases we have to handle:
2766	// a callee might call sampler2D(texture2D, sampler) directly, where
2767	// one or more of the operands are function parameters, or
2768	// we might need to provide combined image samplers to our callees,
2769	// in which case we need to add those as well.
2770
2771 pop_remap_parameters();
2772
2773 // Our callee has now been processed at least once.
2774 // No point in doing it again.
2775 callee.do_combined_parameters = false;
2776
2777 auto &params = functions.top()->combined_parameters;
2778 functions.pop();
2779 if (functions.empty())
2780 return true;
2781
2782 auto &caller = *functions.top();
2783 if (caller.do_combined_parameters)
2784 {
2785 for (auto &param : params)
2786 {
2787 VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]);
2788 VariableID sampler_id = param.global_sampler ? param.sampler_id : VariableID(args[param.sampler_id]);
2789
2790 auto *i = compiler.maybe_get_backing_variable(chain: image_id);
2791 auto *s = compiler.maybe_get_backing_variable(chain: sampler_id);
2792 if (i)
2793 image_id = i->self;
2794 if (s)
2795 sampler_id = s->self;
2796
2797 register_combined_image_sampler(caller, combined_id: 0, texture_id: image_id, sampler_id, depth: param.depth);
2798 }
2799 }
2800
2801 return true;
2802}
2803
2804void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller,
2805 VariableID combined_module_id,
2806 VariableID image_id, VariableID sampler_id,
2807 bool depth)
2808{
2809	// We now have a texture ID and a sampler ID which will either be found as globals
2810	// or as parameters in our own function. If both are global, no new parameter is needed;
2811	// otherwise, add one to our list.
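	// Illustrative example (hypothetical caller): for
	//     void sample_it(texture2D t, sampler s) { vec4 c = texture(sampler2D(t, s), uv); }
	// neither ID is global, so param.image_id and param.sampler_id are rewritten below to the
	// argument indices of t and s, and a shadow sampler2D argument is appended to the caller.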
2812 SPIRFunction::CombinedImageSamplerParameter param = {
2813 .id: 0u, .image_id: image_id, .sampler_id: sampler_id, .global_image: true, .global_sampler: true, .depth: depth,
2814 };
2815
2816 auto texture_itr = find_if(first: begin(cont&: caller.arguments), last: end(cont&: caller.arguments),
2817 pred: [image_id](const SPIRFunction::Parameter &p) { return p.id == image_id; });
2818 auto sampler_itr = find_if(first: begin(cont&: caller.arguments), last: end(cont&: caller.arguments),
2819 pred: [sampler_id](const SPIRFunction::Parameter &p) { return p.id == sampler_id; });
2820
2821 if (texture_itr != end(cont&: caller.arguments))
2822 {
2823 param.global_image = false;
2824 param.image_id = uint32_t(texture_itr - begin(cont&: caller.arguments));
2825 }
2826
2827 if (sampler_itr != end(cont&: caller.arguments))
2828 {
2829 param.global_sampler = false;
2830 param.sampler_id = uint32_t(sampler_itr - begin(cont&: caller.arguments));
2831 }
2832
2833 if (param.global_image && param.global_sampler)
2834 return;
2835
2836 auto itr = find_if(first: begin(cont&: caller.combined_parameters), last: end(cont&: caller.combined_parameters),
2837 pred: [&param](const SPIRFunction::CombinedImageSamplerParameter &p) {
2838 return param.image_id == p.image_id && param.sampler_id == p.sampler_id &&
2839 param.global_image == p.global_image && param.global_sampler == p.global_sampler;
2840 });
2841
2842 if (itr == end(cont&: caller.combined_parameters))
2843 {
2844 uint32_t id = compiler.ir.increase_bound_by(count: 3);
2845 auto type_id = id + 0;
2846 auto ptr_type_id = id + 1;
2847 auto combined_id = id + 2;
2848 auto &base = compiler.expression_type(id: image_id);
2849 auto &type = compiler.set<SPIRType>(id: type_id, args: OpTypeSampledImage);
2850 auto &ptr_type = compiler.set<SPIRType>(id: ptr_type_id, args: OpTypePointer);
2851
2852 type = base;
2853 type.self = type_id;
2854 type.basetype = SPIRType::SampledImage;
2855 type.pointer = false;
2856 type.storage = StorageClassGeneric;
2857 type.image.depth = depth;
2858
2859 ptr_type = type;
2860 ptr_type.pointer = true;
2861 ptr_type.storage = StorageClassUniformConstant;
2862 ptr_type.parent_type = type_id;
2863
2864 // Build new variable.
2865 compiler.set<SPIRVariable>(id: combined_id, args&: ptr_type_id, args: StorageClassFunction, args: 0);
2866
2867 // Inherit RelaxedPrecision.
2868 // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
2869 bool relaxed_precision =
2870 compiler.has_decoration(id: sampler_id, decoration: DecorationRelaxedPrecision) ||
2871 compiler.has_decoration(id: image_id, decoration: DecorationRelaxedPrecision) ||
2872 (combined_module_id && compiler.has_decoration(id: combined_module_id, decoration: DecorationRelaxedPrecision));
2873
2874 if (relaxed_precision)
2875 compiler.set_decoration(id: combined_id, decoration: DecorationRelaxedPrecision);
2876
2877 param.id = combined_id;
2878
2879 compiler.set_name(id: combined_id,
2880 name: join(ts: "SPIRV_Cross_Combined", ts: compiler.to_name(id: image_id), ts: compiler.to_name(id: sampler_id)));
2881
2882 caller.combined_parameters.push_back(t: param);
2883 caller.shadow_arguments.push_back(t: { .type: ptr_type_id, .id: combined_id, .read_count: 0u, .write_count: 0u, .alias_global_variable: true });
2884 }
2885}
2886
2887bool Compiler::DummySamplerForCombinedImageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
2888{
2889 if (need_dummy_sampler)
2890 {
2891 // No need to traverse further, we know the result.
2892 return false;
2893 }
2894
2895 switch (opcode)
2896 {
2897 case OpLoad:
2898 {
2899 if (length < 3)
2900 return false;
2901
2902 uint32_t result_type = args[0];
2903
2904 auto &type = compiler.get<SPIRType>(id: result_type);
2905 bool separate_image =
2906 type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer;
2907
2908 // If not separate image, don't bother.
2909 if (!separate_image)
2910 return true;
2911
2912 uint32_t id = args[1];
2913 uint32_t ptr = args[2];
2914 compiler.set<SPIRExpression>(id, args: "", args&: result_type, args: true);
2915 compiler.register_read(expr: id, chain: ptr, forwarded: true);
2916 break;
2917 }
2918
2919 case OpImageFetch:
2920 case OpImageQuerySizeLod:
2921 case OpImageQuerySize:
2922 case OpImageQueryLevels:
2923 case OpImageQuerySamples:
2924 {
2925 // If we are fetching or querying LOD from a plain OpTypeImage, we must pre-combine with our dummy sampler.
2926 auto *var = compiler.maybe_get_backing_variable(chain: args[2]);
2927 if (var)
2928 {
2929 auto &type = compiler.get<SPIRType>(id: var->basetype);
2930 if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
2931 need_dummy_sampler = true;
2932 }
2933
2934 break;
2935 }
2936
2937 case OpInBoundsAccessChain:
2938 case OpAccessChain:
2939 case OpPtrAccessChain:
2940 {
2941 if (length < 3)
2942 return false;
2943
2944 uint32_t result_type = args[0];
2945 auto &type = compiler.get<SPIRType>(id: result_type);
2946 bool separate_image =
2947 type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer;
2948 if (!separate_image)
2949 return true;
2950
2951 uint32_t id = args[1];
2952 uint32_t ptr = args[2];
2953 compiler.set<SPIRExpression>(id, args: "", args&: result_type, args: true);
2954 compiler.register_read(expr: id, chain: ptr, forwarded: true);
2955
2956 // Other backends might use SPIRAccessChain for this later.
2957 compiler.ir.ids[id].set_allow_type_rewrite();
2958 break;
2959 }
2960
2961 default:
2962 break;
2963 }
2964
2965 return true;
2966}
2967
2968bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
2969{
2970 // We need to figure out where samplers and images are loaded from, so do only the bare bones compilation we need.
2971 bool is_fetch = false;
2972
2973 switch (opcode)
2974 {
2975 case OpLoad:
2976 {
2977 if (length < 3)
2978 return false;
2979
2980 uint32_t result_type = args[0];
2981
2982 auto &type = compiler.get<SPIRType>(id: result_type);
2983 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
2984 bool separate_sampler = type.basetype == SPIRType::Sampler;
2985
2986 // If not separate image or sampler, don't bother.
2987 if (!separate_image && !separate_sampler)
2988 return true;
2989
2990 uint32_t id = args[1];
2991 uint32_t ptr = args[2];
2992 compiler.set<SPIRExpression>(id, args: "", args&: result_type, args: true);
2993 compiler.register_read(expr: id, chain: ptr, forwarded: true);
2994 return true;
2995 }
2996
2997 case OpInBoundsAccessChain:
2998 case OpAccessChain:
2999 case OpPtrAccessChain:
3000 {
3001 if (length < 3)
3002 return false;
3003
3004 // Technically, it is possible to have arrays of textures and arrays of samplers and combine them, but this becomes essentially
3005 // impossible to implement, since we don't know which concrete sampler we are accessing.
3006 // One potential way is to create a combinatorial explosion where N textures and M samplers are combined into N * M sampler2Ds,
3007 // but this seems ridiculously complicated for a problem which is easy to work around.
3008		// Checking access chains like this assumes we don't have samplers or textures inside uniform structs, but such usage makes little sense anyway.
3009
3010 uint32_t result_type = args[0];
3011
3012 auto &type = compiler.get<SPIRType>(id: result_type);
3013 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
3014 bool separate_sampler = type.basetype == SPIRType::Sampler;
3015 if (separate_sampler)
3016 SPIRV_CROSS_THROW(
3017 "Attempting to use arrays or structs of separate samplers. This is not possible to statically "
3018 "remap to plain GLSL.");
3019
3020 if (separate_image)
3021 {
3022 uint32_t id = args[1];
3023 uint32_t ptr = args[2];
3024 compiler.set<SPIRExpression>(id, args: "", args&: result_type, args: true);
3025 compiler.register_read(expr: id, chain: ptr, forwarded: true);
3026 }
3027 return true;
3028 }
3029
3030 case OpImageFetch:
3031 case OpImageQuerySizeLod:
3032 case OpImageQuerySize:
3033 case OpImageQueryLevels:
3034 case OpImageQuerySamples:
3035 {
3036 // If we are fetching from a plain OpTypeImage or querying LOD, we must pre-combine with our dummy sampler.
3037 auto *var = compiler.maybe_get_backing_variable(chain: args[2]);
3038 if (!var)
3039 return true;
3040
3041 auto &type = compiler.get<SPIRType>(id: var->basetype);
3042 if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
3043 {
3044 if (compiler.dummy_sampler_id == 0)
3045 SPIRV_CROSS_THROW("texelFetch without sampler was found, but no dummy sampler has been created with "
3046 "build_dummy_sampler_for_combined_images().");
3047
3048 // Do it outside.
3049 is_fetch = true;
3050 break;
3051 }
3052
3053 return true;
3054 }
3055
3056 case OpSampledImage:
3057 // Do it outside.
3058 break;
3059
3060 default:
3061 return true;
3062 }
3063
3064	// Register sampler2D calls here in case the operands are function parameters, so that
3065	// callees know which combined image samplers to propagate down the call stack.
3066 if (!functions.empty())
3067 {
3068 auto &callee = *functions.top();
3069 if (callee.do_combined_parameters)
3070 {
3071 uint32_t image_id = args[2];
3072
3073 auto *image = compiler.maybe_get_backing_variable(chain: image_id);
3074 if (image)
3075 image_id = image->self;
3076
3077 uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : args[3];
3078 auto *sampler = compiler.maybe_get_backing_variable(chain: sampler_id);
3079 if (sampler)
3080 sampler_id = sampler->self;
3081
3082 uint32_t combined_id = args[1];
3083
3084 auto &combined_type = compiler.get<SPIRType>(id: args[0]);
3085 register_combined_image_sampler(caller&: callee, combined_module_id: combined_id, image_id, sampler_id, depth: combined_type.image.depth);
3086 }
3087 }
3088
3089 // For function calls, we need to remap IDs which are function parameters into global variables.
3090 // This information is statically known from the current place in the call stack.
3091	// Function parameters are not necessarily pointers, so even if we don't find a backing variable here,
3092	// the remapping table knows which backing variable the image/sampler came from.
3093 VariableID image_id = remap_parameter(id: args[2]);
3094 VariableID sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(id: args[3]);
3095
3096 auto itr = find_if(first: begin(cont&: compiler.combined_image_samplers), last: end(cont&: compiler.combined_image_samplers),
3097 pred: [image_id, sampler_id](const CombinedImageSampler &combined) {
3098 return combined.image_id == image_id && combined.sampler_id == sampler_id;
3099 });
3100
3101 if (itr == end(cont&: compiler.combined_image_samplers))
3102 {
3103 uint32_t sampled_type;
3104 uint32_t combined_module_id;
3105 if (is_fetch)
3106 {
3107 // Have to invent the sampled image type.
3108 sampled_type = compiler.ir.increase_bound_by(count: 1);
3109 auto &type = compiler.set<SPIRType>(id: sampled_type, args: OpTypeSampledImage);
3110 type = compiler.expression_type(id: args[2]);
3111 type.self = sampled_type;
3112 type.basetype = SPIRType::SampledImage;
3113 type.image.depth = false;
3114 combined_module_id = 0;
3115 }
3116 else
3117 {
3118 sampled_type = args[0];
3119 combined_module_id = args[1];
3120 }
3121
3122 auto id = compiler.ir.increase_bound_by(count: 2);
3123 auto type_id = id + 0;
3124 auto combined_id = id + 1;
3125
3126 // Make a new type, pointer to OpTypeSampledImage, so we can make a variable of this type.
3127 // We will probably have this type lying around, but it doesn't hurt to make duplicates for internal purposes.
3128 auto &type = compiler.set<SPIRType>(id: type_id, args: OpTypePointer);
3129 auto &base = compiler.get<SPIRType>(id: sampled_type);
3130 type = base;
3131 type.pointer = true;
3132 type.storage = StorageClassUniformConstant;
3133 type.parent_type = type_id;
3134
3135 // Build new variable.
3136 compiler.set<SPIRVariable>(id: combined_id, args&: type_id, args: StorageClassUniformConstant, args: 0);
3137
3138 // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant).
3139 // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
3140 bool relaxed_precision =
3141 (sampler_id && compiler.has_decoration(id: sampler_id, decoration: DecorationRelaxedPrecision)) ||
3142 (image_id && compiler.has_decoration(id: image_id, decoration: DecorationRelaxedPrecision)) ||
3143 (combined_module_id && compiler.has_decoration(id: combined_module_id, decoration: DecorationRelaxedPrecision));
3144
3145 if (relaxed_precision)
3146 compiler.set_decoration(id: combined_id, decoration: DecorationRelaxedPrecision);
3147
3148 // Propagate the array type for the original image as well.
3149 auto *var = compiler.maybe_get_backing_variable(chain: image_id);
3150 if (var)
3151 {
3152 auto &parent_type = compiler.get<SPIRType>(id: var->basetype);
3153 type.array = parent_type.array;
3154 type.array_size_literal = parent_type.array_size_literal;
3155 }
3156
3157 compiler.combined_image_samplers.push_back(t: { .combined_id: combined_id, .image_id: image_id, .sampler_id: sampler_id });
3158 }
3159
3160 return true;
3161}
3162
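// Minimal usage sketch of the combined-image-sampler workaround (illustrative; assumes a
// Compiler-derived instance named "compiler" targeting a backend without separate samplers):
//
//     compiler.build_dummy_sampler_for_combined_images(); // only needed for sampler-less fetches
//     compiler.build_combined_image_samplers();
//     for (auto &remap : compiler.get_combined_image_samplers())
//         compiler.set_name(remap.combined_id, "combined_" + std::to_string(uint32_t(remap.image_id)));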
3163VariableID Compiler::build_dummy_sampler_for_combined_images()
3164{
3165 DummySamplerForCombinedImageHandler handler(*this);
3166 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
3167 if (handler.need_dummy_sampler)
3168 {
3169 uint32_t offset = ir.increase_bound_by(count: 3);
3170 auto type_id = offset + 0;
3171 auto ptr_type_id = offset + 1;
3172 auto var_id = offset + 2;
3173
3174 auto &sampler = set<SPIRType>(id: type_id, args: OpTypeSampler);
3175 sampler.basetype = SPIRType::Sampler;
3176
3177 auto &ptr_sampler = set<SPIRType>(id: ptr_type_id, args: OpTypePointer);
3178 ptr_sampler = sampler;
3179 ptr_sampler.self = type_id;
3180 ptr_sampler.storage = StorageClassUniformConstant;
3181 ptr_sampler.pointer = true;
3182 ptr_sampler.parent_type = type_id;
3183
3184 set<SPIRVariable>(id: var_id, args&: ptr_type_id, args: StorageClassUniformConstant, args: 0);
3185 set_name(id: var_id, name: "SPIRV_Cross_DummySampler");
3186 dummy_sampler_id = var_id;
3187 return var_id;
3188 }
3189 else
3190 return 0;
3191}
3192
3193void Compiler::build_combined_image_samplers()
3194{
3195 ir.for_each_typed_id<SPIRFunction>(op: [&](uint32_t, SPIRFunction &func) {
3196 func.combined_parameters.clear();
3197 func.shadow_arguments.clear();
3198 func.do_combined_parameters = true;
3199 });
3200
3201 combined_image_samplers.clear();
3202 CombinedImageSamplerHandler handler(*this);
3203 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
3204}
3205
3206SmallVector<SpecializationConstant> Compiler::get_specialization_constants() const
3207{
3208 SmallVector<SpecializationConstant> spec_consts;
3209 ir.for_each_typed_id<SPIRConstant>(op: [&](uint32_t, const SPIRConstant &c) {
3210 if (c.specialization && has_decoration(id: c.self, decoration: DecorationSpecId))
3211 spec_consts.push_back(t: { .id: c.self, .constant_id: get_decoration(id: c.self, decoration: DecorationSpecId) });
3212 });
3213 return spec_consts;
3214}
3215
3216SPIRConstant &Compiler::get_constant(ConstantID id)
3217{
3218 return get<SPIRConstant>(id);
3219}
3220
3221const SPIRConstant &Compiler::get_constant(ConstantID id) const
3222{
3223 return get<SPIRConstant>(id);
3224}
3225
3226static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set<uint32_t> &blocks,
3227 unordered_set<uint32_t> &visit_cache)
3228{
3229 // This block accesses the variable.
3230 if (blocks.find(x: block) != end(cont: blocks))
3231 return false;
3232
3233 // We are at the end of the CFG.
3234 if (cfg.get_succeeding_edges(block).empty())
3235 return true;
3236
3237 // If any of our successors have a path to the end, there exists a path from block.
3238 for (auto &succ : cfg.get_succeeding_edges(block))
3239 {
3240 if (visit_cache.count(x: succ) == 0)
3241 {
3242 if (exists_unaccessed_path_to_return(cfg, block: succ, blocks, visit_cache))
3243 return true;
3244 visit_cache.insert(x: succ);
3245 }
3246 }
3247
3248 return false;
3249}
3250
3251void Compiler::analyze_parameter_preservation(
3252 SPIRFunction &entry, const CFG &cfg, const unordered_map<uint32_t, unordered_set<uint32_t>> &variable_to_blocks,
3253 const unordered_map<uint32_t, unordered_set<uint32_t>> &complete_write_blocks)
3254{
3255 for (auto &arg : entry.arguments)
3256 {
3257 // Non-pointers are always inputs.
3258 auto &type = get<SPIRType>(id: arg.type);
3259 if (!type.pointer)
3260 continue;
3261
3262		// Opaque argument types are always inputs.
3263 bool potential_preserve;
3264 switch (type.basetype)
3265 {
3266 case SPIRType::Sampler:
3267 case SPIRType::Image:
3268 case SPIRType::SampledImage:
3269 case SPIRType::AtomicCounter:
3270 potential_preserve = false;
3271 break;
3272
3273 default:
3274 potential_preserve = true;
3275 break;
3276 }
3277
3278 if (!potential_preserve)
3279 continue;
3280
3281 auto itr = variable_to_blocks.find(x: arg.id);
3282 if (itr == end(cont: variable_to_blocks))
3283 {
3284 // Variable is never accessed.
3285 continue;
3286 }
3287
3288		// We have accessed a variable, but there were no complete writes to that variable.
3289 // We deduce that we must preserve the argument.
3290 itr = complete_write_blocks.find(x: arg.id);
3291 if (itr == end(cont: complete_write_blocks))
3292 {
3293 arg.read_count++;
3294 continue;
3295 }
3296
3297 // If there is a path through the CFG where no block completely writes to the variable, the variable will be in an undefined state
3298 // when the function returns. We therefore need to implicitly preserve the variable in case there are writers in the function.
3299		// The major case here is a function like
3300		// void foo(int &var) { if (cond) var = 10; }
3301		// Using read/write counts alone, we would think it's just an out variable, but it really needs to be inout,
3302		// because if nothing is written, whatever was passed into the function must be returned unchanged to the caller.
3303 unordered_set<uint32_t> visit_cache;
3304 if (exists_unaccessed_path_to_return(cfg, block: entry.entry_block, blocks: itr->second, visit_cache))
3305 arg.read_count++;
3306 }
3307}
3308
3309Compiler::AnalyzeVariableScopeAccessHandler::AnalyzeVariableScopeAccessHandler(Compiler &compiler_,
3310 SPIRFunction &entry_)
3311 : compiler(compiler_)
3312 , entry(entry_)
3313{
3314}
3315
3316bool Compiler::AnalyzeVariableScopeAccessHandler::follow_function_call(const SPIRFunction &)
3317{
3318 // Only analyze within this function.
3319 return false;
3320}
3321
3322void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBlock &block)
3323{
3324 current_block = &block;
3325
3326 // If we're branching to a block which uses OpPhi, in GLSL
3327 // this will be a variable write when we branch,
3328 // so we need to track access to these variables as well to
3329 // have a complete picture.
3330 const auto test_phi = [this, &block](uint32_t to) {
3331 auto &next = compiler.get<SPIRBlock>(id: to);
3332 for (auto &phi : next.phi_variables)
3333 {
3334 if (phi.parent == block.self)
3335 {
3336 accessed_variables_to_block[phi.function_variable].insert(x: block.self);
3337 // Phi variables are also accessed in our target branch block.
3338 accessed_variables_to_block[phi.function_variable].insert(x: next.self);
3339
3340 notify_variable_access(id: phi.local_variable, block: block.self);
3341 }
3342 }
3343 };
3344
3345 switch (block.terminator)
3346 {
3347 case SPIRBlock::Direct:
3348 notify_variable_access(id: block.condition, block: block.self);
3349 test_phi(block.next_block);
3350 break;
3351
3352 case SPIRBlock::Select:
3353 notify_variable_access(id: block.condition, block: block.self);
3354 test_phi(block.true_block);
3355 test_phi(block.false_block);
3356 break;
3357
3358 case SPIRBlock::MultiSelect:
3359 {
3360 notify_variable_access(id: block.condition, block: block.self);
3361 auto &cases = compiler.get_case_list(block);
3362 for (auto &target : cases)
3363 test_phi(target.block);
3364 if (block.default_block)
3365 test_phi(block.default_block);
3366 break;
3367 }
3368
3369 default:
3370 break;
3371 }
3372}
3373
3374void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block)
3375{
3376 if (id == 0)
3377 return;
3378
3379	// Access chains used in multiple blocks mean we have to hoist all the variables used to construct the access chain, as not all backends can use pointers.
3380 auto itr = rvalue_forward_children.find(x: id);
3381 if (itr != end(cont&: rvalue_forward_children))
3382 for (auto child_id : itr->second)
3383 notify_variable_access(id: child_id, block);
3384
3385 if (id_is_phi_variable(id))
3386 accessed_variables_to_block[id].insert(x: block);
3387 else if (id_is_potential_temporary(id))
3388 accessed_temporaries_to_block[id].insert(x: block);
3389}
3390
3391bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_phi_variable(uint32_t id) const
3392{
3393 if (id >= compiler.get_current_id_bound())
3394 return false;
3395 auto *var = compiler.maybe_get<SPIRVariable>(id);
3396 return var && var->phi_variable;
3397}
3398
3399bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint32_t id) const
3400{
3401 if (id >= compiler.get_current_id_bound())
3402 return false;
3403
3404 // Temporaries are not created before we start emitting code.
3405 return compiler.ir.ids[id].empty() || (compiler.ir.ids[id].get_type() == TypeExpression);
3406}
3407
3408bool Compiler::AnalyzeVariableScopeAccessHandler::handle_terminator(const SPIRBlock &block)
3409{
3410 switch (block.terminator)
3411 {
3412 case SPIRBlock::Return:
3413 if (block.return_value)
3414 notify_variable_access(id: block.return_value, block: block.self);
3415 break;
3416
3417 case SPIRBlock::Select:
3418 case SPIRBlock::MultiSelect:
3419 notify_variable_access(id: block.condition, block: block.self);
3420 break;
3421
3422 default:
3423 break;
3424 }
3425
3426 return true;
3427}
3428
3429bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length)
3430{
3431 // Keep track of the types of temporaries, so we can hoist them out as necessary.
3432 uint32_t result_type = 0, result_id = 0;
3433 if (compiler.instruction_to_result_type(result_type, result_id, op, args, length))
3434 {
3435 // For some opcodes, we will need to override the result id.
3436 // If we need to hoist the temporary, the temporary type is the input, not the result.
3437 if (op == OpConvertUToAccelerationStructureKHR)
3438 {
3439 auto itr = result_id_to_type.find(x: args[2]);
3440 if (itr != result_id_to_type.end())
3441 result_type = itr->second;
3442 }
3443
3444 result_id_to_type[result_id] = result_type;
3445 }
3446
3447 switch (op)
3448 {
3449 case OpStore:
3450 {
3451 if (length < 2)
3452 return false;
3453
3454 ID ptr = args[0];
3455 auto *var = compiler.maybe_get_backing_variable(chain: ptr);
3456
3457 // If we store through an access chain, we have a partial write.
3458 if (var)
3459 {
3460 accessed_variables_to_block[var->self].insert(x: current_block->self);
3461 if (var->self == ptr)
3462 complete_write_variables_to_block[var->self].insert(x: current_block->self);
3463 else
3464 partial_write_variables_to_block[var->self].insert(x: current_block->self);
3465 }
3466
3467 // args[0] might be an access chain we have to track use of.
3468 notify_variable_access(id: args[0], block: current_block->self);
3469 // Might try to store a Phi variable here.
3470 notify_variable_access(id: args[1], block: current_block->self);
3471 break;
3472 }
3473
3474 case OpAccessChain:
3475 case OpInBoundsAccessChain:
3476 case OpPtrAccessChain:
3477 {
3478 if (length < 3)
3479 return false;
3480
		// If an access chain is used in multiple blocks, we must hoist all the variables used to construct
		// the access chain, since not all backends can use pointers.
3482 uint32_t ptr = args[2];
3483 auto *var = compiler.maybe_get<SPIRVariable>(id: ptr);
3484 if (var)
3485 {
3486 accessed_variables_to_block[var->self].insert(x: current_block->self);
3487 rvalue_forward_children[args[1]].insert(x: var->self);
3488 }
3489
3490 // args[2] might be another access chain we have to track use of.
3491 for (uint32_t i = 2; i < length; i++)
3492 {
3493 notify_variable_access(id: args[i], block: current_block->self);
3494 rvalue_forward_children[args[1]].insert(x: args[i]);
3495 }
3496
3497 // Also keep track of the access chain pointer itself.
		// In exceptionally rare cases, the access chain is generated in the loop body,
		// but is consumed in the continue block.
3500 // This means we need complex loop workarounds, and we must detect this via CFG analysis.
3501 notify_variable_access(id: args[1], block: current_block->self);
3502
3503 // The result of an access chain is a fixed expression and is not really considered a temporary.
3504 auto &e = compiler.set<SPIRExpression>(id: args[1], args: "", args: args[0], args: true);
3505 auto *backing_variable = compiler.maybe_get_backing_variable(chain: ptr);
3506 e.loaded_from = backing_variable ? VariableID(backing_variable->self) : VariableID(0);
3507
3508 // Other backends might use SPIRAccessChain for this later.
3509 compiler.ir.ids[args[1]].set_allow_type_rewrite();
3510 access_chain_expressions.insert(x: args[1]);
3511 break;
3512 }
3513
3514 case OpCopyMemory:
3515 {
3516 if (length < 2)
3517 return false;
3518
3519 ID lhs = args[0];
3520 ID rhs = args[1];
3521 auto *var = compiler.maybe_get_backing_variable(chain: lhs);
3522
3523 // If we store through an access chain, we have a partial write.
3524 if (var)
3525 {
3526 accessed_variables_to_block[var->self].insert(x: current_block->self);
3527 if (var->self == lhs)
3528 complete_write_variables_to_block[var->self].insert(x: current_block->self);
3529 else
3530 partial_write_variables_to_block[var->self].insert(x: current_block->self);
3531 }
3532
3533 // args[0:1] might be access chains we have to track use of.
3534 for (uint32_t i = 0; i < 2; i++)
3535 notify_variable_access(id: args[i], block: current_block->self);
3536
3537 var = compiler.maybe_get_backing_variable(chain: rhs);
3538 if (var)
3539 accessed_variables_to_block[var->self].insert(x: current_block->self);
3540 break;
3541 }
3542
3543 case OpCopyObject:
3544 {
3545 // OpCopyObject copies the underlying non-pointer type,
3546 // so any temp variable should be declared using the underlying type.
3547 // If the type is a pointer, get its base type and overwrite the result type mapping.
3548 auto &type = compiler.get<SPIRType>(id: result_type);
3549 if (type.pointer)
3550 result_id_to_type[result_id] = type.parent_type;
3551
3552 if (length < 3)
3553 return false;
3554
3555 auto *var = compiler.maybe_get_backing_variable(chain: args[2]);
3556 if (var)
3557 accessed_variables_to_block[var->self].insert(x: current_block->self);
3558
3559 // Might be an access chain which we have to keep track of.
3560 notify_variable_access(id: args[1], block: current_block->self);
3561 if (access_chain_expressions.count(x: args[2]))
3562 access_chain_expressions.insert(x: args[1]);
3563
3564 // Might try to copy a Phi variable here.
3565 notify_variable_access(id: args[2], block: current_block->self);
3566 break;
3567 }
3568
3569 case OpLoad:
3570 {
3571 if (length < 3)
3572 return false;
3573 uint32_t ptr = args[2];
3574 auto *var = compiler.maybe_get_backing_variable(chain: ptr);
3575 if (var)
3576 accessed_variables_to_block[var->self].insert(x: current_block->self);
3577
3578 // Loaded value is a temporary.
3579 notify_variable_access(id: args[1], block: current_block->self);
3580
3581 // Might be an access chain we have to track use of.
3582 notify_variable_access(id: args[2], block: current_block->self);
3583
		// If we're loading an opaque type, we cannot lower it to a temporary;
		// we must defer the access of args[2] until it's used.
3586 auto &type = compiler.get<SPIRType>(id: args[0]);
3587 if (compiler.type_is_opaque_value(type))
3588 rvalue_forward_children[args[1]].insert(x: args[2]);
3589 break;
3590 }
3591
3592 case OpFunctionCall:
3593 {
3594 if (length < 3)
3595 return false;
3596
3597 // Return value may be a temporary.
3598 if (compiler.get_type(id: args[0]).basetype != SPIRType::Void)
3599 notify_variable_access(id: args[1], block: current_block->self);
3600
3601 length -= 3;
3602 args += 3;
3603
3604 for (uint32_t i = 0; i < length; i++)
3605 {
3606 auto *var = compiler.maybe_get_backing_variable(chain: args[i]);
3607 if (var)
3608 {
3609 accessed_variables_to_block[var->self].insert(x: current_block->self);
3610 // Assume we can get partial writes to this variable.
3611 partial_write_variables_to_block[var->self].insert(x: current_block->self);
3612 }
3613
			// We cannot easily prove whether an argument we pass to a function is completely written.
			// Usually, functions write to a dummy variable,
			// which is then copied in full to the real argument.
3617
3618 // Might try to copy a Phi variable here.
3619 notify_variable_access(id: args[i], block: current_block->self);
3620 }
3621 break;
3622 }
3623
3624 case OpSelect:
3625 {
3626 // In case of variable pointers, we might access a variable here.
3627 // We cannot prove anything about these accesses however.
3628 for (uint32_t i = 1; i < length; i++)
3629 {
3630 if (i >= 3)
3631 {
3632 auto *var = compiler.maybe_get_backing_variable(chain: args[i]);
3633 if (var)
3634 {
3635 accessed_variables_to_block[var->self].insert(x: current_block->self);
3636 // Assume we can get partial writes to this variable.
3637 partial_write_variables_to_block[var->self].insert(x: current_block->self);
3638 }
3639 }
3640
3641 // Might try to copy a Phi variable here.
3642 notify_variable_access(id: args[i], block: current_block->self);
3643 }
3644 break;
3645 }
3646
3647 case OpExtInst:
3648 {
3649 for (uint32_t i = 4; i < length; i++)
3650 notify_variable_access(id: args[i], block: current_block->self);
3651 notify_variable_access(id: args[1], block: current_block->self);
3652
3653 uint32_t extension_set = args[2];
3654 if (compiler.get<SPIRExtension>(id: extension_set).ext == SPIRExtension::GLSL)
3655 {
3656 auto op_450 = static_cast<GLSLstd450>(args[3]);
3657 switch (op_450)
3658 {
3659 case GLSLstd450Modf:
3660 case GLSLstd450Frexp:
3661 {
3662 uint32_t ptr = args[5];
3663 auto *var = compiler.maybe_get_backing_variable(chain: ptr);
3664 if (var)
3665 {
3666 accessed_variables_to_block[var->self].insert(x: current_block->self);
3667 if (var->self == ptr)
3668 complete_write_variables_to_block[var->self].insert(x: current_block->self);
3669 else
3670 partial_write_variables_to_block[var->self].insert(x: current_block->self);
3671 }
3672 break;
3673 }
3674
3675 default:
3676 break;
3677 }
3678 }
3679 break;
3680 }
3681
3682 case OpArrayLength:
3683 // Only result is a temporary.
3684 notify_variable_access(id: args[1], block: current_block->self);
3685 break;
3686
3687 case OpLine:
3688 case OpNoLine:
3689 // Uses literals, but cannot be a phi variable or temporary, so ignore.
3690 break;
3691
3692 // Atomics shouldn't be able to access function-local variables.
3693 // Some GLSL builtins access a pointer.
3694
3695 case OpCompositeInsert:
3696 case OpVectorShuffle:
3697 // Specialize for opcode which contains literals.
3698 for (uint32_t i = 1; i < 4; i++)
3699 notify_variable_access(id: args[i], block: current_block->self);
3700 break;
3701
3702 case OpCompositeExtract:
3703 // Specialize for opcode which contains literals.
3704 for (uint32_t i = 1; i < 3; i++)
3705 notify_variable_access(id: args[i], block: current_block->self);
3706 break;
3707
3708 case OpImageWrite:
3709 for (uint32_t i = 0; i < length; i++)
3710 {
3711 // Argument 3 is a literal.
3712 if (i != 3)
3713 notify_variable_access(id: args[i], block: current_block->self);
3714 }
3715 break;
3716
3717 case OpImageSampleImplicitLod:
3718 case OpImageSampleExplicitLod:
3719 case OpImageSparseSampleImplicitLod:
3720 case OpImageSparseSampleExplicitLod:
3721 case OpImageSampleProjImplicitLod:
3722 case OpImageSampleProjExplicitLod:
3723 case OpImageSparseSampleProjImplicitLod:
3724 case OpImageSparseSampleProjExplicitLod:
3725 case OpImageFetch:
3726 case OpImageSparseFetch:
3727 case OpImageRead:
3728 case OpImageSparseRead:
3729 for (uint32_t i = 1; i < length; i++)
3730 {
3731 // Argument 4 is a literal.
3732 if (i != 4)
3733 notify_variable_access(id: args[i], block: current_block->self);
3734 }
3735 break;
3736
3737 case OpImageSampleDrefImplicitLod:
3738 case OpImageSampleDrefExplicitLod:
3739 case OpImageSparseSampleDrefImplicitLod:
3740 case OpImageSparseSampleDrefExplicitLod:
3741 case OpImageSampleProjDrefImplicitLod:
3742 case OpImageSampleProjDrefExplicitLod:
3743 case OpImageSparseSampleProjDrefImplicitLod:
3744 case OpImageSparseSampleProjDrefExplicitLod:
3745 case OpImageGather:
3746 case OpImageSparseGather:
3747 case OpImageDrefGather:
3748 case OpImageSparseDrefGather:
3749 for (uint32_t i = 1; i < length; i++)
3750 {
3751 // Argument 5 is a literal.
3752 if (i != 5)
3753 notify_variable_access(id: args[i], block: current_block->self);
3754 }
3755 break;
3756
3757 default:
3758 {
3759 // Rather dirty way of figuring out where Phi variables are used.
3760 // As long as only IDs are used, we can scan through instructions and try to find any evidence that
3761 // the ID of a variable has been used.
		// There are potential false positives here where a literal is used in place of an ID,
3763 // but worst case, it does not affect the correctness of the compile.
3764 // Exhaustive analysis would be better here, but it's not worth it for now.
3765 for (uint32_t i = 0; i < length; i++)
3766 notify_variable_access(id: args[i], block: current_block->self);
3767 break;
3768 }
3769 }
3770 return true;
3771}
3772
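// StaticExpressionAccessHandler verifies that a candidate LUT variable is defined by a single store
// of a constant expression. Traversal is aborted if the variable is loaded before it has been written,
// or if it is accessed through an access chain.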
3773Compiler::StaticExpressionAccessHandler::StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_)
3774 : compiler(compiler_)
3775 , variable_id(variable_id_)
3776{
3777}
3778
3779bool Compiler::StaticExpressionAccessHandler::follow_function_call(const SPIRFunction &)
3780{
3781 return false;
3782}
3783
3784bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length)
3785{
3786 switch (op)
3787 {
3788 case OpStore:
3789 if (length < 2)
3790 return false;
3791 if (args[0] == variable_id)
3792 {
3793 static_expression = args[1];
3794 write_count++;
3795 }
3796 break;
3797
3798 case OpLoad:
3799 if (length < 3)
3800 return false;
3801 if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized.
3802 return false;
3803 break;
3804
3805 case OpAccessChain:
3806 case OpInBoundsAccessChain:
3807 case OpPtrAccessChain:
3808 if (length < 3)
3809 return false;
3810 if (args[2] == variable_id) // If we try to access chain our candidate variable before we store to it, bail.
3811 return false;
3812 break;
3813
3814 default:
3815 break;
3816 }
3817
3818 return true;
3819}
3820
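// Detects function-local arrays which behave as immutable lookup tables (LUTs).
// A variable qualifies if its contents only ever come from a constant, either through its initializer
// or through a single dominating store of a constant, with no other writes.
// Roughly, in GLSL terms, this is the pattern (illustrative sketch only):
//
//     const float lut[4] = float[](1.0, 2.0, 3.0, 4.0);
//     ... only reads of lut[index] follow ...
//
// Such variables are remapped so backends can emit them as constant data instead of mutable storage.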
3821void Compiler::find_function_local_luts(SPIRFunction &entry, const AnalyzeVariableScopeAccessHandler &handler,
3822 bool single_function)
3823{
3824 auto &cfg = *function_cfgs.find(x: entry.self)->second;
3825
3826 // For each variable which is statically accessed.
3827 for (auto &accessed_var : handler.accessed_variables_to_block)
3828 {
3829 auto &blocks = accessed_var.second;
3830 auto &var = get<SPIRVariable>(id: accessed_var.first);
3831 auto &type = expression_type(id: accessed_var.first);
3832
		// First, check if there are writes to the variable. Later, if there are none, we'll
		// reconsider it as a globally accessed LUT.
3835 if (!var.is_written_to)
3836 {
3837 var.is_written_to = handler.complete_write_variables_to_block.count(x: var.self) != 0 ||
3838 handler.partial_write_variables_to_block.count(x: var.self) != 0;
3839 }
3840
3841 // Only consider function local variables here.
3842 // If we only have a single function in our CFG, private storage is also fine,
3843 // since it behaves like a function local variable.
3844 bool allow_lut = var.storage == StorageClassFunction || (single_function && var.storage == StorageClassPrivate);
3845 if (!allow_lut)
3846 continue;
3847
3848 // We cannot be a phi variable.
3849 if (var.phi_variable)
3850 continue;
3851
3852 // Only consider arrays here.
3853 if (type.array.empty())
3854 continue;
3855
3856 // If the variable has an initializer, make sure it is a constant expression.
3857 uint32_t static_constant_expression = 0;
3858 if (var.initializer)
3859 {
3860 if (ir.ids[var.initializer].get_type() != TypeConstant)
3861 continue;
3862 static_constant_expression = var.initializer;
3863
			// There must be no stores to this variable; otherwise, it cannot be a LUT.
3865 if (var.is_written_to)
3866 continue;
3867 }
3868 else
3869 {
			// We can have one, and only one, write to the variable, and that write needs to be a constant.
3871
3872 // No partial writes allowed.
3873 if (handler.partial_write_variables_to_block.count(x: var.self) != 0)
3874 continue;
3875
3876 auto itr = handler.complete_write_variables_to_block.find(x: var.self);
3877
3878 // No writes?
3879 if (itr == end(cont: handler.complete_write_variables_to_block))
3880 continue;
3881
			// Bail if we write to the variable in more than one block.
3883 auto &write_blocks = itr->second;
3884 if (write_blocks.size() != 1)
3885 continue;
3886
3887 // The write needs to happen in the dominating block.
3888 DominatorBuilder builder(cfg);
3889 for (auto &block : blocks)
3890 builder.add_block(block);
3891 uint32_t dominator = builder.get_dominator();
3892
			// The complete write happened in a branch or similar; we cannot deduce a static expression.
3894 if (write_blocks.count(x: dominator) == 0)
3895 continue;
3896
3897 // Find the static expression for this variable.
3898 StaticExpressionAccessHandler static_expression_handler(*this, var.self);
3899 traverse_all_reachable_opcodes(block: get<SPIRBlock>(id: dominator), handler&: static_expression_handler);
3900
			// We want one, and exactly one, write.
3902 if (static_expression_handler.write_count != 1 || static_expression_handler.static_expression == 0)
3903 continue;
3904
3905 // Is it a constant expression?
3906 if (ir.ids[static_expression_handler.static_expression].get_type() != TypeConstant)
3907 continue;
3908
3909 // We found a LUT!
3910 static_constant_expression = static_expression_handler.static_expression;
3911 }
3912
3913 get<SPIRConstant>(id: static_constant_expression).is_used_as_lut = true;
3914 var.static_expression = static_constant_expression;
3915 var.statically_assigned = true;
3916 var.remapped_variable = true;
3917 }
3918}
3919
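// Determines, for every function-local variable and hoisted temporary, which block should declare it.
// The analysis walks all reachable opcodes, computes loop dominators per block, and then uses
// dominator analysis per variable to find a declaration point, taking continue blocks and
// loop variable candidates into account.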
3920void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeAccessHandler &handler)
3921{
3922 // First, we map out all variable access within a function.
3923 // Essentially a map of block -> { variables accessed in the basic block }
3924 traverse_all_reachable_opcodes(func: entry, handler);
3925
3926 auto &cfg = *function_cfgs.find(x: entry.self)->second;
3927
3928 // Analyze if there are parameters which need to be implicitly preserved with an "in" qualifier.
3929 analyze_parameter_preservation(entry, cfg, variable_to_blocks: handler.accessed_variables_to_block,
3930 complete_write_blocks: handler.complete_write_variables_to_block);
3931
3932 unordered_map<uint32_t, uint32_t> potential_loop_variables;
3933
3934 // Find the loop dominator block for each block.
3935 for (auto &block_id : entry.blocks)
3936 {
3937 auto &block = get<SPIRBlock>(id: block_id);
3938
3939 auto itr = ir.continue_block_to_loop_header.find(x: block_id);
3940 if (itr != end(cont&: ir.continue_block_to_loop_header) && itr->second != block_id)
3941 {
			// The continue block might be unreachable in the CFG, but we still want to know the loop dominator.
			// The edge case is when the continue block is also the loop header; don't set the dominator in that case.
3944 block.loop_dominator = itr->second;
3945 }
3946 else
3947 {
3948 uint32_t loop_dominator = cfg.find_loop_dominator(block: block_id);
3949 if (loop_dominator != block_id)
3950 block.loop_dominator = loop_dominator;
3951 else
3952 block.loop_dominator = SPIRBlock::NoDominator;
3953 }
3954 }
3955
3956 // For each variable which is statically accessed.
3957 for (auto &var : handler.accessed_variables_to_block)
3958 {
3959 // Only deal with variables which are considered local variables in this function.
3960 if (find(first: begin(cont&: entry.local_variables), last: end(cont&: entry.local_variables), val: VariableID(var.first)) ==
3961 end(cont&: entry.local_variables))
3962 continue;
3963
3964 DominatorBuilder builder(cfg);
3965 auto &blocks = var.second;
3966 auto &type = expression_type(id: var.first);
3967 BlockID potential_continue_block = 0;
3968
3969 // Figure out which block is dominating all accesses of those variables.
3970 for (auto &block : blocks)
3971 {
3972 // If we're accessing a variable inside a continue block, this variable might be a loop variable.
3973 // We can only use loop variables with scalars, as we cannot track static expressions for vectors.
3974 if (is_continue(next: block))
3975 {
3976 // Potentially awkward case to check for.
3977 // We might have a variable inside a loop, which is touched by the continue block,
3978 // but is not actually a loop variable.
3979 // The continue block is dominated by the inner part of the loop, which does not make sense in high-level
3980 // language output because it will be declared before the body,
3981 // so we will have to lift the dominator up to the relevant loop header instead.
3982 builder.add_block(block: ir.continue_block_to_loop_header[block]);
3983
3984 // Arrays or structs cannot be loop variables.
3985 if (type.vecsize == 1 && type.columns == 1 && type.basetype != SPIRType::Struct && type.array.empty())
3986 {
				// If the variable is used in multiple continue blocks, it is not a loop variable
				// candidate; signal that by setting the block to ~0u.
3989 if (potential_continue_block == 0)
3990 potential_continue_block = block;
3991 else
3992 potential_continue_block = ~(0u);
3993 }
3994 }
3995
3996 builder.add_block(block);
3997 }
3998
3999 builder.lift_continue_block_dominator();
4000
4001 // Add it to a per-block list of variables.
4002 BlockID dominating_block = builder.get_dominator();
4003
4004 if (dominating_block && potential_continue_block != 0 && potential_continue_block != ~0u)
4005 {
4006 auto &inner_block = get<SPIRBlock>(id: dominating_block);
4007
4008 BlockID merge_candidate = 0;
4009
4010 // Analyze the dominator. If it lives in a different loop scope than the candidate continue
4011 // block, reject the loop variable candidate.
4012 if (inner_block.merge == SPIRBlock::MergeLoop)
4013 merge_candidate = inner_block.merge_block;
4014 else if (inner_block.loop_dominator != SPIRBlock::NoDominator)
4015 merge_candidate = get<SPIRBlock>(id: inner_block.loop_dominator).merge_block;
4016
4017 if (merge_candidate != 0 && cfg.is_reachable(block: merge_candidate))
4018 {
				// If the merge block has a higher post-visit order, we know that the continue candidate
				// cannot reach the merge block, and we have two separate scopes.
4021 if (!cfg.is_reachable(block: potential_continue_block) ||
4022 cfg.get_visit_order(block: merge_candidate) > cfg.get_visit_order(block: potential_continue_block))
4023 {
4024 potential_continue_block = 0;
4025 }
4026 }
4027 }
4028
4029 if (potential_continue_block != 0 && potential_continue_block != ~0u)
4030 potential_loop_variables[var.first] = potential_continue_block;
4031
4032 // For variables whose dominating block is inside a loop, there is a risk that these variables
4033 // actually need to be preserved across loop iterations. We can express this by adding
4034 // a "read" access to the loop header.
4035 // In the dominating block, we must see an OpStore or equivalent as the first access of an OpVariable.
4036 // Should that fail, we look for the outermost loop header and tack on an access there.
4037 // Phi nodes cannot have this problem.
4038 if (dominating_block)
4039 {
4040 auto &variable = get<SPIRVariable>(id: var.first);
4041 if (!variable.phi_variable)
4042 {
4043 auto *block = &get<SPIRBlock>(id: dominating_block);
4044 bool preserve = may_read_undefined_variable_in_block(block: *block, var: var.first);
4045 if (preserve)
4046 {
4047 // Find the outermost loop scope.
4048 while (block->loop_dominator != BlockID(SPIRBlock::NoDominator))
4049 block = &get<SPIRBlock>(id: block->loop_dominator);
4050
4051 if (block->self != dominating_block)
4052 {
4053 builder.add_block(block: block->self);
4054 dominating_block = builder.get_dominator();
4055 }
4056 }
4057 }
4058 }
4059
4060 // If all blocks here are dead code, this will be 0, so the variable in question
4061 // will be completely eliminated.
4062 if (dominating_block)
4063 {
4064 auto &block = get<SPIRBlock>(id: dominating_block);
4065 block.dominated_variables.push_back(t: var.first);
4066 get<SPIRVariable>(id: var.first).dominator = dominating_block;
4067 }
4068 }
4069
4070 for (auto &var : handler.accessed_temporaries_to_block)
4071 {
4072 auto itr = handler.result_id_to_type.find(x: var.first);
4073
4074 if (itr == end(cont&: handler.result_id_to_type))
4075 {
4076 // We found a false positive ID being used, ignore.
4077 // This should probably be an assert.
4078 continue;
4079 }
4080
4081 // There is no point in doing domination analysis for opaque types.
4082 auto &type = get<SPIRType>(id: itr->second);
4083 if (type_is_opaque_value(type))
4084 continue;
4085
4086 DominatorBuilder builder(cfg);
4087 bool force_temporary = false;
4088 bool used_in_header_hoisted_continue_block = false;
4089
4090 // Figure out which block is dominating all accesses of those temporaries.
4091 auto &blocks = var.second;
4092 for (auto &block : blocks)
4093 {
4094 builder.add_block(block);
4095
4096 if (blocks.size() != 1 && is_continue(next: block))
4097 {
				// The risk here is that an inner loop can dominate the continue block.
				// Any temporary we access in the continue block must be declared before the loop.
				// This is moot for complex loops, however.
4101 auto &loop_header_block = get<SPIRBlock>(id: ir.continue_block_to_loop_header[block]);
4102 assert(loop_header_block.merge == SPIRBlock::MergeLoop);
4103 builder.add_block(block: loop_header_block.self);
4104 used_in_header_hoisted_continue_block = true;
4105 }
4106 }
4107
4108 uint32_t dominating_block = builder.get_dominator();
4109
4110 if (blocks.size() != 1 && is_single_block_loop(next: dominating_block))
4111 {
			// Awkward case, because the loop header is also the continue block,
			// so hoisting to the loop header does not help.
4114 force_temporary = true;
4115 }
4116
4117 if (dominating_block)
4118 {
4119 // If we touch a variable in the dominating block, this is the expected setup.
4120 // SPIR-V normally mandates this, but we have extra cases for temporary use inside loops.
4121 bool first_use_is_dominator = blocks.count(x: dominating_block) != 0;
4122
4123 if (!first_use_is_dominator || force_temporary)
4124 {
4125 if (handler.access_chain_expressions.count(x: var.first))
4126 {
4127 // Exceptionally rare case.
4128 // We cannot declare temporaries of access chains (except on MSL perhaps with pointers).
4129 // Rather than do that, we force the indexing expressions to be declared in the right scope by
4130 // tracking their usage to that end. There is no temporary to hoist.
4131 // However, we still need to observe declaration order of the access chain.
4132
4133 if (used_in_header_hoisted_continue_block)
4134 {
						// For this scenario, we used an access chain inside a continue block where we also registered an access to the header block.
						// This is a problem, as we need to declare the access chain properly first, with its full definition.
4137 // We cannot use temporaries for these expressions,
4138 // so we must make sure the access chain is declared ahead of time.
4139 // Force a complex for loop to deal with this.
4140 // TODO: Out-of-order declaring for loops where continue blocks are emitted last might be another option.
4141 auto &loop_header_block = get<SPIRBlock>(id: dominating_block);
4142 assert(loop_header_block.merge == SPIRBlock::MergeLoop);
4143 loop_header_block.complex_continue = true;
4144 }
4145 }
4146 else
4147 {
4148 // This should be very rare, but if we try to declare a temporary inside a loop,
					// and that temporary is used outside the loop as well (the spirv-opt inliner likes this),
4150 // we should actually emit the temporary outside the loop.
4151 hoisted_temporaries.insert(x: var.first);
4152 forced_temporaries.insert(x: var.first);
4153
4154 auto &block_temporaries = get<SPIRBlock>(id: dominating_block).declare_temporary;
4155 block_temporaries.emplace_back(ts&: handler.result_id_to_type[var.first], ts: var.first);
4156 }
4157 }
4158 else if (blocks.size() > 1)
4159 {
4160 // Keep track of the temporary as we might have to declare this temporary.
4161 // This can happen if the loop header dominates a temporary, but we have a complex fallback loop.
4162 // In this case, the header is actually inside the for (;;) {} block, and we have problems.
4163 // What we need to do is hoist the temporaries outside the for (;;) {} block in case the header block
4164 // declares the temporary.
4165 auto &block_temporaries = get<SPIRBlock>(id: dominating_block).potential_declare_temporary;
4166 block_temporaries.emplace_back(ts&: handler.result_id_to_type[var.first], ts: var.first);
4167 }
4168 }
4169 }
4170
4171 unordered_set<uint32_t> seen_blocks;
4172
4173 // Now, try to analyze whether or not these variables are actually loop variables.
4174 for (auto &loop_variable : potential_loop_variables)
4175 {
4176 auto &var = get<SPIRVariable>(id: loop_variable.first);
4177 auto dominator = var.dominator;
4178 BlockID block = loop_variable.second;
4179
4180 // The variable was accessed in multiple continue blocks, ignore.
4181 if (block == BlockID(~(0u)) || block == BlockID(0))
4182 continue;
4183
4184 // Dead code.
4185 if (dominator == ID(0))
4186 continue;
4187
4188 BlockID header = 0;
4189
4190 // Find the loop header for this block if we are a continue block.
4191 {
4192 auto itr = ir.continue_block_to_loop_header.find(x: block);
4193 if (itr != end(cont&: ir.continue_block_to_loop_header))
4194 {
4195 header = itr->second;
4196 }
4197 else if (get<SPIRBlock>(id: block).continue_block == block)
4198 {
4199 // Also check for self-referential continue block.
4200 header = block;
4201 }
4202 }
4203
4204 assert(header);
4205 auto &header_block = get<SPIRBlock>(id: header);
4206 auto &blocks = handler.accessed_variables_to_block[loop_variable.first];
4207
4208 // If a loop variable is not used before the loop, it's probably not a loop variable.
4209 bool has_accessed_variable = blocks.count(x: header) != 0;
4210
4211 // Now, there are two conditions we need to meet for the variable to be a loop variable.
4212 // 1. The dominating block must have a branch-free path to the loop header,
4213 // this way we statically know which expression should be part of the loop variable initializer.
4214
4215 // Walk from the dominator, if there is one straight edge connecting
4216 // dominator and loop header, we statically know the loop initializer.
4217 bool static_loop_init = true;
4218 while (dominator != header)
4219 {
4220 if (blocks.count(x: dominator) != 0)
4221 has_accessed_variable = true;
4222
4223 auto &succ = cfg.get_succeeding_edges(block: dominator);
4224 if (succ.size() != 1)
4225 {
4226 static_loop_init = false;
4227 break;
4228 }
4229
4230 auto &pred = cfg.get_preceding_edges(block: succ.front());
4231 if (pred.size() != 1 || pred.front() != dominator)
4232 {
4233 static_loop_init = false;
4234 break;
4235 }
4236
4237 dominator = succ.front();
4238 }
4239
4240 if (!static_loop_init || !has_accessed_variable)
4241 continue;
4242
4243 // The second condition we need to meet is that no access after the loop
4244 // merge can occur. Walk the CFG to see if we find anything.
4245
4246 seen_blocks.clear();
4247 cfg.walk_from(seen_blocks, block: header_block.merge_block, op: [&](uint32_t walk_block) -> bool {
4248 // We found a block which accesses the variable outside the loop.
4249 if (blocks.find(x: walk_block) != end(cont&: blocks))
4250 static_loop_init = false;
4251 return true;
4252 });
4253
4254 if (!static_loop_init)
4255 continue;
4256
4257 // We have a loop variable.
4258 header_block.loop_variables.push_back(t: loop_variable.first);
		// Need to sort here, as variables come from an unordered container, and pushing them in the wrong order
		// will break reproducibility in regression runs.
4261 sort(first: begin(cont&: header_block.loop_variables), last: end(cont&: header_block.loop_variables));
4262 get<SPIRVariable>(id: loop_variable.first).loop_variable = true;
4263 }
4264}
4265
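// Returns true if the block may read the variable before it has been completely written.
// Used to decide whether a variable declared inside a loop must instead be hoisted to the
// outermost loop header so its value is preserved across iterations.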
4266bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var)
4267{
4268 for (auto &op : block.ops)
4269 {
4270 auto *ops = stream(instr: op);
4271 switch (op.op)
4272 {
4273 case OpStore:
4274 case OpCopyMemory:
4275 if (ops[0] == var)
4276 return false;
4277 break;
4278
4279 case OpAccessChain:
4280 case OpInBoundsAccessChain:
4281 case OpPtrAccessChain:
4282 // Access chains are generally used to partially read and write. It's too hard to analyze
4283 // if all constituents are written fully before continuing, so just assume it's preserved.
4284 // This is the same as the parameter preservation analysis.
4285 if (ops[2] == var)
4286 return true;
4287 break;
4288
4289 case OpSelect:
4290 // Variable pointers.
4291 // We might read before writing.
4292 if (ops[3] == var || ops[4] == var)
4293 return true;
4294 break;
4295
4296 case OpPhi:
4297 {
4298 // Variable pointers.
4299 // We might read before writing.
4300 if (op.length < 2)
4301 break;
4302
4303 uint32_t count = op.length - 2;
4304 for (uint32_t i = 0; i < count; i += 2)
4305 if (ops[i + 2] == var)
4306 return true;
4307 break;
4308 }
4309
4310 case OpCopyObject:
4311 case OpLoad:
4312 if (ops[2] == var)
4313 return true;
4314 break;
4315
4316 case OpFunctionCall:
4317 {
4318 if (op.length < 3)
4319 break;
4320
4321 // May read before writing.
4322 uint32_t count = op.length - 3;
4323 for (uint32_t i = 0; i < count; i++)
4324 if (ops[i + 3] == var)
4325 return true;
4326 break;
4327 }
4328
4329 default:
4330 break;
4331 }
4332 }
4333
	// The variable was not accessed in any usual fashion here.
	// It's likely accessed in a branch, so assume we must preserve it.
4336 return true;
4337}
4338
4339Bitset Compiler::get_buffer_block_flags(VariableID id) const
4340{
4341 return ir.get_buffer_block_flags(var: get<SPIRVariable>(id));
4342}
4343
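// Resolves a single basic type shared by all (possibly nested) members of a struct.
// Returns false if the members disagree; for non-struct types, simply reports the base type.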
4344bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type)
4345{
4346 if (type.basetype == SPIRType::Struct)
4347 {
4348 base_type = SPIRType::Unknown;
4349 for (auto &member_type : type.member_types)
4350 {
4351 SPIRType::BaseType member_base;
4352 if (!get_common_basic_type(type: get<SPIRType>(id: member_type), base_type&: member_base))
4353 return false;
4354
4355 if (base_type == SPIRType::Unknown)
4356 base_type = member_base;
4357 else if (base_type != member_base)
4358 return false;
4359 }
4360 return true;
4361 }
4362 else
4363 {
4364 base_type = type.basetype;
4365 return true;
4366 }
4367}
4368
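// Records additional state for builtins which need it: the declared array sizes of
// ClipDistance and CullDistance, and whether Position is decorated as Invariant.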
4369void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltIn builtin,
4370 const Bitset &decoration_flags)
4371{
4372 // If used, we will need to explicitly declare a new array size for these builtins.
4373
4374 if (builtin == BuiltInClipDistance)
4375 {
4376 if (!type.array_size_literal[0])
4377 SPIRV_CROSS_THROW("Array size for ClipDistance must be a literal.");
4378 uint32_t array_size = type.array[0];
4379 if (array_size == 0)
4380 SPIRV_CROSS_THROW("Array size for ClipDistance must not be unsized.");
4381 compiler.clip_distance_count = array_size;
4382 }
4383 else if (builtin == BuiltInCullDistance)
4384 {
4385 if (!type.array_size_literal[0])
4386 SPIRV_CROSS_THROW("Array size for CullDistance must be a literal.");
4387 uint32_t array_size = type.array[0];
4388 if (array_size == 0)
4389 SPIRV_CROSS_THROW("Array size for CullDistance must not be unsized.");
4390 compiler.cull_distance_count = array_size;
4391 }
4392 else if (builtin == BuiltInPosition)
4393 {
4394 if (decoration_flags.get(bit: DecorationInvariant))
4395 compiler.position_invariant = true;
4396 }
4397}
4398
4399void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id, bool allow_blocks)
4400{
4401 // Only handle plain variables here.
4402 // Builtins which are part of a block are handled in AccessChain.
	// If allow_blocks is used, however, it is to handle initializers of blocks,
4404 // which implies that all members are written to.
4405
4406 auto *var = compiler.maybe_get<SPIRVariable>(id);
4407 auto *m = compiler.ir.find_meta(id);
4408 if (var && m)
4409 {
4410 auto &type = compiler.get<SPIRType>(id: var->basetype);
4411 auto &decorations = m->decoration;
4412 auto &flags = type.storage == StorageClassInput ?
4413 compiler.active_input_builtins : compiler.active_output_builtins;
4414 if (decorations.builtin)
4415 {
4416 flags.set(decorations.builtin_type);
4417 handle_builtin(type, builtin: decorations.builtin_type, decoration_flags: decorations.decoration_flags);
4418 }
4419 else if (allow_blocks && compiler.has_decoration(id: type.self, decoration: DecorationBlock))
4420 {
4421 uint32_t member_count = uint32_t(type.member_types.size());
4422 for (uint32_t i = 0; i < member_count; i++)
4423 {
4424 if (compiler.has_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn))
4425 {
4426 auto &member_type = compiler.get<SPIRType>(id: type.member_types[i]);
4427 BuiltIn builtin = BuiltIn(compiler.get_member_decoration(id: type.self, index: i, decoration: DecorationBuiltIn));
4428 flags.set(builtin);
4429 handle_builtin(type: member_type, builtin, decoration_flags: compiler.get_member_decoration_bitset(id: type.self, index: i));
4430 }
4431 }
4432 }
4433 }
4434}
4435
4436void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id)
4437{
4438 add_if_builtin(id, allow_blocks: false);
4439}
4440
4441void Compiler::ActiveBuiltinHandler::add_if_builtin_or_block(uint32_t id)
4442{
4443 add_if_builtin(id, allow_blocks: true);
4444}
4445
4446bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length)
4447{
4448 switch (opcode)
4449 {
4450 case OpStore:
4451 if (length < 1)
4452 return false;
4453
4454 add_if_builtin(id: args[0]);
4455 break;
4456
4457 case OpCopyMemory:
4458 if (length < 2)
4459 return false;
4460
4461 add_if_builtin(id: args[0]);
4462 add_if_builtin(id: args[1]);
4463 break;
4464
4465 case OpCopyObject:
4466 case OpLoad:
4467 if (length < 3)
4468 return false;
4469
4470 add_if_builtin(id: args[2]);
4471 break;
4472
4473 case OpSelect:
4474 if (length < 5)
4475 return false;
4476
4477 add_if_builtin(id: args[3]);
4478 add_if_builtin(id: args[4]);
4479 break;
4480
4481 case OpPhi:
4482 {
4483 if (length < 2)
4484 return false;
4485
4486 uint32_t count = length - 2;
4487 args += 2;
4488 for (uint32_t i = 0; i < count; i += 2)
4489 add_if_builtin(id: args[i]);
4490 break;
4491 }
4492
4493 case OpFunctionCall:
4494 {
4495 if (length < 3)
4496 return false;
4497
4498 uint32_t count = length - 3;
4499 args += 3;
4500 for (uint32_t i = 0; i < count; i++)
4501 add_if_builtin(id: args[i]);
4502 break;
4503 }
4504
4505 case OpAccessChain:
4506 case OpInBoundsAccessChain:
4507 case OpPtrAccessChain:
4508 {
4509 if (length < 4)
4510 return false;
4511
		// Only consider global variables. We cannot consider variables in functions or other
		// access chains yet, as they have not been created at this point.
4514 auto *var = compiler.maybe_get<SPIRVariable>(id: args[2]);
4515 if (!var)
4516 break;
4517
4518 // Required if we access chain into builtins like gl_GlobalInvocationID.
4519 add_if_builtin(id: args[2]);
4520
4521 // Start traversing type hierarchy at the proper non-pointer types.
4522 auto *type = &compiler.get_variable_data_type(var: *var);
4523
4524 auto &flags =
4525 var->storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins;
4526
4527 uint32_t count = length - 3;
4528 args += 3;
4529 for (uint32_t i = 0; i < count; i++)
4530 {
4531 // Pointers
4532 // PtrAccessChain functions more like a pointer offset. Type remains the same.
4533 if (opcode == OpPtrAccessChain && i == 0)
4534 continue;
4535
4536 // Arrays
4537 if (!type->array.empty())
4538 {
4539 type = &compiler.get<SPIRType>(id: type->parent_type);
4540 }
4541 // Structs
4542 else if (type->basetype == SPIRType::Struct)
4543 {
4544 uint32_t index = compiler.get<SPIRConstant>(id: args[i]).scalar();
4545
4546 if (index < uint32_t(compiler.ir.meta[type->self].members.size()))
4547 {
4548 auto &decorations = compiler.ir.meta[type->self].members[index];
4549 if (decorations.builtin)
4550 {
4551 flags.set(decorations.builtin_type);
4552 handle_builtin(type: compiler.get<SPIRType>(id: type->member_types[index]), builtin: decorations.builtin_type,
4553 decoration_flags: decorations.decoration_flags);
4554 }
4555 }
4556
4557 type = &compiler.get<SPIRType>(id: type->member_types[index]);
4558 }
4559 else
4560 {
4561 // No point in traversing further. We won't find any extra builtins.
4562 break;
4563 }
4564 }
4565 break;
4566 }
4567
4568 default:
4569 break;
4570 }
4571
4572 return true;
4573}
4574
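// Recomputes the bitsets of builtins which are actually used by the entry point,
// by traversing all reachable opcodes. Output builtin blocks which are only initialized,
// but never explicitly accessed, are also considered active.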
4575void Compiler::update_active_builtins()
4576{
4577 active_input_builtins.reset();
4578 active_output_builtins.reset();
4579 cull_distance_count = 0;
4580 clip_distance_count = 0;
4581 ActiveBuiltinHandler handler(*this);
4582 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
4583
4584 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
4585 if (var.storage != StorageClassOutput)
4586 return;
4587 if (!interface_variable_exists_in_entry_point(id: var.self))
4588 return;
4589
4590 // Also, make sure we preserve output variables which are only initialized, but never accessed by any code.
4591 if (var.initializer != ID(0))
4592 handler.add_if_builtin_or_block(id: var.self);
4593 });
4594}
4595
// Returns whether this shader uses a builtin of the given storage class.
4597bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) const
4598{
4599 const Bitset *flags;
4600 switch (storage)
4601 {
4602 case StorageClassInput:
4603 flags = &active_input_builtins;
4604 break;
4605 case StorageClassOutput:
4606 flags = &active_output_builtins;
4607 break;
4608
4609 default:
4610 return false;
4611 }
4612 return flags->get(bit: builtin);
4613}
4614
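// Figures out which images, samplers and combined image samplers are used for depth-compare
// (Dref) operations, so backends can emit comparison sampler types where required.
// It also detects whether subpass inputs (possibly multisampled) are used.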
4615void Compiler::analyze_image_and_sampler_usage()
4616{
4617 CombinedImageSamplerDrefHandler dref_handler(*this);
4618 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler&: dref_handler);
4619
4620 CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers);
4621 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
4622
	// Need to run this traversal twice. In the first pass, we propagate any comparison sampler usage
	// from leaf functions down to main().
	// In the second pass, we propagate forced depth state coming from main() up into leaf functions.
4626 handler.dependency_hierarchy.clear();
4627 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
4628
4629 comparison_ids = std::move(handler.comparison_ids);
4630 need_subpass_input = handler.need_subpass_input;
4631 need_subpass_input_ms = handler.need_subpass_input_ms;
4632
4633 // Forward information from separate images and samplers into combined image samplers.
4634 for (auto &combined : combined_image_samplers)
4635 if (comparison_ids.count(x: combined.sampler_id))
4636 comparison_ids.insert(x: combined.combined_id);
4637}
4638
4639bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t)
4640{
4641 // Mark all sampled images which are used with Dref.
4642 switch (opcode)
4643 {
4644 case OpImageSampleDrefExplicitLod:
4645 case OpImageSampleDrefImplicitLod:
4646 case OpImageSampleProjDrefExplicitLod:
4647 case OpImageSampleProjDrefImplicitLod:
4648 case OpImageSparseSampleProjDrefImplicitLod:
4649 case OpImageSparseSampleDrefImplicitLod:
4650 case OpImageSparseSampleProjDrefExplicitLod:
4651 case OpImageSparseSampleDrefExplicitLod:
4652 case OpImageDrefGather:
4653 case OpImageSparseDrefGather:
4654 dref_combined_samplers.insert(x: args[2]);
4655 return true;
4656
4657 default:
4658 break;
4659 }
4660
4661 return true;
4662}
4663
4664const CFG &Compiler::get_cfg_for_current_function() const
4665{
4666 assert(current_function);
4667 return get_cfg_for_function(id: current_function->self);
4668}
4669
4670const CFG &Compiler::get_cfg_for_function(uint32_t id) const
4671{
4672 auto cfg_itr = function_cfgs.find(x: id);
4673 assert(cfg_itr != end(function_cfgs));
4674 assert(cfg_itr->second);
4675 return *cfg_itr->second;
4676}
4677
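// Builds a CFG for every function reachable from the entry point, then runs the variable scope
// and LUT analyses on each of them. Loop variable candidates are validated here, since merging
// multiple initializers into one loop header requires matching types and decorations.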
4678void Compiler::build_function_control_flow_graphs_and_analyze()
4679{
4680 CFGBuilder handler(*this);
4681 handler.function_cfgs[ir.default_entry_point].reset(p: new CFG(*this, get<SPIRFunction>(id: ir.default_entry_point)));
4682 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
4683 function_cfgs = std::move(handler.function_cfgs);
4684 bool single_function = function_cfgs.size() <= 1;
4685
4686 for (auto &f : function_cfgs)
4687 {
4688 auto &func = get<SPIRFunction>(id: f.first);
4689 AnalyzeVariableScopeAccessHandler scope_handler(*this, func);
4690 analyze_variable_scope(entry&: func, handler&: scope_handler);
4691 find_function_local_luts(entry&: func, handler: scope_handler, single_function);
4692
4693 // Check if we can actually use the loop variables we found in analyze_variable_scope.
4694 // To use multiple initializers, we need the same type and qualifiers.
4695 for (auto block : func.blocks)
4696 {
4697 auto &b = get<SPIRBlock>(id: block);
4698 if (b.loop_variables.size() < 2)
4699 continue;
4700
4701 auto &flags = get_decoration_bitset(id: b.loop_variables.front());
4702 uint32_t type = get<SPIRVariable>(id: b.loop_variables.front()).basetype;
4703 bool invalid_initializers = false;
4704 for (auto loop_variable : b.loop_variables)
4705 {
				if (flags != get_decoration_bitset(id: loop_variable) ||
				    type != get<SPIRVariable>(id: loop_variable).basetype)
4708 {
4709 invalid_initializers = true;
4710 break;
4711 }
4712 }
4713
4714 if (invalid_initializers)
4715 {
4716 for (auto loop_variable : b.loop_variables)
4717 get<SPIRVariable>(id: loop_variable).loop_variable = false;
4718 b.loop_variables.clear();
4719 }
4720 }
4721 }
4722
	// Find LUTs which are not function local. Only consider this case if the CFG is multi-function;
	// otherwise, we treat Private storage as Function storage trivially.
4725 // Needs to be analyzed from the outside since we have to block the LUT optimization if at least
4726 // one function writes to it.
4727 if (!single_function)
4728 {
4729 for (auto &id : global_variables)
4730 {
4731 auto &var = get<SPIRVariable>(id);
4732 auto &type = get_variable_data_type(var);
4733
4734 if (is_array(type) && var.storage == StorageClassPrivate &&
4735 var.initializer && !var.is_written_to &&
4736 ir.ids[var.initializer].get_type() == TypeConstant)
4737 {
4738 get<SPIRConstant>(id: var.initializer).is_used_as_lut = true;
4739 var.static_expression = var.initializer;
4740 var.statically_assigned = true;
4741 var.remapped_variable = true;
4742 }
4743 }
4744 }
4745}
4746
4747Compiler::CFGBuilder::CFGBuilder(Compiler &compiler_)
4748 : compiler(compiler_)
4749{
4750}
4751
4752bool Compiler::CFGBuilder::handle(spv::Op, const uint32_t *, uint32_t)
4753{
4754 return true;
4755}
4756
4757bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func)
4758{
4759 if (function_cfgs.find(x: func.self) == end(cont&: function_cfgs))
4760 {
4761 function_cfgs[func.self].reset(p: new CFG(compiler, func));
4762 return true;
4763 }
4764 else
4765 return false;
4766}
4767
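// Records that dst was derived from src (through loads, access chains or function arguments),
// and propagates comparison-sampler state along that edge.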
4768void Compiler::CombinedImageSamplerUsageHandler::add_dependency(uint32_t dst, uint32_t src)
4769{
4770 dependency_hierarchy[dst].insert(x: src);
4771 // Propagate up any comparison state if we're loading from one such variable.
4772 if (comparison_ids.count(x: src))
4773 comparison_ids.insert(x: dst);
4774}
4775
4776bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length)
4777{
4778 if (length < 3)
4779 return false;
4780
4781 auto &func = compiler.get<SPIRFunction>(id: args[2]);
4782 const auto *arg = &args[3];
4783 length -= 3;
4784
4785 for (uint32_t i = 0; i < length; i++)
4786 {
4787 auto &argument = func.arguments[i];
4788 add_dependency(dst: argument.id, src: arg[i]);
4789 }
4790
4791 return true;
4792}
4793
4794void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids(uint32_t id)
4795{
4796 // Traverse the variable dependency hierarchy and tag everything in its path with comparison ids.
4797 comparison_ids.insert(x: id);
4798
4799 for (auto &dep_id : dependency_hierarchy[id])
4800 add_hierarchy_to_comparison_ids(id: dep_id);
4801}
4802
4803bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
4804{
4805 switch (opcode)
4806 {
4807 case OpAccessChain:
4808 case OpInBoundsAccessChain:
4809 case OpPtrAccessChain:
4810 case OpLoad:
4811 {
4812 if (length < 3)
4813 return false;
4814
4815 add_dependency(dst: args[1], src: args[2]);
4816
4817 // Ideally defer this to OpImageRead, but then we'd need to track loaded IDs.
4818 // If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord.
4819 auto &type = compiler.get<SPIRType>(id: args[0]);
4820 if (type.image.dim == DimSubpassData)
4821 {
4822 need_subpass_input = true;
4823 if (type.image.ms)
4824 need_subpass_input_ms = true;
4825 }
4826
4827 // If we load a SampledImage and it will be used with Dref, propagate the state up.
4828 if (dref_combined_samplers.count(x: args[1]) != 0)
4829 add_hierarchy_to_comparison_ids(id: args[1]);
4830 break;
4831 }
4832
4833 case OpSampledImage:
4834 {
4835 if (length < 4)
4836 return false;
4837
4838 // If the underlying resource has been used for comparison then duplicate loads of that resource must be too.
4839 // This image must be a depth image.
4840 uint32_t result_id = args[1];
4841 uint32_t image = args[2];
4842 uint32_t sampler = args[3];
4843
4844 if (dref_combined_samplers.count(x: result_id) != 0)
4845 {
4846 add_hierarchy_to_comparison_ids(id: image);
4847
4848 // This sampler must be a SamplerComparisonState, and not a regular SamplerState.
4849 add_hierarchy_to_comparison_ids(id: sampler);
4850
4851 // Mark the OpSampledImage itself as being comparison state.
4852 comparison_ids.insert(x: result_id);
4853 }
4854 return true;
4855 }
4856
4857 default:
4858 break;
4859 }
4860
4861 return true;
4862}
4863
4864bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const
4865{
4866 auto *m = ir.find_meta(id);
4867 return m && m->hlsl_is_magic_counter_buffer;
4868}
4869
4870bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const
4871{
4872 auto *m = ir.find_meta(id);
4873
4874 // First, check for the proper decoration.
4875 if (m && m->hlsl_magic_counter_buffer != 0)
4876 {
4877 counter_id = m->hlsl_magic_counter_buffer;
4878 return true;
4879 }
4880 else
4881 return false;
4882}
4883
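// Recursively creates a null constant for the given type. Arrays and structs are built from
// null constants of their element and member types; scalars and pointers use make_null() directly.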
4884void Compiler::make_constant_null(uint32_t id, uint32_t type)
4885{
4886 auto &constant_type = get<SPIRType>(id: type);
4887
4888 if (constant_type.pointer)
4889 {
4890 auto &constant = set<SPIRConstant>(id, args&: type);
4891 constant.make_null(constant_type_: constant_type);
4892 }
4893 else if (!constant_type.array.empty())
4894 {
4895 assert(constant_type.parent_type);
4896 uint32_t parent_id = ir.increase_bound_by(count: 1);
4897 make_constant_null(id: parent_id, type: constant_type.parent_type);
4898
4899 if (!constant_type.array_size_literal.back())
4900 SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal.");
4901
4902 SmallVector<uint32_t> elements(constant_type.array.back());
4903 for (uint32_t i = 0; i < constant_type.array.back(); i++)
4904 elements[i] = parent_id;
4905 set<SPIRConstant>(id, args&: type, args: elements.data(), args: uint32_t(elements.size()), args: false);
4906 }
4907 else if (!constant_type.member_types.empty())
4908 {
4909 uint32_t member_ids = ir.increase_bound_by(count: uint32_t(constant_type.member_types.size()));
4910 SmallVector<uint32_t> elements(constant_type.member_types.size());
4911 for (uint32_t i = 0; i < constant_type.member_types.size(); i++)
4912 {
4913 make_constant_null(id: member_ids + i, type: constant_type.member_types[i]);
4914 elements[i] = member_ids + i;
4915 }
4916 set<SPIRConstant>(id, args&: type, args: elements.data(), args: uint32_t(elements.size()), args: false);
4917 }
4918 else
4919 {
4920 auto &constant = set<SPIRConstant>(id, args&: type);
4921 constant.make_null(constant_type_: constant_type);
4922 }
4923}
4924
4925const SmallVector<spv::Capability> &Compiler::get_declared_capabilities() const
4926{
4927 return ir.declared_capabilities;
4928}
4929
4930const SmallVector<std::string> &Compiler::get_declared_extensions() const
4931{
4932 return ir.declared_extensions;
4933}
4934
4935std::string Compiler::get_remapped_declared_block_name(VariableID id) const
4936{
4937 return get_remapped_declared_block_name(id, fallback_prefer_instance_name: false);
4938}
4939
4940std::string Compiler::get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const
4941{
4942 auto itr = declared_block_names.find(x: id);
4943 if (itr != end(cont: declared_block_names))
4944 {
4945 return itr->second;
4946 }
4947 else
4948 {
4949 auto &var = get<SPIRVariable>(id);
4950
4951 if (fallback_prefer_instance_name)
4952 {
4953 return to_name(id: var.self);
4954 }
4955 else
4956 {
4957 auto &type = get<SPIRType>(id: var.basetype);
4958 auto *type_meta = ir.find_meta(id: type.self);
4959 auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr;
4960 return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name;
4961 }
4962 }
4963}
4964
4965bool Compiler::reflection_ssbo_instance_name_is_significant() const
4966{
4967 if (ir.source.known)
4968 {
4969 // UAVs from HLSL source tend to be declared in a way where the type is reused
4970 // but the instance name is significant, and that's the name we should report.
4971 // For GLSL, SSBOs each have their own block type as that's how GLSL is written.
4972 return ir.source.hlsl;
4973 }
4974
4975 unordered_set<uint32_t> ssbo_type_ids;
4976 bool aliased_ssbo_types = false;
4977
4978 // If we don't have any OpSource information, we need to perform some shaky heuristics.
4979 ir.for_each_typed_id<SPIRVariable>(op: [&](uint32_t, const SPIRVariable &var) {
4980 auto &type = this->get<SPIRType>(id: var.basetype);
4981 if (!type.pointer || var.storage == StorageClassFunction)
4982 return;
4983
4984 bool ssbo = var.storage == StorageClassStorageBuffer ||
4985 (var.storage == StorageClassUniform && has_decoration(id: type.self, decoration: DecorationBufferBlock));
4986
4987 if (ssbo)
4988 {
4989 if (ssbo_type_ids.count(x: type.self))
4990 aliased_ssbo_types = true;
4991 else
4992 ssbo_type_ids.insert(x: type.self);
4993 }
4994 });
4995
4996 // If the block name is aliased, assume we have HLSL-style UAV declarations.
4997 return aliased_ssbo_types;
4998}
4999
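// Extracts the <result type, result id> pair from an instruction, if the opcode produces both.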
5000bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op,
5001 const uint32_t *args, uint32_t length)
5002{
5003 if (length < 2)
5004 return false;
5005
5006 bool has_result_id = false, has_result_type = false;
5007 HasResultAndType(opcode: op, hasResult: &has_result_id, hasResultType: &has_result_type);
5008 if (has_result_id && has_result_type)
5009 {
5010 result_type = args[0];
5011 result_id = args[1];
5012 return true;
5013 }
5014 else
5015 return false;
5016}
5017
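// Merges the decoration flags of a struct member with those of all of its nested struct members.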
5018Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t index) const
5019{
5020 Bitset flags;
5021 auto *type_meta = ir.find_meta(id: type.self);
5022
5023 if (type_meta)
5024 {
5025 auto &members = type_meta->members;
5026 if (index >= members.size())
5027 return flags;
5028 auto &dec = members[index];
5029
5030 flags.merge_or(other: dec.decoration_flags);
5031
5032 auto &member_type = get<SPIRType>(id: type.member_types[index]);
5033
5034 // If our member type is a struct, traverse all the child members as well recursively.
5035 auto &member_childs = member_type.member_types;
5036 for (uint32_t i = 0; i < member_childs.size(); i++)
5037 {
5038 auto &child_member_type = get<SPIRType>(id: member_childs[i]);
5039 if (!child_member_type.pointer)
5040 flags.merge_or(other: combined_decoration_for_member(type: member_type, index: i));
5041 }
5042 }
5043
5044 return flags;
5045}
5046
5047bool Compiler::is_desktop_only_format(spv::ImageFormat format)
5048{
5049 switch (format)
5050 {
5051 // Desktop-only formats
5052 case ImageFormatR11fG11fB10f:
5053 case ImageFormatR16f:
5054 case ImageFormatRgb10A2:
5055 case ImageFormatR8:
5056 case ImageFormatRg8:
5057 case ImageFormatR16:
5058 case ImageFormatRg16:
5059 case ImageFormatRgba16:
5060 case ImageFormatR16Snorm:
5061 case ImageFormatRg16Snorm:
5062 case ImageFormatRgba16Snorm:
5063 case ImageFormatR8Snorm:
5064 case ImageFormatRg8Snorm:
5065 case ImageFormatR8ui:
5066 case ImageFormatRg8ui:
5067 case ImageFormatR16ui:
5068 case ImageFormatRgb10a2ui:
5069 case ImageFormatR8i:
5070 case ImageFormatRg8i:
5071 case ImageFormatR16i:
5072 return true;
5073 default:
5074 break;
5075 }
5076
5077 return false;
5078}
5079
5080// An image is determined to be a depth image if it is marked as a depth image and is not also
5081// explicitly marked with a color format, or if there are any sample/gather compare operations on it.
5082bool Compiler::is_depth_image(const SPIRType &type, uint32_t id) const
5083{
5084 return (type.image.depth && type.image.format == ImageFormatUnknown) || comparison_ids.count(x: id);
5085}
5086
5087bool Compiler::type_is_opaque_value(const SPIRType &type) const
5088{
5089 return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image ||
5090 type.basetype == SPIRType::Sampler);
5091}
5092
5093// Make these member functions so we can easily break on any force_recompile events.
5094void Compiler::force_recompile()
5095{
5096 is_force_recompile = true;
5097}
5098
5099void Compiler::force_recompile_guarantee_forward_progress()
5100{
5101 force_recompile();
5102 is_force_recompile_forward_progress = true;
5103}
5104
5105bool Compiler::is_forcing_recompilation() const
5106{
5107 return is_force_recompile;
5108}
5109
5110void Compiler::clear_force_recompile()
5111{
5112 is_force_recompile = false;
5113 is_force_recompile_forward_progress = false;
5114}
5115
5116Compiler::PhysicalStorageBufferPointerHandler::PhysicalStorageBufferPointerHandler(Compiler &compiler_)
5117 : compiler(compiler_)
5118{
5119}
5120
5121Compiler::PhysicalBlockMeta *Compiler::PhysicalStorageBufferPointerHandler::find_block_meta(uint32_t id) const
5122{
5123 auto chain_itr = access_chain_to_physical_block.find(x: id);
5124 if (chain_itr != access_chain_to_physical_block.end())
5125 return chain_itr->second;
5126 else
5127 return nullptr;
5128}
5129
5130void Compiler::PhysicalStorageBufferPointerHandler::mark_aligned_access(uint32_t id, const uint32_t *args, uint32_t length)
5131{
5132 uint32_t mask = *args;
5133 args++;
5134 length--;
5135 if (length && (mask & MemoryAccessVolatileMask) != 0)
5136 {
5137 args++;
5138 length--;
5139 }
5140
5141 if (length && (mask & MemoryAccessAlignedMask) != 0)
5142 {
5143 uint32_t alignment = *args;
5144 auto *meta = find_block_meta(id);
5145
5146 // This makes the assumption that the application does not rely on insane edge cases like:
5147 // Bind buffer with ADDR = 8, use block offset of 8 bytes, load/store with 16 byte alignment.
5148 // If we emit the buffer with alignment = 16 here, the first element at offset = 0 should
5149 // actually have alignment of 8 bytes, but this is too theoretical and awkward to support.
5150 // We could potentially keep track of any offset in the access chain, but it's
5151 // practically impossible for high level compilers to emit code like that,
5152 // so deducing overall alignment requirement based on maximum observed Alignment value is probably fine.
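// For example, if the same physical pointer block type is accessed with both
// OpLoad ... Aligned 4 and OpLoad ... Aligned 16, the deduced alignment for that
// block ends up as 16, the maximum observed value.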
5153 if (meta && alignment > meta->alignment)
5154 meta->alignment = alignment;
5155 }
5156}
5157
5158bool Compiler::PhysicalStorageBufferPointerHandler::type_is_bda_block_entry(uint32_t type_id) const
5159{
5160 auto &type = compiler.get<SPIRType>(id: type_id);
5161 return compiler.is_physical_pointer(type);
5162}
5163
5164uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_minimum_scalar_alignment(const SPIRType &type) const
5165{
5166 if (type.storage == spv::StorageClassPhysicalStorageBufferEXT)
5167 return 8;
5168 else if (type.basetype == SPIRType::Struct)
5169 {
5170 uint32_t alignment = 0;
5171 for (auto &member_type : type.member_types)
5172 {
5173 uint32_t member_align = get_minimum_scalar_alignment(type: compiler.get<SPIRType>(id: member_type));
5174 if (member_align > alignment)
5175 alignment = member_align;
5176 }
5177 return alignment;
5178 }
5179 else
5180 return type.width / 8;
5181}
5182
5183void Compiler::PhysicalStorageBufferPointerHandler::setup_meta_chain(uint32_t type_id, uint32_t var_id)
5184{
5185 if (type_is_bda_block_entry(type_id))
5186 {
5187 auto &meta = physical_block_type_meta[type_id];
5188 access_chain_to_physical_block[var_id] = &meta;
5189
5190 auto &type = compiler.get<SPIRType>(id: type_id);
5191
5192 if (!compiler.is_physical_pointer_to_buffer_block(type))
5193 non_block_types.insert(x: type_id);
5194
5195 if (meta.alignment == 0)
5196 meta.alignment = get_minimum_scalar_alignment(type: compiler.get_pointee_type(type));
5197 }
5198}
5199
5200bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t *args, uint32_t length)
5201{
5202 // When a BDA pointer comes to life, we need to keep a mapping of SSA ID -> type ID for the pointer type.
5203 // For every load and store, we'll need to be able to look up the type ID being accessed and mark any alignment
5204 // requirements.
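// Illustrative chain (hypothetical IDs):
// %ptr = OpConvertUToPtr %PhysPtrT %addr ; setup_meta_chain() starts tracking %ptr
// %elem = OpAccessChain %ElemPtrT %ptr %idx ; %elem inherits %ptr's block meta
// %val = OpLoad %T %elem Aligned 16 ; mark_aligned_access() raises the block's alignment to 16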
5205 switch (op)
5206 {
5207 case OpConvertUToPtr:
5208 case OpBitcast:
5209 case OpCompositeExtract:
5210 // Extract can begin a new chain if we had a struct or array of pointers as input.
5211 // We don't begin chains before we have a pure scalar pointer.
5212 setup_meta_chain(type_id: args[0], var_id: args[1]);
5213 break;
5214
5215 case OpAccessChain:
5216 case OpInBoundsAccessChain:
5217 case OpPtrAccessChain:
5218 case OpCopyObject:
5219 {
5220 auto itr = access_chain_to_physical_block.find(x: args[2]);
5221 if (itr != access_chain_to_physical_block.end())
5222 access_chain_to_physical_block[args[1]] = itr->second;
5223 break;
5224 }
5225
5226 case OpLoad:
5227 {
5228 setup_meta_chain(type_id: args[0], var_id: args[1]);
5229 if (length >= 4)
5230 mark_aligned_access(id: args[2], args: args + 3, length: length - 3);
5231 break;
5232 }
5233
5234 case OpStore:
5235 {
5236 if (length >= 3)
5237 mark_aligned_access(id: args[0], args: args + 2, length: length - 2);
5238 break;
5239 }
5240
5241 default:
5242 break;
5243 }
5244
5245 return true;
5246}
5247
5248uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_base_non_block_type_id(uint32_t type_id) const
5249{
5250 auto *type = &compiler.get<SPIRType>(id: type_id);
5251 while (compiler.is_physical_pointer(type: *type) && !type_is_bda_block_entry(type_id))
5252 {
5253 type_id = type->parent_type;
5254 type = &compiler.get<SPIRType>(id: type_id);
5255 }
5256
5257 assert(type_is_bda_block_entry(type_id));
5258 return type_id;
5259}
5260
5261void Compiler::PhysicalStorageBufferPointerHandler::analyze_non_block_types_from_block(const SPIRType &type)
5262{
5263 for (auto &member : type.member_types)
5264 {
5265 auto &subtype = compiler.get<SPIRType>(id: member);
5266
5267 if (compiler.is_physical_pointer(type: subtype) && !compiler.is_physical_pointer_to_buffer_block(type: subtype))
5268 non_block_types.insert(x: get_base_non_block_type_id(type_id: member));
5269 else if (subtype.basetype == SPIRType::Struct && !compiler.is_pointer(type: subtype))
5270 analyze_non_block_types_from_block(type: subtype);
5271 }
5272}
5273
5274void Compiler::analyze_non_block_pointer_types()
5275{
5276 PhysicalStorageBufferPointerHandler handler(*this);
5277 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
5278
5279 // Analyze any block declaration we have to make. It might contain
5280 // physical pointers to POD types which we never used, and thus never added to the list.
5281 // We'll need to add those pointer types to the set of types we declare.
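// Illustrative case: a buffer block containing a member of type
// OpTypePointer PhysicalStorageBuffer %float. Even if the shader never loads through that
// member, the pointer-to-float type must still be declared, so it is collected here.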
5282 ir.for_each_typed_id<SPIRType>(op: [&](uint32_t id, SPIRType &type) {
5283 // Only analyze the raw block struct, not any pointer-to-struct, since that's just redundant.
5284 if (type.self == id &&
5285 (has_decoration(id: type.self, decoration: DecorationBlock) ||
5286 has_decoration(id: type.self, decoration: DecorationBufferBlock)))
5287 {
5288 handler.analyze_non_block_types_from_block(type);
5289 }
5290 });
5291
5292 physical_storage_non_block_pointer_types.reserve(count: handler.non_block_types.size());
5293 for (auto type : handler.non_block_types)
5294 physical_storage_non_block_pointer_types.push_back(t: type);
5295 sort(first: begin(cont&: physical_storage_non_block_pointer_types), last: end(cont&: physical_storage_non_block_pointer_types));
5296 physical_storage_type_to_alignment = std::move(handler.physical_block_type_meta);
5297}
5298
5299bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t)
5300{
5301 if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT)
5302 {
5303 if (interlock_function_id != 0 && interlock_function_id != call_stack.back())
5304 {
5305 // Most complex case: we have no sensible way of dealing with this
5306 // other than taking the 100% conservative approach, so exit early.
5307 split_function_case = true;
5308 return false;
5309 }
5310 else
5311 {
5312 interlock_function_id = call_stack.back();
5313 // If this call is performed inside control flow, we have a problem.
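// Illustrative case: if the begin call only executes under a branch, e.g.
// if (cond) { OpBeginInvocationInterlockEXT; ... }, its block does not terminate
// control flow from the entry block, so control_flow_interlock is set and the simple
// critical-section model is abandoned.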
5314 auto &cfg = compiler.get_cfg_for_function(id: interlock_function_id);
5315
5316 uint32_t from_block_id = compiler.get<SPIRFunction>(id: interlock_function_id).entry_block;
5317 bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from: from_block_id, to: current_block_id);
5318 if (!outside_control_flow)
5319 control_flow_interlock = true;
5320 }
5321 }
5322 return true;
5323}
5324
5325void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block)
5326{
5327 current_block_id = block.self;
5328}
5329
5330bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length)
5331{
5332 if (length < 3)
5333 return false;
5334 call_stack.push_back(t: args[2]);
5335 return true;
5336}
5337
5338bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t)
5339{
5340 call_stack.pop_back();
5341 return true;
5342}
5343
5344bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length)
5345{
5346 if (length < 3)
5347 return false;
5348
5349 if (args[2] == interlock_function_id)
5350 call_stack_is_interlocked = true;
5351
5352 call_stack.push_back(t: args[2]);
5353 return true;
5354}
5355
5356bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t)
5357{
5358 if (call_stack.back() == interlock_function_id)
5359 call_stack_is_interlocked = false;
5360
5361 call_stack.pop_back();
5362 return true;
5363}
5364
5365void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id)
5366{
5367 if ((use_critical_section && in_crit_sec) || (control_flow_interlock && call_stack_is_interlocked) ||
5368 split_function_case)
5369 {
5370 compiler.interlocked_resources.insert(x: id);
5371 }
5372}
5373
5374bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
5375{
5376 // Only care about critical section analysis if we have the simple case.
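// Illustrative simple shape (everything in the entry block, no calls in between):
// OpBeginInvocationInterlockEXT
// ... SSBO/image loads and stores recorded via access_potential_resource() ...
// OpEndInvocationInterlockEXT
// Only the accesses between begin and end need to be marked as interlocked.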
5377 if (use_critical_section)
5378 {
5379 if (opcode == OpBeginInvocationInterlockEXT)
5380 {
5381 in_crit_sec = true;
5382 return true;
5383 }
5384
5385 if (opcode == OpEndInvocationInterlockEXT)
5386 {
5387 // End critical section; nothing more to do.
5388 return false;
5389 }
5390 }
5391
5392 // We need to figure out where images and buffers are loaded from, so only do the bare-bones compilation we need.
5393 switch (opcode)
5394 {
5395 case OpLoad:
5396 {
5397 if (length < 3)
5398 return false;
5399
5400 uint32_t ptr = args[2];
5401 auto *var = compiler.maybe_get_backing_variable(chain: ptr);
5402
5403 // We're only concerned with buffer and image memory here.
5404 if (!var)
5405 break;
5406
5407 switch (var->storage)
5408 {
5409 default:
5410 break;
5411
5412 case StorageClassUniformConstant:
5413 {
5414 uint32_t result_type = args[0];
5415 uint32_t id = args[1];
5416 compiler.set<SPIRExpression>(id, args: "", args&: result_type, args: true);
5417 compiler.register_read(expr: id, chain: ptr, forwarded: true);
5418 break;
5419 }
5420
5421 case StorageClassUniform:
5422 // Must have BufferBlock; we only care about SSBOs.
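// (In SPIR-V predating the StorageBuffer storage class, SSBOs appear as Uniform-class
// variables whose block type is decorated BufferBlock; UBOs are decorated Block and are
// skipped here.)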
5423 if (!compiler.has_decoration(id: compiler.get<SPIRType>(id: var->basetype).self, decoration: DecorationBufferBlock))
5424 break;
5425 // fallthrough
5426 case StorageClassStorageBuffer:
5427 access_potential_resource(id: var->self);
5428 break;
5429 }
5430 break;
5431 }
5432
5433 case OpInBoundsAccessChain:
5434 case OpAccessChain:
5435 case OpPtrAccessChain:
5436 {
5437 if (length < 3)
5438 return false;
5439
5440 uint32_t result_type = args[0];
5441
5442 auto &type = compiler.get<SPIRType>(id: result_type);
5443 if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
5444 type.storage == StorageClassStorageBuffer)
5445 {
5446 uint32_t id = args[1];
5447 uint32_t ptr = args[2];
5448 compiler.set<SPIRExpression>(id, args: "", args&: result_type, args: true);
5449 compiler.register_read(expr: id, chain: ptr, forwarded: true);
5450 compiler.ir.ids[id].set_allow_type_rewrite();
5451 }
5452 break;
5453 }
5454
5455 case OpImageTexelPointer:
5456 {
5457 if (length < 3)
5458 return false;
5459
5460 uint32_t result_type = args[0];
5461 uint32_t id = args[1];
5462 uint32_t ptr = args[2];
5463 auto &e = compiler.set<SPIRExpression>(id, args: "", args&: result_type, args: true);
5464 auto *var = compiler.maybe_get_backing_variable(chain: ptr);
5465 if (var)
5466 e.loaded_from = var->self;
5467 break;
5468 }
5469
5470 case OpStore:
5471 case OpImageWrite:
5472 case OpAtomicStore:
5473 {
5474 if (length < 1)
5475 return false;
5476
5477 uint32_t ptr = args[0];
5478 auto *var = compiler.maybe_get_backing_variable(chain: ptr);
5479 if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
5480 var->storage == StorageClassStorageBuffer))
5481 {
5482 access_potential_resource(id: var->self);
5483 }
5484
5485 break;
5486 }
5487
5488 case OpCopyMemory:
5489 {
5490 if (length < 2)
5491 return false;
5492
5493 uint32_t dst = args[0];
5494 uint32_t src = args[1];
5495 auto *dst_var = compiler.maybe_get_backing_variable(chain: dst);
5496 auto *src_var = compiler.maybe_get_backing_variable(chain: src);
5497
5498 if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer))
5499 access_potential_resource(id: dst_var->self);
5500
5501 if (src_var)
5502 {
5503 if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer)
5504 break;
5505
5506 if (src_var->storage == StorageClassUniform &&
5507 !compiler.has_decoration(id: compiler.get<SPIRType>(id: src_var->basetype).self, decoration: DecorationBufferBlock))
5508 {
5509 break;
5510 }
5511
5512 access_potential_resource(id: src_var->self);
5513 }
5514
5515 break;
5516 }
5517
5518 case OpImageRead:
5519 case OpAtomicLoad:
5520 {
5521 if (length < 3)
5522 return false;
5523
5524 uint32_t ptr = args[2];
5525 auto *var = compiler.maybe_get_backing_variable(chain: ptr);
5526
5527 // We're only concerned with buffer and image memory here.
5528 if (!var)
5529 break;
5530
5531 switch (var->storage)
5532 {
5533 default:
5534 break;
5535
5536 case StorageClassUniform:
5537 // Must have BufferBlock; we only care about SSBOs.
5538 if (!compiler.has_decoration(id: compiler.get<SPIRType>(id: var->basetype).self, decoration: DecorationBufferBlock))
5539 break;
5540 // fallthrough
5541 case StorageClassUniformConstant:
5542 case StorageClassStorageBuffer:
5543 access_potential_resource(id: var->self);
5544 break;
5545 }
5546 break;
5547 }
5548
5549 case OpAtomicExchange:
5550 case OpAtomicCompareExchange:
5551 case OpAtomicIIncrement:
5552 case OpAtomicIDecrement:
5553 case OpAtomicIAdd:
5554 case OpAtomicISub:
5555 case OpAtomicSMin:
5556 case OpAtomicUMin:
5557 case OpAtomicSMax:
5558 case OpAtomicUMax:
5559 case OpAtomicAnd:
5560 case OpAtomicOr:
5561 case OpAtomicXor:
5562 {
5563 if (length < 3)
5564 return false;
5565
5566 uint32_t ptr = args[2];
5567 auto *var = compiler.maybe_get_backing_variable(chain: ptr);
5568 if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
5569 var->storage == StorageClassStorageBuffer))
5570 {
5571 access_potential_resource(id: var->self);
5572 }
5573
5574 break;
5575 }
5576
5577 default:
5578 break;
5579 }
5580
5581 return true;
5582}
5583
5584void Compiler::analyze_interlocked_resource_usage()
5585{
5586 if (get_execution_model() == ExecutionModelFragment &&
5587 (get_entry_point().flags.get(bit: ExecutionModePixelInterlockOrderedEXT) ||
5588 get_entry_point().flags.get(bit: ExecutionModePixelInterlockUnorderedEXT) ||
5589 get_entry_point().flags.get(bit: ExecutionModeSampleInterlockOrderedEXT) ||
5590 get_entry_point().flags.get(bit: ExecutionModeSampleInterlockUnorderedEXT)))
5591 {
5592 InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point);
5593 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler&: prepass_handler);
5594
5595 InterlockedResourceAccessHandler handler(*this, ir.default_entry_point);
5596 handler.interlock_function_id = prepass_handler.interlock_function_id;
5597 handler.split_function_case = prepass_handler.split_function_case;
5598 handler.control_flow_interlock = prepass_handler.control_flow_interlock;
5599 handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock;
5600
5601 traverse_all_reachable_opcodes(func: get<SPIRFunction>(id: ir.default_entry_point), handler);
5602
5603 // For GLSL. If we hit any of these cases, we have to fall back to the conservative approach.
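// Typical triggers: the interlock is begun from a helper function rather than the entry
// point, or begin/end do not form a clean critical section in the entry function.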
5604 interlocked_is_complex =
5605 !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point;
5606 }
5607}
5608
5609 // Helper for type_contains_recursion(): walks struct members, tracking the struct IDs currently on the traversal path.
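// Illustrative case: a linked-list style struct that points back to itself through a
// PhysicalStorageBuffer pointer member forms a cycle; revisiting a struct ID that is
// already on the current traversal path signals such a recursion.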
5610bool Compiler::check_internal_recursion(const SPIRType &type, std::unordered_set<uint32_t> &checked_ids)
5611{
5612 if (type.basetype != SPIRType::Struct)
5613 return false;
5614
5615 if (checked_ids.count(x: type.self))
5616 return true;
5617
5618 // Recurse into struct members
5619 bool is_recursive = false;
5620 checked_ids.insert(x: type.self);
5621 uint32_t mbr_cnt = uint32_t(type.member_types.size());
5622 for (uint32_t mbr_idx = 0; !is_recursive && mbr_idx < mbr_cnt; mbr_idx++)
5623 {
5624 uint32_t mbr_type_id = type.member_types[mbr_idx];
5625 auto &mbr_type = get<SPIRType>(id: mbr_type_id);
5626 is_recursive |= check_internal_recursion(type: mbr_type, checked_ids);
5627 }
5628 checked_ids.erase(x: type.self);
5629 return is_recursive;
5630}
5631
5632// Return whether the struct type contains a structural recursion nested somewhere within its content.
5633bool Compiler::type_contains_recursion(const SPIRType &type)
5634{
5635 std::unordered_set<uint32_t> checked_ids;
5636 return check_internal_recursion(type, checked_ids);
5637}
5638
5639bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
5640{
5641 if (!is_array(type))
5642 return false;
5643
5644 // BDA types must have parent type hierarchy.
5645 if (!type.parent_type)
5646 return false;
5647
5648 // Punch through all array layers.
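// e.g. an array of arrays whose innermost element type is
// OpTypePointer PhysicalStorageBuffer %Block still counts as an array of pointers.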
5649 auto *parent = &get<SPIRType>(id: type.parent_type);
5650 while (is_array(type: *parent))
5651 parent = &get<SPIRType>(id: parent->parent_type);
5652
5653 return is_pointer(type: *parent);
5654}
5655
5656bool Compiler::flush_phi_required(BlockID from, BlockID to) const
5657{
5658 auto &child = get<SPIRBlock>(id: to);
5659 for (auto &phi : child.phi_variables)
5660 if (phi.parent == from)
5661 return true;
5662 return false;
5663}
5664
5665void Compiler::add_loop_level()
5666{
5667 current_loop_level++;
5668}
5669
