1 | // |
2 | // Copyright (C) 2018 Google, Inc. |
3 | // |
4 | // All rights reserved. |
5 | // |
6 | // Redistribution and use in source and binary forms, with or without |
7 | // modification, are permitted provided that the following conditions |
8 | // are met: |
9 | // |
10 | // Redistributions of source code must retain the above copyright |
11 | // notice, this list of conditions and the following disclaimer. |
12 | // |
13 | // Redistributions in binary form must reproduce the above |
14 | // copyright notice, this list of conditions and the following |
15 | // disclaimer in the documentation and/or other materials provided |
16 | // with the distribution. |
17 | // |
18 | // Neither the name of 3Dlabs Inc. Ltd. nor the names of its |
19 | // contributors may be used to endorse or promote products derived |
20 | // from this software without specific prior written permission. |
21 | // |
22 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
23 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
24 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
25 | // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
26 | // COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
27 | // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
28 | // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
29 | // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
30 | // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 | // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
32 | // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
33 | // POSSIBILITY OF SUCH DAMAGE. |
34 | |
35 | // |
36 | // Post-processing for SPIR-V IR, in internal form, not standard binary form. |
37 | // |
38 | |
39 | #include <cassert> |
40 | #include <cstdlib> |
41 | |
42 | #include <unordered_map> |
43 | #include <unordered_set> |
44 | #include <algorithm> |
45 | |
46 | #include "SpvBuilder.h" |
47 | #include "spirv.hpp" |
48 | |
49 | namespace spv { |
50 | #include "GLSL.std.450.h" |
51 | #include "GLSL.ext.KHR.h" |
52 | #include "GLSL.ext.EXT.h" |
53 | #include "GLSL.ext.AMD.h" |
54 | #include "GLSL.ext.NV.h" |
55 | #include "GLSL.ext.ARM.h" |
56 | #include "GLSL.ext.QCOM.h" |
57 | } |
58 | |
59 | namespace spv { |
60 | |
61 | // Hook to visit each operand type and result type of an instruction. |
62 | // Will be called multiple times for one instruction, once for each typed |
63 | // operand and the result. |
64 | void Builder::postProcessType(const Instruction& inst, Id typeId) |
65 | { |
66 | // Characterize the type being questioned |
67 | Id basicTypeOp = getMostBasicTypeClass(typeId); |
68 | int width = 0; |
69 | if (basicTypeOp == OpTypeFloat || basicTypeOp == OpTypeInt) |
70 | width = getScalarTypeWidth(typeId); |
71 | |
72 | // Do opcode-specific checks |
73 | switch (inst.getOpCode()) { |
74 | case OpLoad: |
75 | case OpStore: |
76 | if (basicTypeOp == OpTypeStruct) { |
77 | if (containsType(typeId, typeOp: OpTypeInt, width: 8)) |
78 | addCapability(cap: CapabilityInt8); |
79 | if (containsType(typeId, typeOp: OpTypeInt, width: 16)) |
80 | addCapability(cap: CapabilityInt16); |
81 | if (containsType(typeId, typeOp: OpTypeFloat, width: 16)) |
82 | addCapability(cap: CapabilityFloat16); |
83 | } else { |
84 | StorageClass storageClass = getStorageClass(resultId: inst.getIdOperand(op: 0)); |
85 | if (width == 8) { |
86 | switch (storageClass) { |
87 | case StorageClassPhysicalStorageBufferEXT: |
88 | case StorageClassUniform: |
89 | case StorageClassStorageBuffer: |
90 | case StorageClassPushConstant: |
91 | break; |
92 | default: |
93 | addCapability(cap: CapabilityInt8); |
94 | break; |
95 | } |
96 | } else if (width == 16) { |
97 | switch (storageClass) { |
98 | case StorageClassPhysicalStorageBufferEXT: |
99 | case StorageClassUniform: |
100 | case StorageClassStorageBuffer: |
101 | case StorageClassPushConstant: |
102 | case StorageClassInput: |
103 | case StorageClassOutput: |
104 | break; |
105 | default: |
106 | if (basicTypeOp == OpTypeInt) |
107 | addCapability(cap: CapabilityInt16); |
108 | if (basicTypeOp == OpTypeFloat) |
109 | addCapability(cap: CapabilityFloat16); |
110 | break; |
111 | } |
112 | } |
113 | } |
114 | break; |
115 | case OpCopyObject: |
116 | break; |
117 | case OpFConvert: |
118 | case OpSConvert: |
119 | case OpUConvert: |
120 | // Look for any 8/16-bit storage capabilities. If there are none, assume that |
121 | // the convert instruction requires the Float16/Int8/16 capability. |
122 | if (containsType(typeId, typeOp: OpTypeFloat, width: 16) || containsType(typeId, typeOp: OpTypeInt, width: 16)) { |
123 | bool foundStorage = false; |
124 | for (auto it = capabilities.begin(); it != capabilities.end(); ++it) { |
125 | spv::Capability cap = *it; |
126 | if (cap == spv::CapabilityStorageInputOutput16 || |
127 | cap == spv::CapabilityStoragePushConstant16 || |
128 | cap == spv::CapabilityStorageUniformBufferBlock16 || |
129 | cap == spv::CapabilityStorageUniform16) { |
130 | foundStorage = true; |
131 | break; |
132 | } |
133 | } |
134 | if (!foundStorage) { |
135 | if (containsType(typeId, typeOp: OpTypeFloat, width: 16)) |
136 | addCapability(cap: CapabilityFloat16); |
137 | if (containsType(typeId, typeOp: OpTypeInt, width: 16)) |
138 | addCapability(cap: CapabilityInt16); |
139 | } |
140 | } |
141 | if (containsType(typeId, typeOp: OpTypeInt, width: 8)) { |
142 | bool foundStorage = false; |
143 | for (auto it = capabilities.begin(); it != capabilities.end(); ++it) { |
144 | spv::Capability cap = *it; |
145 | if (cap == spv::CapabilityStoragePushConstant8 || |
146 | cap == spv::CapabilityUniformAndStorageBuffer8BitAccess || |
147 | cap == spv::CapabilityStorageBuffer8BitAccess) { |
148 | foundStorage = true; |
149 | break; |
150 | } |
151 | } |
152 | if (!foundStorage) { |
153 | addCapability(cap: CapabilityInt8); |
154 | } |
155 | } |
156 | break; |
157 | case OpExtInst: |
158 | switch (inst.getImmediateOperand(op: 1)) { |
159 | case GLSLstd450Frexp: |
160 | case GLSLstd450FrexpStruct: |
161 | if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, typeOp: OpTypeInt, width: 16)) |
162 | addExtension(ext: spv::E_SPV_AMD_gpu_shader_int16); |
163 | break; |
164 | case GLSLstd450InterpolateAtCentroid: |
165 | case GLSLstd450InterpolateAtSample: |
166 | case GLSLstd450InterpolateAtOffset: |
167 | if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, typeOp: OpTypeFloat, width: 16)) |
168 | addExtension(ext: spv::E_SPV_AMD_gpu_shader_half_float); |
169 | break; |
170 | default: |
171 | break; |
172 | } |
173 | break; |
174 | case OpAccessChain: |
175 | case OpPtrAccessChain: |
176 | if (isPointerType(typeId)) |
177 | break; |
178 | if (basicTypeOp == OpTypeInt) { |
179 | if (width == 16) |
180 | addCapability(cap: CapabilityInt16); |
181 | else if (width == 8) |
182 | addCapability(cap: CapabilityInt8); |
183 | } |
184 | break; |
185 | default: |
186 | if (basicTypeOp == OpTypeInt) { |
187 | if (width == 16) |
188 | addCapability(cap: CapabilityInt16); |
189 | else if (width == 8) |
190 | addCapability(cap: CapabilityInt8); |
191 | else if (width == 64) |
192 | addCapability(cap: CapabilityInt64); |
193 | } else if (basicTypeOp == OpTypeFloat) { |
194 | if (width == 16) |
195 | addCapability(cap: CapabilityFloat16); |
196 | else if (width == 64) |
197 | addCapability(cap: CapabilityFloat64); |
198 | } |
199 | break; |
200 | } |
201 | } |
202 | |
203 | // Called for each instruction that resides in a block. |
204 | void Builder::postProcess(Instruction& inst) |
205 | { |
206 | // Add capabilities based simply on the opcode. |
207 | switch (inst.getOpCode()) { |
208 | case OpExtInst: |
209 | switch (inst.getImmediateOperand(op: 1)) { |
210 | case GLSLstd450InterpolateAtCentroid: |
211 | case GLSLstd450InterpolateAtSample: |
212 | case GLSLstd450InterpolateAtOffset: |
213 | addCapability(cap: CapabilityInterpolationFunction); |
214 | break; |
215 | default: |
216 | break; |
217 | } |
218 | break; |
219 | case OpDPdxFine: |
220 | case OpDPdyFine: |
221 | case OpFwidthFine: |
222 | case OpDPdxCoarse: |
223 | case OpDPdyCoarse: |
224 | case OpFwidthCoarse: |
225 | addCapability(cap: CapabilityDerivativeControl); |
226 | break; |
227 | |
228 | case OpImageQueryLod: |
229 | case OpImageQuerySize: |
230 | case OpImageQuerySizeLod: |
231 | case OpImageQuerySamples: |
232 | case OpImageQueryLevels: |
233 | addCapability(cap: CapabilityImageQuery); |
234 | break; |
235 | |
236 | case OpGroupNonUniformPartitionNV: |
237 | addExtension(ext: E_SPV_NV_shader_subgroup_partitioned); |
238 | addCapability(cap: CapabilityGroupNonUniformPartitionedNV); |
239 | break; |
240 | |
241 | case OpLoad: |
242 | case OpStore: |
243 | { |
244 | // For any load/store to a PhysicalStorageBufferEXT, walk the accesschain |
245 | // index list to compute the misalignment. The pre-existing alignment value |
246 | // (set via Builder::AccessChain::alignment) only accounts for the base of |
247 | // the reference type and any scalar component selection in the accesschain, |
248 | // and this function computes the rest from the SPIR-V Offset decorations. |
249 | Instruction *accessChain = module.getInstruction(id: inst.getIdOperand(op: 0)); |
250 | if (accessChain->getOpCode() == OpAccessChain) { |
251 | Instruction *base = module.getInstruction(id: accessChain->getIdOperand(op: 0)); |
252 | // Get the type of the base of the access chain. It must be a pointer type. |
253 | Id typeId = base->getTypeId(); |
254 | Instruction *type = module.getInstruction(id: typeId); |
255 | assert(type->getOpCode() == OpTypePointer); |
256 | if (type->getImmediateOperand(op: 0) != StorageClassPhysicalStorageBufferEXT) { |
257 | break; |
258 | } |
259 | // Get the pointee type. |
260 | typeId = type->getIdOperand(op: 1); |
261 | type = module.getInstruction(id: typeId); |
262 | // Walk the index list for the access chain. For each index, find any |
263 | // misalignment that can apply when accessing the member/element via |
264 | // Offset/ArrayStride/MatrixStride decorations, and bitwise OR them all |
265 | // together. |
266 | int alignment = 0; |
267 | for (int i = 1; i < accessChain->getNumOperands(); ++i) { |
268 | Instruction *idx = module.getInstruction(id: accessChain->getIdOperand(op: i)); |
269 | if (type->getOpCode() == OpTypeStruct) { |
270 | assert(idx->getOpCode() == OpConstant); |
271 | unsigned int c = idx->getImmediateOperand(op: 0); |
272 | |
273 | const auto function = [&](const std::unique_ptr<Instruction>& decoration) { |
274 | if (decoration.get()->getOpCode() == OpMemberDecorate && |
275 | decoration.get()->getIdOperand(op: 0) == typeId && |
276 | decoration.get()->getImmediateOperand(op: 1) == c && |
277 | (decoration.get()->getImmediateOperand(op: 2) == DecorationOffset || |
278 | decoration.get()->getImmediateOperand(op: 2) == DecorationMatrixStride)) { |
279 | alignment |= decoration.get()->getImmediateOperand(op: 3); |
280 | } |
281 | }; |
282 | std::for_each(first: decorations.begin(), last: decorations.end(), f: function); |
283 | // get the next member type |
284 | typeId = type->getIdOperand(op: c); |
285 | type = module.getInstruction(id: typeId); |
286 | } else if (type->getOpCode() == OpTypeArray || |
287 | type->getOpCode() == OpTypeRuntimeArray) { |
288 | const auto function = [&](const std::unique_ptr<Instruction>& decoration) { |
289 | if (decoration.get()->getOpCode() == OpDecorate && |
290 | decoration.get()->getIdOperand(op: 0) == typeId && |
291 | decoration.get()->getImmediateOperand(op: 1) == DecorationArrayStride) { |
292 | alignment |= decoration.get()->getImmediateOperand(op: 2); |
293 | } |
294 | }; |
295 | std::for_each(first: decorations.begin(), last: decorations.end(), f: function); |
296 | // Get the element type |
297 | typeId = type->getIdOperand(op: 0); |
298 | type = module.getInstruction(id: typeId); |
299 | } else { |
300 | // Once we get to any non-aggregate type, we're done. |
301 | break; |
302 | } |
303 | } |
304 | assert(inst.getNumOperands() >= 3); |
305 | unsigned int memoryAccess = inst.getImmediateOperand(op: (inst.getOpCode() == OpStore) ? 2 : 1); |
306 | assert(memoryAccess & MemoryAccessAlignedMask); |
307 | static_cast<void>(memoryAccess); |
308 | // Compute the index of the alignment operand. |
309 | int alignmentIdx = 2; |
310 | if (inst.getOpCode() == OpStore) |
311 | alignmentIdx++; |
312 | // Merge new and old (mis)alignment |
313 | alignment |= inst.getImmediateOperand(op: alignmentIdx); |
314 | // Pick the LSB |
315 | alignment = alignment & ~(alignment & (alignment-1)); |
316 | // update the Aligned operand |
317 | inst.setImmediateOperand(idx: alignmentIdx, immediate: alignment); |
318 | } |
319 | break; |
320 | } |
321 | |
322 | default: |
323 | break; |
324 | } |
325 | |
326 | // Checks based on type |
327 | if (inst.getTypeId() != NoType) |
328 | postProcessType(inst, typeId: inst.getTypeId()); |
329 | for (int op = 0; op < inst.getNumOperands(); ++op) { |
330 | if (inst.isIdOperand(op)) { |
331 | // In blocks, these are always result ids, but we are relying on |
332 | // getTypeId() to return NoType for things like OpLabel. |
333 | if (getTypeId(resultId: inst.getIdOperand(op)) != NoType) |
334 | postProcessType(inst, typeId: getTypeId(resultId: inst.getIdOperand(op))); |
335 | } |
336 | } |
337 | } |
338 | |
339 | // comment in header |
340 | void Builder::postProcessCFG() |
341 | { |
342 | // reachableBlocks is the set of blockss reached via control flow, or which are |
343 | // unreachable continue targert or unreachable merge. |
344 | std::unordered_set<const Block*> reachableBlocks; |
345 | std::unordered_map<Block*, Block*> ; |
346 | std::unordered_set<Block*> unreachableMerges; |
347 | std::unordered_set<Id> unreachableDefinitions; |
348 | // Collect IDs defined in unreachable blocks. For each function, label the |
349 | // reachable blocks first. Then for each unreachable block, collect the |
350 | // result IDs of the instructions in it. |
351 | for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) { |
352 | Function* f = *fi; |
353 | Block* entry = f->getEntryBlock(); |
354 | inReadableOrder(root: entry, |
355 | callback: [&reachableBlocks, &unreachableMerges, &headerForUnreachableContinue] |
356 | (Block* b, ReachReason why, Block* ) { |
357 | reachableBlocks.insert(x: b); |
358 | if (why == ReachDeadContinue) headerForUnreachableContinue[b] = header; |
359 | if (why == ReachDeadMerge) unreachableMerges.insert(x: b); |
360 | }); |
361 | for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) { |
362 | Block* b = *bi; |
363 | if (unreachableMerges.count(x: b) != 0 || headerForUnreachableContinue.count(x: b) != 0) { |
364 | auto ii = b->getInstructions().cbegin(); |
365 | ++ii; // Keep potential decorations on the label. |
366 | for (; ii != b->getInstructions().cend(); ++ii) |
367 | unreachableDefinitions.insert(x: ii->get()->getResultId()); |
368 | } else if (reachableBlocks.count(x: b) == 0) { |
369 | // The normal case for unreachable code. All definitions are considered dead. |
370 | for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ++ii) |
371 | unreachableDefinitions.insert(x: ii->get()->getResultId()); |
372 | } |
373 | } |
374 | } |
375 | |
376 | // Modify unreachable merge blocks and unreachable continue targets. |
377 | // Delete their contents. |
378 | for (auto mergeIter = unreachableMerges.begin(); mergeIter != unreachableMerges.end(); ++mergeIter) { |
379 | (*mergeIter)->rewriteAsCanonicalUnreachableMerge(); |
380 | } |
381 | for (auto continueIter = headerForUnreachableContinue.begin(); |
382 | continueIter != headerForUnreachableContinue.end(); |
383 | ++continueIter) { |
384 | Block* continue_target = continueIter->first; |
385 | Block* = continueIter->second; |
386 | continue_target->rewriteAsCanonicalUnreachableContinue(header); |
387 | } |
388 | |
389 | // Remove unneeded decorations, for unreachable instructions |
390 | decorations.erase(first: std::remove_if(first: decorations.begin(), last: decorations.end(), |
391 | pred: [&unreachableDefinitions](std::unique_ptr<Instruction>& I) -> bool { |
392 | Id decoration_id = I.get()->getIdOperand(op: 0); |
393 | return unreachableDefinitions.count(x: decoration_id) != 0; |
394 | }), |
395 | last: decorations.end()); |
396 | } |
397 | |
398 | // comment in header |
399 | void Builder::postProcessFeatures() { |
400 | // Add per-instruction capabilities, extensions, etc., |
401 | |
402 | // Look for any 8/16 bit type in physical storage buffer class, and set the |
403 | // appropriate capability. This happens in createSpvVariable for other storage |
404 | // classes, but there isn't always a variable for physical storage buffer. |
405 | for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) { |
406 | Instruction* type = groupedTypes[OpTypePointer][t]; |
407 | if (type->getImmediateOperand(op: 0) == (unsigned)StorageClassPhysicalStorageBufferEXT) { |
408 | if (containsType(typeId: type->getIdOperand(op: 1), typeOp: OpTypeInt, width: 8)) { |
409 | addIncorporatedExtension(ext: spv::E_SPV_KHR_8bit_storage, incorporatedVersion: spv::Spv_1_5); |
410 | addCapability(cap: spv::CapabilityStorageBuffer8BitAccess); |
411 | } |
412 | if (containsType(typeId: type->getIdOperand(op: 1), typeOp: OpTypeInt, width: 16) || |
413 | containsType(typeId: type->getIdOperand(op: 1), typeOp: OpTypeFloat, width: 16)) { |
414 | addIncorporatedExtension(ext: spv::E_SPV_KHR_16bit_storage, incorporatedVersion: spv::Spv_1_3); |
415 | addCapability(cap: spv::CapabilityStorageBuffer16BitAccess); |
416 | } |
417 | } |
418 | } |
419 | |
420 | // process all block-contained instructions |
421 | for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) { |
422 | Function* f = *fi; |
423 | for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) { |
424 | Block* b = *bi; |
425 | for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ii++) |
426 | postProcess(inst&: *ii->get()); |
427 | |
428 | // For all local variables that contain pointers to PhysicalStorageBufferEXT, check whether |
429 | // there is an existing restrict/aliased decoration. If we don't find one, add Aliased as the |
430 | // default. |
431 | for (auto vi = b->getLocalVariables().cbegin(); vi != b->getLocalVariables().cend(); vi++) { |
432 | const Instruction& inst = *vi->get(); |
433 | Id resultId = inst.getResultId(); |
434 | if (containsPhysicalStorageBufferOrArray(typeId: getDerefTypeId(resultId))) { |
435 | bool foundDecoration = false; |
436 | const auto function = [&](const std::unique_ptr<Instruction>& decoration) { |
437 | if (decoration.get()->getIdOperand(op: 0) == resultId && |
438 | decoration.get()->getOpCode() == OpDecorate && |
439 | (decoration.get()->getImmediateOperand(op: 1) == spv::DecorationAliasedPointerEXT || |
440 | decoration.get()->getImmediateOperand(op: 1) == spv::DecorationRestrictPointerEXT)) { |
441 | foundDecoration = true; |
442 | } |
443 | }; |
444 | std::for_each(first: decorations.begin(), last: decorations.end(), f: function); |
445 | if (!foundDecoration) { |
446 | addDecoration(resultId, spv::DecorationAliasedPointerEXT); |
447 | } |
448 | } |
449 | } |
450 | } |
451 | } |
452 | |
453 | // If any Vulkan memory model-specific functionality is used, update the |
454 | // OpMemoryModel to match. |
455 | if (capabilities.find(x: spv::CapabilityVulkanMemoryModelKHR) != capabilities.end()) { |
456 | memoryModel = spv::MemoryModelVulkanKHR; |
457 | addIncorporatedExtension(ext: spv::E_SPV_KHR_vulkan_memory_model, incorporatedVersion: spv::Spv_1_5); |
458 | } |
459 | |
460 | // Add Aliased decoration if there's more than one Workgroup Block variable. |
461 | if (capabilities.find(x: spv::CapabilityWorkgroupMemoryExplicitLayoutKHR) != capabilities.end()) { |
462 | assert(entryPoints.size() == 1); |
463 | auto &ep = entryPoints[0]; |
464 | |
465 | std::vector<Id> workgroup_variables; |
466 | for (int i = 0; i < (int)ep->getNumOperands(); i++) { |
467 | if (!ep->isIdOperand(op: i)) |
468 | continue; |
469 | |
470 | const Id id = ep->getIdOperand(op: i); |
471 | const Instruction *instr = module.getInstruction(id); |
472 | if (instr->getOpCode() != spv::OpVariable) |
473 | continue; |
474 | |
475 | if (instr->getImmediateOperand(op: 0) == spv::StorageClassWorkgroup) |
476 | workgroup_variables.push_back(x: id); |
477 | } |
478 | |
479 | if (workgroup_variables.size() > 1) { |
480 | for (size_t i = 0; i < workgroup_variables.size(); i++) |
481 | addDecoration(workgroup_variables[i], spv::DecorationAliased); |
482 | } |
483 | } |
484 | } |
485 | |
486 | // comment in header |
487 | void Builder::postProcess(bool compileOnly) |
488 | { |
489 | // postProcessCFG needs an entrypoint to determine what is reachable, but if we are not creating an "executable" shader, we don't have an entrypoint |
490 | if (!compileOnly) |
491 | postProcessCFG(); |
492 | |
493 | postProcessFeatures(); |
494 | } |
495 | |
496 | }; // end spv namespace |
497 | |