1 | // |
2 | // Copyright (C) 2018 Google, Inc. |
3 | // |
4 | // All rights reserved. |
5 | // |
6 | // Redistribution and use in source and binary forms, with or without |
7 | // modification, are permitted provided that the following conditions |
8 | // are met: |
9 | // |
10 | // Redistributions of source code must retain the above copyright |
11 | // notice, this list of conditions and the following disclaimer. |
12 | // |
13 | // Redistributions in binary form must reproduce the above |
14 | // copyright notice, this list of conditions and the following |
15 | // disclaimer in the documentation and/or other materials provided |
16 | // with the distribution. |
17 | // |
18 | // Neither the name of 3Dlabs Inc. Ltd. nor the names of its |
19 | // contributors may be used to endorse or promote products derived |
20 | // from this software without specific prior written permission. |
21 | // |
22 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
23 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
24 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
25 | // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
26 | // COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
27 | // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
28 | // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
29 | // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
30 | // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 | // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
32 | // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
33 | // POSSIBILITY OF SUCH DAMAGE. |
34 | |
35 | // |
36 | // Post-processing for SPIR-V IR, in internal form, not standard binary form. |
37 | // |
38 | |
39 | #include <cassert> |
40 | #include <cstdlib> |
41 | |
42 | #include <unordered_map> |
43 | #include <unordered_set> |
44 | #include <algorithm> |
45 | |
46 | #include "SpvBuilder.h" |
47 | #include "spirv.hpp" |
48 | |
49 | namespace spv { |
50 | #include "GLSL.std.450.h" |
51 | #include "GLSL.ext.KHR.h" |
52 | #include "GLSL.ext.EXT.h" |
53 | #include "GLSL.ext.AMD.h" |
54 | #include "GLSL.ext.NV.h" |
55 | } |
56 | |
57 | namespace spv { |
58 | |
59 | #ifndef GLSLANG_WEB |
60 | // Hook to visit each operand type and result type of an instruction. |
61 | // Will be called multiple times for one instruction, once for each typed |
62 | // operand and the result. |
63 | void Builder::postProcessType(const Instruction& inst, Id typeId) |
64 | { |
65 | // Characterize the type being questioned |
66 | Id basicTypeOp = getMostBasicTypeClass(typeId); |
67 | int width = 0; |
68 | if (basicTypeOp == OpTypeFloat || basicTypeOp == OpTypeInt) |
69 | width = getScalarTypeWidth(typeId); |
70 | |
71 | // Do opcode-specific checks |
72 | switch (inst.getOpCode()) { |
73 | case OpLoad: |
74 | case OpStore: |
75 | if (basicTypeOp == OpTypeStruct) { |
76 | if (containsType(typeId, typeOp: OpTypeInt, width: 8)) |
77 | addCapability(cap: CapabilityInt8); |
78 | if (containsType(typeId, typeOp: OpTypeInt, width: 16)) |
79 | addCapability(cap: CapabilityInt16); |
80 | if (containsType(typeId, typeOp: OpTypeFloat, width: 16)) |
81 | addCapability(cap: CapabilityFloat16); |
82 | } else { |
83 | StorageClass storageClass = getStorageClass(resultId: inst.getIdOperand(op: 0)); |
84 | if (width == 8) { |
85 | switch (storageClass) { |
86 | case StorageClassPhysicalStorageBufferEXT: |
87 | case StorageClassUniform: |
88 | case StorageClassStorageBuffer: |
89 | case StorageClassPushConstant: |
90 | break; |
91 | default: |
92 | addCapability(cap: CapabilityInt8); |
93 | break; |
94 | } |
95 | } else if (width == 16) { |
96 | switch (storageClass) { |
97 | case StorageClassPhysicalStorageBufferEXT: |
98 | case StorageClassUniform: |
99 | case StorageClassStorageBuffer: |
100 | case StorageClassPushConstant: |
101 | case StorageClassInput: |
102 | case StorageClassOutput: |
103 | break; |
104 | default: |
105 | if (basicTypeOp == OpTypeInt) |
106 | addCapability(cap: CapabilityInt16); |
107 | if (basicTypeOp == OpTypeFloat) |
108 | addCapability(cap: CapabilityFloat16); |
109 | break; |
110 | } |
111 | } |
112 | } |
113 | break; |
114 | case OpCopyObject: |
115 | break; |
116 | case OpFConvert: |
117 | case OpSConvert: |
118 | case OpUConvert: |
119 | // Look for any 8/16-bit storage capabilities. If there are none, assume that |
120 | // the convert instruction requires the Float16/Int8/16 capability. |
121 | if (containsType(typeId, typeOp: OpTypeFloat, width: 16) || containsType(typeId, typeOp: OpTypeInt, width: 16)) { |
122 | bool foundStorage = false; |
123 | for (auto it = capabilities.begin(); it != capabilities.end(); ++it) { |
124 | spv::Capability cap = *it; |
125 | if (cap == spv::CapabilityStorageInputOutput16 || |
126 | cap == spv::CapabilityStoragePushConstant16 || |
127 | cap == spv::CapabilityStorageUniformBufferBlock16 || |
128 | cap == spv::CapabilityStorageUniform16) { |
129 | foundStorage = true; |
130 | break; |
131 | } |
132 | } |
133 | if (!foundStorage) { |
134 | if (containsType(typeId, typeOp: OpTypeFloat, width: 16)) |
135 | addCapability(cap: CapabilityFloat16); |
136 | if (containsType(typeId, typeOp: OpTypeInt, width: 16)) |
137 | addCapability(cap: CapabilityInt16); |
138 | } |
139 | } |
140 | if (containsType(typeId, typeOp: OpTypeInt, width: 8)) { |
141 | bool foundStorage = false; |
142 | for (auto it = capabilities.begin(); it != capabilities.end(); ++it) { |
143 | spv::Capability cap = *it; |
144 | if (cap == spv::CapabilityStoragePushConstant8 || |
145 | cap == spv::CapabilityUniformAndStorageBuffer8BitAccess || |
146 | cap == spv::CapabilityStorageBuffer8BitAccess) { |
147 | foundStorage = true; |
148 | break; |
149 | } |
150 | } |
151 | if (!foundStorage) { |
152 | addCapability(cap: CapabilityInt8); |
153 | } |
154 | } |
155 | break; |
156 | case OpExtInst: |
157 | switch (inst.getImmediateOperand(op: 1)) { |
158 | case GLSLstd450Frexp: |
159 | case GLSLstd450FrexpStruct: |
160 | if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, typeOp: OpTypeInt, width: 16)) |
161 | addExtension(ext: spv::E_SPV_AMD_gpu_shader_int16); |
162 | break; |
163 | case GLSLstd450InterpolateAtCentroid: |
164 | case GLSLstd450InterpolateAtSample: |
165 | case GLSLstd450InterpolateAtOffset: |
166 | if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, typeOp: OpTypeFloat, width: 16)) |
167 | addExtension(ext: spv::E_SPV_AMD_gpu_shader_half_float); |
168 | break; |
169 | default: |
170 | break; |
171 | } |
172 | break; |
173 | case OpAccessChain: |
174 | case OpPtrAccessChain: |
175 | if (isPointerType(typeId)) |
176 | break; |
177 | if (basicTypeOp == OpTypeInt) { |
178 | if (width == 16) |
179 | addCapability(cap: CapabilityInt16); |
180 | else if (width == 8) |
181 | addCapability(cap: CapabilityInt8); |
182 | } |
183 | default: |
184 | if (basicTypeOp == OpTypeInt) { |
185 | if (width == 16) |
186 | addCapability(cap: CapabilityInt16); |
187 | else if (width == 8) |
188 | addCapability(cap: CapabilityInt8); |
189 | else if (width == 64) |
190 | addCapability(cap: CapabilityInt64); |
191 | } else if (basicTypeOp == OpTypeFloat) { |
192 | if (width == 16) |
193 | addCapability(cap: CapabilityFloat16); |
194 | else if (width == 64) |
195 | addCapability(cap: CapabilityFloat64); |
196 | } |
197 | break; |
198 | } |
199 | } |
200 | |
201 | // Called for each instruction that resides in a block. |
202 | void Builder::postProcess(Instruction& inst) |
203 | { |
204 | // Add capabilities based simply on the opcode. |
205 | switch (inst.getOpCode()) { |
206 | case OpExtInst: |
207 | switch (inst.getImmediateOperand(op: 1)) { |
208 | case GLSLstd450InterpolateAtCentroid: |
209 | case GLSLstd450InterpolateAtSample: |
210 | case GLSLstd450InterpolateAtOffset: |
211 | addCapability(cap: CapabilityInterpolationFunction); |
212 | break; |
213 | default: |
214 | break; |
215 | } |
216 | break; |
217 | case OpDPdxFine: |
218 | case OpDPdyFine: |
219 | case OpFwidthFine: |
220 | case OpDPdxCoarse: |
221 | case OpDPdyCoarse: |
222 | case OpFwidthCoarse: |
223 | addCapability(cap: CapabilityDerivativeControl); |
224 | break; |
225 | |
226 | case OpImageQueryLod: |
227 | case OpImageQuerySize: |
228 | case OpImageQuerySizeLod: |
229 | case OpImageQuerySamples: |
230 | case OpImageQueryLevels: |
231 | addCapability(cap: CapabilityImageQuery); |
232 | break; |
233 | |
234 | case OpGroupNonUniformPartitionNV: |
235 | addExtension(ext: E_SPV_NV_shader_subgroup_partitioned); |
236 | addCapability(cap: CapabilityGroupNonUniformPartitionedNV); |
237 | break; |
238 | |
239 | case OpLoad: |
240 | case OpStore: |
241 | { |
242 | // For any load/store to a PhysicalStorageBufferEXT, walk the accesschain |
243 | // index list to compute the misalignment. The pre-existing alignment value |
244 | // (set via Builder::AccessChain::alignment) only accounts for the base of |
245 | // the reference type and any scalar component selection in the accesschain, |
246 | // and this function computes the rest from the SPIR-V Offset decorations. |
247 | Instruction *accessChain = module.getInstruction(id: inst.getIdOperand(op: 0)); |
248 | if (accessChain->getOpCode() == OpAccessChain) { |
249 | Instruction *base = module.getInstruction(id: accessChain->getIdOperand(op: 0)); |
250 | // Get the type of the base of the access chain. It must be a pointer type. |
251 | Id typeId = base->getTypeId(); |
252 | Instruction *type = module.getInstruction(id: typeId); |
253 | assert(type->getOpCode() == OpTypePointer); |
254 | if (type->getImmediateOperand(op: 0) != StorageClassPhysicalStorageBufferEXT) { |
255 | break; |
256 | } |
257 | // Get the pointee type. |
258 | typeId = type->getIdOperand(op: 1); |
259 | type = module.getInstruction(id: typeId); |
260 | // Walk the index list for the access chain. For each index, find any |
261 | // misalignment that can apply when accessing the member/element via |
262 | // Offset/ArrayStride/MatrixStride decorations, and bitwise OR them all |
263 | // together. |
264 | int alignment = 0; |
265 | for (int i = 1; i < accessChain->getNumOperands(); ++i) { |
266 | Instruction *idx = module.getInstruction(id: accessChain->getIdOperand(op: i)); |
267 | if (type->getOpCode() == OpTypeStruct) { |
268 | assert(idx->getOpCode() == OpConstant); |
269 | unsigned int c = idx->getImmediateOperand(op: 0); |
270 | |
271 | const auto function = [&](const std::unique_ptr<Instruction>& decoration) { |
272 | if (decoration.get()->getOpCode() == OpMemberDecorate && |
273 | decoration.get()->getIdOperand(op: 0) == typeId && |
274 | decoration.get()->getImmediateOperand(op: 1) == c && |
275 | (decoration.get()->getImmediateOperand(op: 2) == DecorationOffset || |
276 | decoration.get()->getImmediateOperand(op: 2) == DecorationMatrixStride)) { |
277 | alignment |= decoration.get()->getImmediateOperand(op: 3); |
278 | } |
279 | }; |
280 | std::for_each(first: decorations.begin(), last: decorations.end(), f: function); |
281 | // get the next member type |
282 | typeId = type->getIdOperand(op: c); |
283 | type = module.getInstruction(id: typeId); |
284 | } else if (type->getOpCode() == OpTypeArray || |
285 | type->getOpCode() == OpTypeRuntimeArray) { |
286 | const auto function = [&](const std::unique_ptr<Instruction>& decoration) { |
287 | if (decoration.get()->getOpCode() == OpDecorate && |
288 | decoration.get()->getIdOperand(op: 0) == typeId && |
289 | decoration.get()->getImmediateOperand(op: 1) == DecorationArrayStride) { |
290 | alignment |= decoration.get()->getImmediateOperand(op: 2); |
291 | } |
292 | }; |
293 | std::for_each(first: decorations.begin(), last: decorations.end(), f: function); |
294 | // Get the element type |
295 | typeId = type->getIdOperand(op: 0); |
296 | type = module.getInstruction(id: typeId); |
297 | } else { |
298 | // Once we get to any non-aggregate type, we're done. |
299 | break; |
300 | } |
301 | } |
302 | assert(inst.getNumOperands() >= 3); |
303 | unsigned int memoryAccess = inst.getImmediateOperand(op: (inst.getOpCode() == OpStore) ? 2 : 1); |
304 | assert(memoryAccess & MemoryAccessAlignedMask); |
305 | static_cast<void>(memoryAccess); |
306 | // Compute the index of the alignment operand. |
307 | int alignmentIdx = 2; |
308 | if (inst.getOpCode() == OpStore) |
309 | alignmentIdx++; |
310 | // Merge new and old (mis)alignment |
311 | alignment |= inst.getImmediateOperand(op: alignmentIdx); |
312 | // Pick the LSB |
313 | alignment = alignment & ~(alignment & (alignment-1)); |
314 | // update the Aligned operand |
315 | inst.setImmediateOperand(idx: alignmentIdx, immediate: alignment); |
316 | } |
317 | break; |
318 | } |
319 | |
320 | default: |
321 | break; |
322 | } |
323 | |
324 | // Checks based on type |
325 | if (inst.getTypeId() != NoType) |
326 | postProcessType(inst, typeId: inst.getTypeId()); |
327 | for (int op = 0; op < inst.getNumOperands(); ++op) { |
328 | if (inst.isIdOperand(op)) { |
329 | // In blocks, these are always result ids, but we are relying on |
330 | // getTypeId() to return NoType for things like OpLabel. |
331 | if (getTypeId(resultId: inst.getIdOperand(op)) != NoType) |
332 | postProcessType(inst, typeId: getTypeId(resultId: inst.getIdOperand(op))); |
333 | } |
334 | } |
335 | } |
336 | #endif |
337 | |
338 | // comment in header |
339 | void Builder::postProcessCFG() |
340 | { |
341 | // reachableBlocks is the set of blockss reached via control flow, or which are |
342 | // unreachable continue targert or unreachable merge. |
343 | std::unordered_set<const Block*> reachableBlocks; |
344 | std::unordered_map<Block*, Block*> ; |
345 | std::unordered_set<Block*> unreachableMerges; |
346 | std::unordered_set<Id> unreachableDefinitions; |
347 | // Collect IDs defined in unreachable blocks. For each function, label the |
348 | // reachable blocks first. Then for each unreachable block, collect the |
349 | // result IDs of the instructions in it. |
350 | for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) { |
351 | Function* f = *fi; |
352 | Block* entry = f->getEntryBlock(); |
353 | inReadableOrder(root: entry, |
354 | callback: [&reachableBlocks, &unreachableMerges, &headerForUnreachableContinue] |
355 | (Block* b, ReachReason why, Block* ) { |
356 | reachableBlocks.insert(x: b); |
357 | if (why == ReachDeadContinue) headerForUnreachableContinue[b] = header; |
358 | if (why == ReachDeadMerge) unreachableMerges.insert(x: b); |
359 | }); |
360 | for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) { |
361 | Block* b = *bi; |
362 | if (unreachableMerges.count(x: b) != 0 || headerForUnreachableContinue.count(x: b) != 0) { |
363 | auto ii = b->getInstructions().cbegin(); |
364 | ++ii; // Keep potential decorations on the label. |
365 | for (; ii != b->getInstructions().cend(); ++ii) |
366 | unreachableDefinitions.insert(x: ii->get()->getResultId()); |
367 | } else if (reachableBlocks.count(x: b) == 0) { |
368 | // The normal case for unreachable code. All definitions are considered dead. |
369 | for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ++ii) |
370 | unreachableDefinitions.insert(x: ii->get()->getResultId()); |
371 | } |
372 | } |
373 | } |
374 | |
375 | // Modify unreachable merge blocks and unreachable continue targets. |
376 | // Delete their contents. |
377 | for (auto mergeIter = unreachableMerges.begin(); mergeIter != unreachableMerges.end(); ++mergeIter) { |
378 | (*mergeIter)->rewriteAsCanonicalUnreachableMerge(); |
379 | } |
380 | for (auto continueIter = headerForUnreachableContinue.begin(); |
381 | continueIter != headerForUnreachableContinue.end(); |
382 | ++continueIter) { |
383 | Block* continue_target = continueIter->first; |
384 | Block* = continueIter->second; |
385 | continue_target->rewriteAsCanonicalUnreachableContinue(header); |
386 | } |
387 | |
388 | // Remove unneeded decorations, for unreachable instructions |
389 | decorations.erase(first: std::remove_if(first: decorations.begin(), last: decorations.end(), |
390 | pred: [&unreachableDefinitions](std::unique_ptr<Instruction>& I) -> bool { |
391 | Id decoration_id = I.get()->getIdOperand(op: 0); |
392 | return unreachableDefinitions.count(x: decoration_id) != 0; |
393 | }), |
394 | last: decorations.end()); |
395 | } |
396 | |
397 | #ifndef GLSLANG_WEB |
398 | // comment in header |
399 | void Builder::postProcessFeatures() { |
400 | // Add per-instruction capabilities, extensions, etc., |
401 | |
402 | // Look for any 8/16 bit type in physical storage buffer class, and set the |
403 | // appropriate capability. This happens in createSpvVariable for other storage |
404 | // classes, but there isn't always a variable for physical storage buffer. |
405 | for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) { |
406 | Instruction* type = groupedTypes[OpTypePointer][t]; |
407 | if (type->getImmediateOperand(op: 0) == (unsigned)StorageClassPhysicalStorageBufferEXT) { |
408 | if (containsType(typeId: type->getIdOperand(op: 1), typeOp: OpTypeInt, width: 8)) { |
409 | addIncorporatedExtension(ext: spv::E_SPV_KHR_8bit_storage, incorporatedVersion: spv::Spv_1_5); |
410 | addCapability(cap: spv::CapabilityStorageBuffer8BitAccess); |
411 | } |
412 | if (containsType(typeId: type->getIdOperand(op: 1), typeOp: OpTypeInt, width: 16) || |
413 | containsType(typeId: type->getIdOperand(op: 1), typeOp: OpTypeFloat, width: 16)) { |
414 | addIncorporatedExtension(ext: spv::E_SPV_KHR_16bit_storage, incorporatedVersion: spv::Spv_1_3); |
415 | addCapability(cap: spv::CapabilityStorageBuffer16BitAccess); |
416 | } |
417 | } |
418 | } |
419 | |
420 | // process all block-contained instructions |
421 | for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) { |
422 | Function* f = *fi; |
423 | for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) { |
424 | Block* b = *bi; |
425 | for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ii++) |
426 | postProcess(inst&: *ii->get()); |
427 | |
428 | // For all local variables that contain pointers to PhysicalStorageBufferEXT, check whether |
429 | // there is an existing restrict/aliased decoration. If we don't find one, add Aliased as the |
430 | // default. |
431 | for (auto vi = b->getLocalVariables().cbegin(); vi != b->getLocalVariables().cend(); vi++) { |
432 | const Instruction& inst = *vi->get(); |
433 | Id resultId = inst.getResultId(); |
434 | if (containsPhysicalStorageBufferOrArray(typeId: getDerefTypeId(resultId))) { |
435 | bool foundDecoration = false; |
436 | const auto function = [&](const std::unique_ptr<Instruction>& decoration) { |
437 | if (decoration.get()->getIdOperand(op: 0) == resultId && |
438 | decoration.get()->getOpCode() == OpDecorate && |
439 | (decoration.get()->getImmediateOperand(op: 1) == spv::DecorationAliasedPointerEXT || |
440 | decoration.get()->getImmediateOperand(op: 1) == spv::DecorationRestrictPointerEXT)) { |
441 | foundDecoration = true; |
442 | } |
443 | }; |
444 | std::for_each(first: decorations.begin(), last: decorations.end(), f: function); |
445 | if (!foundDecoration) { |
446 | addDecoration(resultId, spv::DecorationAliasedPointerEXT); |
447 | } |
448 | } |
449 | } |
450 | } |
451 | } |
452 | |
453 | // If any Vulkan memory model-specific functionality is used, update the |
454 | // OpMemoryModel to match. |
455 | if (capabilities.find(x: spv::CapabilityVulkanMemoryModelKHR) != capabilities.end()) { |
456 | memoryModel = spv::MemoryModelVulkanKHR; |
457 | addIncorporatedExtension(ext: spv::E_SPV_KHR_vulkan_memory_model, incorporatedVersion: spv::Spv_1_5); |
458 | } |
459 | |
460 | // Add Aliased decoration if there's more than one Workgroup Block variable. |
461 | if (capabilities.find(x: spv::CapabilityWorkgroupMemoryExplicitLayoutKHR) != capabilities.end()) { |
462 | assert(entryPoints.size() == 1); |
463 | auto &ep = entryPoints[0]; |
464 | |
465 | std::vector<Id> workgroup_variables; |
466 | for (int i = 0; i < (int)ep->getNumOperands(); i++) { |
467 | if (!ep->isIdOperand(op: i)) |
468 | continue; |
469 | |
470 | const Id id = ep->getIdOperand(op: i); |
471 | const Instruction *instr = module.getInstruction(id); |
472 | if (instr->getOpCode() != spv::OpVariable) |
473 | continue; |
474 | |
475 | if (instr->getImmediateOperand(op: 0) == spv::StorageClassWorkgroup) |
476 | workgroup_variables.push_back(x: id); |
477 | } |
478 | |
479 | if (workgroup_variables.size() > 1) { |
480 | for (size_t i = 0; i < workgroup_variables.size(); i++) |
481 | addDecoration(workgroup_variables[i], spv::DecorationAliased); |
482 | } |
483 | } |
484 | } |
485 | #endif |
486 | |
487 | // comment in header |
488 | void Builder::postProcess() { |
489 | postProcessCFG(); |
490 | #ifndef GLSLANG_WEB |
491 | postProcessFeatures(); |
492 | #endif |
493 | } |
494 | |
495 | }; // end spv namespace |
496 | |