| 1 | // |
| 2 | // Copyright (C) 2018 Google, Inc. |
| 3 | // |
| 4 | // All rights reserved. |
| 5 | // |
| 6 | // Redistribution and use in source and binary forms, with or without |
| 7 | // modification, are permitted provided that the following conditions |
| 8 | // are met: |
| 9 | // |
| 10 | // Redistributions of source code must retain the above copyright |
| 11 | // notice, this list of conditions and the following disclaimer. |
| 12 | // |
| 13 | // Redistributions in binary form must reproduce the above |
| 14 | // copyright notice, this list of conditions and the following |
| 15 | // disclaimer in the documentation and/or other materials provided |
| 16 | // with the distribution. |
| 17 | // |
| 18 | // Neither the name of 3Dlabs Inc. Ltd. nor the names of its |
| 19 | // contributors may be used to endorse or promote products derived |
| 20 | // from this software without specific prior written permission. |
| 21 | // |
| 22 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 23 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 24 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
| 25 | // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
| 26 | // COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| 27 | // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| 28 | // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 29 | // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| 30 | // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 31 | // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
| 32 | // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 33 | // POSSIBILITY OF SUCH DAMAGE. |
| 34 | |
| 35 | // |
| 36 | // Post-processing for SPIR-V IR, in internal form, not standard binary form. |
| 37 | // |
| 38 | |
| 39 | #include <cassert> |
| 40 | #include <cstdlib> |
| 41 | |
| 42 | #include <unordered_map> |
| 43 | #include <unordered_set> |
| 44 | #include <algorithm> |
| 45 | |
| 46 | #include "SpvBuilder.h" |
| 47 | #include "spirv.hpp" |
| 48 | |
| 49 | namespace spv { |
| 50 | #include "GLSL.std.450.h" |
| 51 | #include "GLSL.ext.KHR.h" |
| 52 | #include "GLSL.ext.EXT.h" |
| 53 | #include "GLSL.ext.AMD.h" |
| 54 | #include "GLSL.ext.NV.h" |
| 55 | #include "GLSL.ext.ARM.h" |
| 56 | #include "GLSL.ext.QCOM.h" |
| 57 | } |
| 58 | |
| 59 | namespace spv { |
| 60 | |
| 61 | // Hook to visit each operand type and result type of an instruction. |
| 62 | // Will be called multiple times for one instruction, once for each typed |
| 63 | // operand and the result. |
| 64 | void Builder::postProcessType(const Instruction& inst, Id typeId) |
| 65 | { |
| 66 | // Characterize the type being questioned |
| 67 | Id basicTypeOp = getMostBasicTypeClass(typeId); |
| 68 | int width = 0; |
| 69 | if (basicTypeOp == OpTypeFloat || basicTypeOp == OpTypeInt) |
| 70 | width = getScalarTypeWidth(typeId); |
| 71 | |
| 72 | // Do opcode-specific checks |
| 73 | switch (inst.getOpCode()) { |
| 74 | case OpLoad: |
| 75 | case OpStore: |
| 76 | if (basicTypeOp == OpTypeStruct) { |
| 77 | if (containsType(typeId, typeOp: OpTypeInt, width: 8)) |
| 78 | addCapability(cap: CapabilityInt8); |
| 79 | if (containsType(typeId, typeOp: OpTypeInt, width: 16)) |
| 80 | addCapability(cap: CapabilityInt16); |
| 81 | if (containsType(typeId, typeOp: OpTypeFloat, width: 16)) |
| 82 | addCapability(cap: CapabilityFloat16); |
| 83 | } else { |
| 84 | StorageClass storageClass = getStorageClass(resultId: inst.getIdOperand(op: 0)); |
| 85 | if (width == 8) { |
| 86 | switch (storageClass) { |
| 87 | case StorageClassPhysicalStorageBufferEXT: |
| 88 | case StorageClassUniform: |
| 89 | case StorageClassStorageBuffer: |
| 90 | case StorageClassPushConstant: |
| 91 | break; |
| 92 | default: |
| 93 | addCapability(cap: CapabilityInt8); |
| 94 | break; |
| 95 | } |
| 96 | } else if (width == 16) { |
| 97 | switch (storageClass) { |
| 98 | case StorageClassPhysicalStorageBufferEXT: |
| 99 | case StorageClassUniform: |
| 100 | case StorageClassStorageBuffer: |
| 101 | case StorageClassPushConstant: |
| 102 | case StorageClassInput: |
| 103 | case StorageClassOutput: |
| 104 | break; |
| 105 | default: |
| 106 | if (basicTypeOp == OpTypeInt) |
| 107 | addCapability(cap: CapabilityInt16); |
| 108 | if (basicTypeOp == OpTypeFloat) |
| 109 | addCapability(cap: CapabilityFloat16); |
| 110 | break; |
| 111 | } |
| 112 | } |
| 113 | } |
| 114 | break; |
| 115 | case OpCopyObject: |
| 116 | break; |
| 117 | case OpFConvert: |
| 118 | case OpSConvert: |
| 119 | case OpUConvert: |
| 120 | // Look for any 8/16-bit storage capabilities. If there are none, assume that |
| 121 | // the convert instruction requires the Float16/Int8/16 capability. |
| 122 | if (containsType(typeId, typeOp: OpTypeFloat, width: 16) || containsType(typeId, typeOp: OpTypeInt, width: 16)) { |
| 123 | bool foundStorage = false; |
| 124 | for (auto it = capabilities.begin(); it != capabilities.end(); ++it) { |
| 125 | spv::Capability cap = *it; |
| 126 | if (cap == spv::CapabilityStorageInputOutput16 || |
| 127 | cap == spv::CapabilityStoragePushConstant16 || |
| 128 | cap == spv::CapabilityStorageUniformBufferBlock16 || |
| 129 | cap == spv::CapabilityStorageUniform16) { |
| 130 | foundStorage = true; |
| 131 | break; |
| 132 | } |
| 133 | } |
| 134 | if (!foundStorage) { |
| 135 | if (containsType(typeId, typeOp: OpTypeFloat, width: 16)) |
| 136 | addCapability(cap: CapabilityFloat16); |
| 137 | if (containsType(typeId, typeOp: OpTypeInt, width: 16)) |
| 138 | addCapability(cap: CapabilityInt16); |
| 139 | } |
| 140 | } |
| 141 | if (containsType(typeId, typeOp: OpTypeInt, width: 8)) { |
| 142 | bool foundStorage = false; |
| 143 | for (auto it = capabilities.begin(); it != capabilities.end(); ++it) { |
| 144 | spv::Capability cap = *it; |
| 145 | if (cap == spv::CapabilityStoragePushConstant8 || |
| 146 | cap == spv::CapabilityUniformAndStorageBuffer8BitAccess || |
| 147 | cap == spv::CapabilityStorageBuffer8BitAccess) { |
| 148 | foundStorage = true; |
| 149 | break; |
| 150 | } |
| 151 | } |
| 152 | if (!foundStorage) { |
| 153 | addCapability(cap: CapabilityInt8); |
| 154 | } |
| 155 | } |
| 156 | break; |
| 157 | case OpExtInst: |
| 158 | switch (inst.getImmediateOperand(op: 1)) { |
| 159 | case GLSLstd450Frexp: |
| 160 | case GLSLstd450FrexpStruct: |
| 161 | if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, typeOp: OpTypeInt, width: 16)) |
| 162 | addExtension(ext: spv::E_SPV_AMD_gpu_shader_int16); |
| 163 | break; |
| 164 | case GLSLstd450InterpolateAtCentroid: |
| 165 | case GLSLstd450InterpolateAtSample: |
| 166 | case GLSLstd450InterpolateAtOffset: |
| 167 | if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, typeOp: OpTypeFloat, width: 16)) |
| 168 | addExtension(ext: spv::E_SPV_AMD_gpu_shader_half_float); |
| 169 | break; |
| 170 | default: |
| 171 | break; |
| 172 | } |
| 173 | break; |
| 174 | case OpAccessChain: |
| 175 | case OpPtrAccessChain: |
| 176 | if (isPointerType(typeId)) |
| 177 | break; |
| 178 | if (basicTypeOp == OpTypeInt) { |
| 179 | if (width == 16) |
| 180 | addCapability(cap: CapabilityInt16); |
| 181 | else if (width == 8) |
| 182 | addCapability(cap: CapabilityInt8); |
| 183 | } |
| 184 | break; |
| 185 | default: |
| 186 | if (basicTypeOp == OpTypeInt) { |
| 187 | if (width == 16) |
| 188 | addCapability(cap: CapabilityInt16); |
| 189 | else if (width == 8) |
| 190 | addCapability(cap: CapabilityInt8); |
| 191 | else if (width == 64) |
| 192 | addCapability(cap: CapabilityInt64); |
| 193 | } else if (basicTypeOp == OpTypeFloat) { |
| 194 | if (width == 16) |
| 195 | addCapability(cap: CapabilityFloat16); |
| 196 | else if (width == 64) |
| 197 | addCapability(cap: CapabilityFloat64); |
| 198 | } |
| 199 | break; |
| 200 | } |
| 201 | } |
| 202 | |
| 203 | // Called for each instruction that resides in a block. |
| 204 | void Builder::postProcess(Instruction& inst) |
| 205 | { |
| 206 | // Add capabilities based simply on the opcode. |
| 207 | switch (inst.getOpCode()) { |
| 208 | case OpExtInst: |
| 209 | switch (inst.getImmediateOperand(op: 1)) { |
| 210 | case GLSLstd450InterpolateAtCentroid: |
| 211 | case GLSLstd450InterpolateAtSample: |
| 212 | case GLSLstd450InterpolateAtOffset: |
| 213 | addCapability(cap: CapabilityInterpolationFunction); |
| 214 | break; |
| 215 | default: |
| 216 | break; |
| 217 | } |
| 218 | break; |
| 219 | case OpDPdxFine: |
| 220 | case OpDPdyFine: |
| 221 | case OpFwidthFine: |
| 222 | case OpDPdxCoarse: |
| 223 | case OpDPdyCoarse: |
| 224 | case OpFwidthCoarse: |
| 225 | addCapability(cap: CapabilityDerivativeControl); |
| 226 | break; |
| 227 | |
| 228 | case OpImageQueryLod: |
| 229 | case OpImageQuerySize: |
| 230 | case OpImageQuerySizeLod: |
| 231 | case OpImageQuerySamples: |
| 232 | case OpImageQueryLevels: |
| 233 | addCapability(cap: CapabilityImageQuery); |
| 234 | break; |
| 235 | |
| 236 | case OpGroupNonUniformPartitionNV: |
| 237 | addExtension(ext: E_SPV_NV_shader_subgroup_partitioned); |
| 238 | addCapability(cap: CapabilityGroupNonUniformPartitionedNV); |
| 239 | break; |
| 240 | |
| 241 | case OpLoad: |
| 242 | case OpStore: |
| 243 | { |
| 244 | // For any load/store to a PhysicalStorageBufferEXT, walk the accesschain |
| 245 | // index list to compute the misalignment. The pre-existing alignment value |
| 246 | // (set via Builder::AccessChain::alignment) only accounts for the base of |
| 247 | // the reference type and any scalar component selection in the accesschain, |
| 248 | // and this function computes the rest from the SPIR-V Offset decorations. |
| 249 | Instruction *accessChain = module.getInstruction(id: inst.getIdOperand(op: 0)); |
| 250 | if (accessChain->getOpCode() == OpAccessChain) { |
| 251 | Instruction *base = module.getInstruction(id: accessChain->getIdOperand(op: 0)); |
| 252 | // Get the type of the base of the access chain. It must be a pointer type. |
| 253 | Id typeId = base->getTypeId(); |
| 254 | Instruction *type = module.getInstruction(id: typeId); |
| 255 | assert(type->getOpCode() == OpTypePointer); |
| 256 | if (type->getImmediateOperand(op: 0) != StorageClassPhysicalStorageBufferEXT) { |
| 257 | break; |
| 258 | } |
| 259 | // Get the pointee type. |
| 260 | typeId = type->getIdOperand(op: 1); |
| 261 | type = module.getInstruction(id: typeId); |
| 262 | // Walk the index list for the access chain. For each index, find any |
| 263 | // misalignment that can apply when accessing the member/element via |
| 264 | // Offset/ArrayStride/MatrixStride decorations, and bitwise OR them all |
| 265 | // together. |
| 266 | int alignment = 0; |
| 267 | for (int i = 1; i < accessChain->getNumOperands(); ++i) { |
| 268 | Instruction *idx = module.getInstruction(id: accessChain->getIdOperand(op: i)); |
| 269 | if (type->getOpCode() == OpTypeStruct) { |
| 270 | assert(idx->getOpCode() == OpConstant); |
| 271 | unsigned int c = idx->getImmediateOperand(op: 0); |
| 272 | |
| 273 | const auto function = [&](const std::unique_ptr<Instruction>& decoration) { |
| 274 | if (decoration.get()->getOpCode() == OpMemberDecorate && |
| 275 | decoration.get()->getIdOperand(op: 0) == typeId && |
| 276 | decoration.get()->getImmediateOperand(op: 1) == c && |
| 277 | (decoration.get()->getImmediateOperand(op: 2) == DecorationOffset || |
| 278 | decoration.get()->getImmediateOperand(op: 2) == DecorationMatrixStride)) { |
| 279 | alignment |= decoration.get()->getImmediateOperand(op: 3); |
| 280 | } |
| 281 | }; |
| 282 | std::for_each(first: decorations.begin(), last: decorations.end(), f: function); |
| 283 | // get the next member type |
| 284 | typeId = type->getIdOperand(op: c); |
| 285 | type = module.getInstruction(id: typeId); |
| 286 | } else if (type->getOpCode() == OpTypeArray || |
| 287 | type->getOpCode() == OpTypeRuntimeArray) { |
| 288 | const auto function = [&](const std::unique_ptr<Instruction>& decoration) { |
| 289 | if (decoration.get()->getOpCode() == OpDecorate && |
| 290 | decoration.get()->getIdOperand(op: 0) == typeId && |
| 291 | decoration.get()->getImmediateOperand(op: 1) == DecorationArrayStride) { |
| 292 | alignment |= decoration.get()->getImmediateOperand(op: 2); |
| 293 | } |
| 294 | }; |
| 295 | std::for_each(first: decorations.begin(), last: decorations.end(), f: function); |
| 296 | // Get the element type |
| 297 | typeId = type->getIdOperand(op: 0); |
| 298 | type = module.getInstruction(id: typeId); |
| 299 | } else { |
| 300 | // Once we get to any non-aggregate type, we're done. |
| 301 | break; |
| 302 | } |
| 303 | } |
| 304 | assert(inst.getNumOperands() >= 3); |
| 305 | unsigned int memoryAccess = inst.getImmediateOperand(op: (inst.getOpCode() == OpStore) ? 2 : 1); |
| 306 | assert(memoryAccess & MemoryAccessAlignedMask); |
| 307 | static_cast<void>(memoryAccess); |
| 308 | // Compute the index of the alignment operand. |
| 309 | int alignmentIdx = 2; |
| 310 | if (inst.getOpCode() == OpStore) |
| 311 | alignmentIdx++; |
| 312 | // Merge new and old (mis)alignment |
| 313 | alignment |= inst.getImmediateOperand(op: alignmentIdx); |
| 314 | // Pick the LSB |
| 315 | alignment = alignment & ~(alignment & (alignment-1)); |
| 316 | // update the Aligned operand |
| 317 | inst.setImmediateOperand(idx: alignmentIdx, immediate: alignment); |
| 318 | } |
| 319 | break; |
| 320 | } |
| 321 | |
| 322 | default: |
| 323 | break; |
| 324 | } |
| 325 | |
| 326 | // Checks based on type |
| 327 | if (inst.getTypeId() != NoType) |
| 328 | postProcessType(inst, typeId: inst.getTypeId()); |
| 329 | for (int op = 0; op < inst.getNumOperands(); ++op) { |
| 330 | if (inst.isIdOperand(op)) { |
| 331 | // In blocks, these are always result ids, but we are relying on |
| 332 | // getTypeId() to return NoType for things like OpLabel. |
| 333 | if (getTypeId(resultId: inst.getIdOperand(op)) != NoType) |
| 334 | postProcessType(inst, typeId: getTypeId(resultId: inst.getIdOperand(op))); |
| 335 | } |
| 336 | } |
| 337 | } |
| 338 | |
| 339 | // comment in header |
| 340 | void Builder::postProcessCFG() |
| 341 | { |
| 342 | // reachableBlocks is the set of blockss reached via control flow, or which are |
| 343 | // unreachable continue targert or unreachable merge. |
| 344 | std::unordered_set<const Block*> reachableBlocks; |
| 345 | std::unordered_map<Block*, Block*> ; |
| 346 | std::unordered_set<Block*> unreachableMerges; |
| 347 | std::unordered_set<Id> unreachableDefinitions; |
| 348 | // Collect IDs defined in unreachable blocks. For each function, label the |
| 349 | // reachable blocks first. Then for each unreachable block, collect the |
| 350 | // result IDs of the instructions in it. |
| 351 | for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) { |
| 352 | Function* f = *fi; |
| 353 | Block* entry = f->getEntryBlock(); |
| 354 | inReadableOrder(root: entry, |
| 355 | callback: [&reachableBlocks, &unreachableMerges, &headerForUnreachableContinue] |
| 356 | (Block* b, ReachReason why, Block* ) { |
| 357 | reachableBlocks.insert(x: b); |
| 358 | if (why == ReachDeadContinue) headerForUnreachableContinue[b] = header; |
| 359 | if (why == ReachDeadMerge) unreachableMerges.insert(x: b); |
| 360 | }); |
| 361 | for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) { |
| 362 | Block* b = *bi; |
| 363 | if (unreachableMerges.count(x: b) != 0 || headerForUnreachableContinue.count(x: b) != 0) { |
| 364 | auto ii = b->getInstructions().cbegin(); |
| 365 | ++ii; // Keep potential decorations on the label. |
| 366 | for (; ii != b->getInstructions().cend(); ++ii) |
| 367 | unreachableDefinitions.insert(x: ii->get()->getResultId()); |
| 368 | } else if (reachableBlocks.count(x: b) == 0) { |
| 369 | // The normal case for unreachable code. All definitions are considered dead. |
| 370 | for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ++ii) |
| 371 | unreachableDefinitions.insert(x: ii->get()->getResultId()); |
| 372 | } |
| 373 | } |
| 374 | } |
| 375 | |
| 376 | // Modify unreachable merge blocks and unreachable continue targets. |
| 377 | // Delete their contents. |
| 378 | for (auto mergeIter = unreachableMerges.begin(); mergeIter != unreachableMerges.end(); ++mergeIter) { |
| 379 | (*mergeIter)->rewriteAsCanonicalUnreachableMerge(); |
| 380 | } |
| 381 | for (auto continueIter = headerForUnreachableContinue.begin(); |
| 382 | continueIter != headerForUnreachableContinue.end(); |
| 383 | ++continueIter) { |
| 384 | Block* continue_target = continueIter->first; |
| 385 | Block* = continueIter->second; |
| 386 | continue_target->rewriteAsCanonicalUnreachableContinue(header); |
| 387 | } |
| 388 | |
| 389 | // Remove unneeded decorations, for unreachable instructions |
| 390 | for (auto decorationIter = decorations.begin(); decorationIter != decorations.end();) { |
| 391 | Id decorationId = (*decorationIter)->getIdOperand(op: 0); |
| 392 | if (unreachableDefinitions.count(x: decorationId) != 0) { |
| 393 | decorationIter = decorations.erase(position: decorationIter); |
| 394 | } else { |
| 395 | ++decorationIter; |
| 396 | } |
| 397 | } |
| 398 | } |
| 399 | |
| 400 | // comment in header |
| 401 | void Builder::postProcessFeatures() { |
| 402 | // Add per-instruction capabilities, extensions, etc., |
| 403 | |
| 404 | // Look for any 8/16 bit type in physical storage buffer class, and set the |
| 405 | // appropriate capability. This happens in createSpvVariable for other storage |
| 406 | // classes, but there isn't always a variable for physical storage buffer. |
| 407 | for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) { |
| 408 | Instruction* type = groupedTypes[OpTypePointer][t]; |
| 409 | if (type->getImmediateOperand(op: 0) == (unsigned)StorageClassPhysicalStorageBufferEXT) { |
| 410 | if (containsType(typeId: type->getIdOperand(op: 1), typeOp: OpTypeInt, width: 8)) { |
| 411 | addIncorporatedExtension(ext: spv::E_SPV_KHR_8bit_storage, incorporatedVersion: spv::Spv_1_5); |
| 412 | addCapability(cap: spv::CapabilityStorageBuffer8BitAccess); |
| 413 | } |
| 414 | if (containsType(typeId: type->getIdOperand(op: 1), typeOp: OpTypeInt, width: 16) || |
| 415 | containsType(typeId: type->getIdOperand(op: 1), typeOp: OpTypeFloat, width: 16)) { |
| 416 | addIncorporatedExtension(ext: spv::E_SPV_KHR_16bit_storage, incorporatedVersion: spv::Spv_1_3); |
| 417 | addCapability(cap: spv::CapabilityStorageBuffer16BitAccess); |
| 418 | } |
| 419 | } |
| 420 | } |
| 421 | |
| 422 | // process all block-contained instructions |
| 423 | for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) { |
| 424 | Function* f = *fi; |
| 425 | for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) { |
| 426 | Block* b = *bi; |
| 427 | for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ii++) |
| 428 | postProcess(inst&: *ii->get()); |
| 429 | |
| 430 | // For all local variables that contain pointers to PhysicalStorageBufferEXT, check whether |
| 431 | // there is an existing restrict/aliased decoration. If we don't find one, add Aliased as the |
| 432 | // default. |
| 433 | for (auto vi = b->getLocalVariables().cbegin(); vi != b->getLocalVariables().cend(); vi++) { |
| 434 | const Instruction& inst = *vi->get(); |
| 435 | Id resultId = inst.getResultId(); |
| 436 | if (containsPhysicalStorageBufferOrArray(typeId: getDerefTypeId(resultId))) { |
| 437 | bool foundDecoration = false; |
| 438 | const auto function = [&](const std::unique_ptr<Instruction>& decoration) { |
| 439 | if (decoration.get()->getIdOperand(op: 0) == resultId && |
| 440 | decoration.get()->getOpCode() == OpDecorate && |
| 441 | (decoration.get()->getImmediateOperand(op: 1) == spv::DecorationAliasedPointerEXT || |
| 442 | decoration.get()->getImmediateOperand(op: 1) == spv::DecorationRestrictPointerEXT)) { |
| 443 | foundDecoration = true; |
| 444 | } |
| 445 | }; |
| 446 | std::for_each(first: decorations.begin(), last: decorations.end(), f: function); |
| 447 | if (!foundDecoration) { |
| 448 | addDecoration(resultId, spv::DecorationAliasedPointerEXT); |
| 449 | } |
| 450 | } |
| 451 | } |
| 452 | } |
| 453 | } |
| 454 | |
| 455 | // If any Vulkan memory model-specific functionality is used, update the |
| 456 | // OpMemoryModel to match. |
| 457 | if (capabilities.find(x: spv::CapabilityVulkanMemoryModelKHR) != capabilities.end()) { |
| 458 | memoryModel = spv::MemoryModelVulkanKHR; |
| 459 | addIncorporatedExtension(ext: spv::E_SPV_KHR_vulkan_memory_model, incorporatedVersion: spv::Spv_1_5); |
| 460 | } |
| 461 | |
| 462 | // Add Aliased decoration if there's more than one Workgroup Block variable. |
| 463 | if (capabilities.find(x: spv::CapabilityWorkgroupMemoryExplicitLayoutKHR) != capabilities.end()) { |
| 464 | assert(entryPoints.size() == 1); |
| 465 | auto &ep = entryPoints[0]; |
| 466 | |
| 467 | std::vector<Id> workgroup_variables; |
| 468 | for (int i = 0; i < (int)ep->getNumOperands(); i++) { |
| 469 | if (!ep->isIdOperand(op: i)) |
| 470 | continue; |
| 471 | |
| 472 | const Id id = ep->getIdOperand(op: i); |
| 473 | const Instruction *instr = module.getInstruction(id); |
| 474 | if (instr->getOpCode() != spv::OpVariable) |
| 475 | continue; |
| 476 | |
| 477 | if (instr->getImmediateOperand(op: 0) == spv::StorageClassWorkgroup) |
| 478 | workgroup_variables.push_back(x: id); |
| 479 | } |
| 480 | |
| 481 | if (workgroup_variables.size() > 1) { |
| 482 | for (size_t i = 0; i < workgroup_variables.size(); i++) |
| 483 | addDecoration(workgroup_variables[i], spv::DecorationAliased); |
| 484 | } |
| 485 | } |
| 486 | } |
| 487 | |
| 488 | // SPIR-V requires that any instruction consuming the result of an OpSampledImage |
| 489 | // be in the same block as the OpSampledImage instruction. This pass goes finds |
| 490 | // uses of OpSampledImage where that is not the case and duplicates the |
| 491 | // OpSampledImage to be immediately before the instruction that consumes it. |
| 492 | // The old OpSampledImage is left in place, potentially with no users. |
| 493 | void Builder::postProcessSamplers() |
| 494 | { |
| 495 | // first, find all OpSampledImage instructions and store them in a map. |
| 496 | std::map<Id, Instruction*> sampledImageInstrs; |
| 497 | for (auto f: module.getFunctions()) { |
| 498 | for (auto b: f->getBlocks()) { |
| 499 | for (auto &i: b->getInstructions()) { |
| 500 | if (i->getOpCode() == spv::OpSampledImage) { |
| 501 | sampledImageInstrs[i->getResultId()] = i.get(); |
| 502 | } |
| 503 | } |
| 504 | } |
| 505 | } |
| 506 | // next find all uses of the given ids and rewrite them if needed. |
| 507 | for (auto f: module.getFunctions()) { |
| 508 | for (auto b: f->getBlocks()) { |
| 509 | auto &instrs = b->getInstructions(); |
| 510 | for (size_t idx = 0; idx < instrs.size(); idx++) { |
| 511 | Instruction *i = instrs[idx].get(); |
| 512 | for (int opnum = 0; opnum < i->getNumOperands(); opnum++) { |
| 513 | // Is this operand of the current instruction the result of an OpSampledImage? |
| 514 | if (i->isIdOperand(op: opnum) && |
| 515 | sampledImageInstrs.count(x: i->getIdOperand(op: opnum))) |
| 516 | { |
| 517 | Instruction *opSampImg = sampledImageInstrs[i->getIdOperand(op: opnum)]; |
| 518 | if (i->getBlock() != opSampImg->getBlock()) { |
| 519 | Instruction *newInstr = new Instruction(getUniqueId(), |
| 520 | opSampImg->getTypeId(), |
| 521 | spv::OpSampledImage); |
| 522 | newInstr->addIdOperand(id: opSampImg->getIdOperand(op: 0)); |
| 523 | newInstr->addIdOperand(id: opSampImg->getIdOperand(op: 1)); |
| 524 | newInstr->setBlock(b); |
| 525 | |
| 526 | // rewrite the user of the OpSampledImage to use the new instruction. |
| 527 | i->setIdOperand(idx: opnum, id: newInstr->getResultId()); |
| 528 | // insert the new OpSampledImage right before the current instruction. |
| 529 | instrs.insert(position: instrs.begin() + idx, |
| 530 | x: std::unique_ptr<Instruction>(newInstr)); |
| 531 | idx++; |
| 532 | } |
| 533 | } |
| 534 | } |
| 535 | } |
| 536 | } |
| 537 | } |
| 538 | } |
| 539 | |
| 540 | // comment in header |
| 541 | void Builder::postProcess(bool compileOnly) |
| 542 | { |
| 543 | // postProcessCFG needs an entrypoint to determine what is reachable, but if we are not creating an "executable" shader, we don't have an entrypoint |
| 544 | if (!compileOnly) |
| 545 | postProcessCFG(); |
| 546 | |
| 547 | postProcessFeatures(); |
| 548 | postProcessSamplers(); |
| 549 | } |
| 550 | |
| 551 | } // end spv namespace |
| 552 | |