| 1 | /* |
| 2 | * Copyright 2016-2021 The Brenwill Workshop Ltd. |
| 3 | * SPDX-License-Identifier: Apache-2.0 OR MIT |
| 4 | * |
| 5 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | * you may not use this file except in compliance with the License. |
| 7 | * You may obtain a copy of the License at |
| 8 | * |
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | * |
| 11 | * Unless required by applicable law or agreed to in writing, software |
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | * See the License for the specific language governing permissions and |
| 15 | * limitations under the License. |
| 16 | */ |
| 17 | |
| 18 | /* |
| 19 | * At your option, you may choose to accept this material under either: |
| 20 | * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or |
| 21 | * 2. The MIT License, found at <http://opensource.org/licenses/MIT>. |
| 22 | */ |
| 23 | |
| 24 | #ifndef SPIRV_CROSS_MSL_HPP |
| 25 | #define SPIRV_CROSS_MSL_HPP |
| 26 | |
| 27 | #include "spirv_glsl.hpp" |
| 28 | #include <map> |
| 29 | #include <set> |
| 30 | #include <stddef.h> |
| 31 | #include <unordered_map> |
| 32 | #include <unordered_set> |
| 33 | |
| 34 | namespace SPIRV_CROSS_NAMESPACE |
| 35 | { |
| 36 | |
| 37 | // Indicates the format of a shader interface variable. Currently limited to specifying |
| 38 | // if the input is an 8-bit unsigned integer, 16-bit unsigned integer, or |
| 39 | // some other format. |
| 40 | enum MSLShaderVariableFormat |
| 41 | { |
| 42 | MSL_SHADER_VARIABLE_FORMAT_OTHER = 0, |
| 43 | MSL_SHADER_VARIABLE_FORMAT_UINT8 = 1, |
| 44 | MSL_SHADER_VARIABLE_FORMAT_UINT16 = 2, |
| 45 | MSL_SHADER_VARIABLE_FORMAT_ANY16 = 3, |
| 46 | MSL_SHADER_VARIABLE_FORMAT_ANY32 = 4, |
| 47 | |
| 48 | // Deprecated aliases. |
| 49 | MSL_VERTEX_FORMAT_OTHER = MSL_SHADER_VARIABLE_FORMAT_OTHER, |
| 50 | MSL_VERTEX_FORMAT_UINT8 = MSL_SHADER_VARIABLE_FORMAT_UINT8, |
| 51 | MSL_VERTEX_FORMAT_UINT16 = MSL_SHADER_VARIABLE_FORMAT_UINT16, |
| 52 | MSL_SHADER_INPUT_FORMAT_OTHER = MSL_SHADER_VARIABLE_FORMAT_OTHER, |
| 53 | MSL_SHADER_INPUT_FORMAT_UINT8 = MSL_SHADER_VARIABLE_FORMAT_UINT8, |
| 54 | MSL_SHADER_INPUT_FORMAT_UINT16 = MSL_SHADER_VARIABLE_FORMAT_UINT16, |
| 55 | MSL_SHADER_INPUT_FORMAT_ANY16 = MSL_SHADER_VARIABLE_FORMAT_ANY16, |
| 56 | MSL_SHADER_INPUT_FORMAT_ANY32 = MSL_SHADER_VARIABLE_FORMAT_ANY32, |
| 57 | |
| 58 | MSL_SHADER_VARIABLE_FORMAT_INT_MAX = 0x7fffffff |
| 59 | }; |
| 60 | |
| 61 | // Indicates the rate at which a variable changes value, one of: per-vertex, |
| 62 | // per-primitive, or per-patch. |
| 63 | enum MSLShaderVariableRate |
| 64 | { |
| 65 | MSL_SHADER_VARIABLE_RATE_PER_VERTEX = 0, |
| 66 | MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE = 1, |
| 67 | MSL_SHADER_VARIABLE_RATE_PER_PATCH = 2, |
| 68 | |
| 69 | MSL_SHADER_VARIABLE_RATE_INT_MAX = 0x7fffffff, |
| 70 | }; |
| 71 | |
| 72 | // Defines MSL characteristics of a shader interface variable at a particular location. |
| 73 | // After compilation, it is possible to query whether or not this location was used. |
| 74 | // If vecsize is nonzero, it must be greater than or equal to the vecsize declared in the shader, |
| 75 | // or behavior is undefined. |
| 76 | struct MSLShaderInterfaceVariable |
| 77 | { |
| 78 | uint32_t location = 0; |
| 79 | uint32_t component = 0; |
| 80 | MSLShaderVariableFormat format = MSL_SHADER_VARIABLE_FORMAT_OTHER; |
| 81 | spv::BuiltIn builtin = spv::BuiltInMax; |
| 82 | uint32_t vecsize = 0; |
| 83 | MSLShaderVariableRate rate = MSL_SHADER_VARIABLE_RATE_PER_VERTEX; |
| 84 | }; |
| 85 | |
| 86 | // Matches the binding index of a MSL resource for a binding within a descriptor set. |
| 87 | // Taken together, the stage, desc_set and binding combine to form a reference to a resource |
| 88 | // descriptor used in a particular shading stage. The count field indicates the number of |
| 89 | // resources consumed by this binding, if the binding represents an array of resources. |
| 90 | // If the resource array is a run-time-sized array, which is legal in GLSL or SPIR-V, this value |
| 91 | // will be used to declare the array size in MSL, which does not support run-time-sized arrays. |
| 92 | // If pad_argument_buffer_resources is enabled, the base_type and count values are used to |
| 93 | // specify the base type and array size of the resource in the argument buffer, if that resource |
| 94 | // is not defined and used by the shader. With pad_argument_buffer_resources enabled, this |
| 95 | // information will be used to pad the argument buffer structure, in order to align that |
| 96 | // structure consistently for all uses, across all shaders, of the descriptor set represented |
| 97 | // by the argument buffer. If pad_argument_buffer_resources is disabled, base_type does not |
| 98 | // need to be populated, and if the resource is also not a run-time sized array, the count |
| 99 | // field does not need to be populated. |
| 100 | // If using MSL 2.0 argument buffers, the descriptor set is not marked as a discrete descriptor set, |
| 101 | // and (for iOS only) the resource is not a storage image (sampled != 2), the binding reference we |
| 102 | // remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure. |
| 103 | // For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will |
| 104 | // become a [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used. |
| 105 | struct MSLResourceBinding |
| 106 | { |
| 107 | spv::ExecutionModel stage = spv::ExecutionModelMax; |
| 108 | SPIRType::BaseType basetype = SPIRType::Unknown; |
| 109 | uint32_t desc_set = 0; |
| 110 | uint32_t binding = 0; |
| 111 | uint32_t count = 0; |
| 112 | uint32_t msl_buffer = 0; |
| 113 | uint32_t msl_texture = 0; |
| 114 | uint32_t msl_sampler = 0; |
| 115 | }; |
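|  | // Illustrative sketch (not part of the API surface; variable names are hypothetical): a calling |
|  | // application might remap a combined image/sampler at descriptor set 0, binding 1 of a fragment |
|  | // shader to Metal texture and sampler slot 1 via add_msl_resource_binding(), declared further below: |
|  | // |
|  | //     MSLResourceBinding rb; |
|  | //     rb.stage = spv::ExecutionModelFragment; |
|  | //     rb.basetype = SPIRType::SampledImage; |
|  | //     rb.desc_set = 0; |
|  | //     rb.binding = 1; |
|  | //     rb.count = 1; |
|  | //     rb.msl_texture = 1; |
|  | //     rb.msl_sampler = 1; |
|  | //     compiler.add_msl_resource_binding(rb); |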
| 116 | |
| 117 | enum MSLSamplerCoord |
| 118 | { |
| 119 | MSL_SAMPLER_COORD_NORMALIZED = 0, |
| 120 | MSL_SAMPLER_COORD_PIXEL = 1, |
| 121 | MSL_SAMPLER_INT_MAX = 0x7fffffff |
| 122 | }; |
| 123 | |
| 124 | enum MSLSamplerFilter |
| 125 | { |
| 126 | MSL_SAMPLER_FILTER_NEAREST = 0, |
| 127 | MSL_SAMPLER_FILTER_LINEAR = 1, |
| 128 | MSL_SAMPLER_FILTER_INT_MAX = 0x7fffffff |
| 129 | }; |
| 130 | |
| 131 | enum MSLSamplerMipFilter |
| 132 | { |
| 133 | MSL_SAMPLER_MIP_FILTER_NONE = 0, |
| 134 | MSL_SAMPLER_MIP_FILTER_NEAREST = 1, |
| 135 | MSL_SAMPLER_MIP_FILTER_LINEAR = 2, |
| 136 | MSL_SAMPLER_MIP_FILTER_INT_MAX = 0x7fffffff |
| 137 | }; |
| 138 | |
| 139 | enum MSLSamplerAddress |
| 140 | { |
| 141 | MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO = 0, |
| 142 | MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE = 1, |
| 143 | MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER = 2, |
| 144 | MSL_SAMPLER_ADDRESS_REPEAT = 3, |
| 145 | MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT = 4, |
| 146 | MSL_SAMPLER_ADDRESS_INT_MAX = 0x7fffffff |
| 147 | }; |
| 148 | |
| 149 | enum MSLSamplerCompareFunc |
| 150 | { |
| 151 | MSL_SAMPLER_COMPARE_FUNC_NEVER = 0, |
| 152 | MSL_SAMPLER_COMPARE_FUNC_LESS = 1, |
| 153 | MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL = 2, |
| 154 | MSL_SAMPLER_COMPARE_FUNC_GREATER = 3, |
| 155 | MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL = 4, |
| 156 | MSL_SAMPLER_COMPARE_FUNC_EQUAL = 5, |
| 157 | MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL = 6, |
| 158 | MSL_SAMPLER_COMPARE_FUNC_ALWAYS = 7, |
| 159 | MSL_SAMPLER_COMPARE_FUNC_INT_MAX = 0x7fffffff |
| 160 | }; |
| 161 | |
| 162 | enum MSLSamplerBorderColor |
| 163 | { |
| 164 | MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK = 0, |
| 165 | MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK = 1, |
| 166 | MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE = 2, |
| 167 | MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff |
| 168 | }; |
| 169 | |
| 170 | enum MSLFormatResolution |
| 171 | { |
| 172 | MSL_FORMAT_RESOLUTION_444 = 0, |
| 173 | MSL_FORMAT_RESOLUTION_422, |
| 174 | MSL_FORMAT_RESOLUTION_420, |
| 175 | MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff |
| 176 | }; |
| 177 | |
| 178 | enum MSLChromaLocation |
| 179 | { |
| 180 | MSL_CHROMA_LOCATION_COSITED_EVEN = 0, |
| 181 | MSL_CHROMA_LOCATION_MIDPOINT, |
| 182 | MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff |
| 183 | }; |
| 184 | |
| 185 | enum MSLComponentSwizzle |
| 186 | { |
| 187 | MSL_COMPONENT_SWIZZLE_IDENTITY = 0, |
| 188 | MSL_COMPONENT_SWIZZLE_ZERO, |
| 189 | MSL_COMPONENT_SWIZZLE_ONE, |
| 190 | MSL_COMPONENT_SWIZZLE_R, |
| 191 | MSL_COMPONENT_SWIZZLE_G, |
| 192 | MSL_COMPONENT_SWIZZLE_B, |
| 193 | MSL_COMPONENT_SWIZZLE_A, |
| 194 | MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff |
| 195 | }; |
| 196 | |
| 197 | enum MSLSamplerYCbCrModelConversion |
| 198 | { |
| 199 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, |
| 200 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, |
| 201 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, |
| 202 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, |
| 203 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, |
| 204 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff |
| 205 | }; |
| 206 | |
| 207 | enum MSLSamplerYCbCrRange |
| 208 | { |
| 209 | MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, |
| 210 | MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, |
| 211 | MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff |
| 212 | }; |
| 213 | |
| 214 | struct MSLConstexprSampler |
| 215 | { |
| 216 | MSLSamplerCoord coord = MSL_SAMPLER_COORD_NORMALIZED; |
| 217 | MSLSamplerFilter min_filter = MSL_SAMPLER_FILTER_NEAREST; |
| 218 | MSLSamplerFilter mag_filter = MSL_SAMPLER_FILTER_NEAREST; |
| 219 | MSLSamplerMipFilter mip_filter = MSL_SAMPLER_MIP_FILTER_NONE; |
| 220 | MSLSamplerAddress s_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; |
| 221 | MSLSamplerAddress t_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; |
| 222 | MSLSamplerAddress r_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; |
| 223 | MSLSamplerCompareFunc compare_func = MSL_SAMPLER_COMPARE_FUNC_NEVER; |
| 224 | MSLSamplerBorderColor border_color = MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK; |
| 225 | float lod_clamp_min = 0.0f; |
| 226 | float lod_clamp_max = 1000.0f; |
| 227 | int max_anisotropy = 1; |
| 228 | |
| 229 | // Sampler Y'CbCr conversion parameters |
| 230 | uint32_t planes = 0; |
| 231 | MSLFormatResolution resolution = MSL_FORMAT_RESOLUTION_444; |
| 232 | MSLSamplerFilter chroma_filter = MSL_SAMPLER_FILTER_NEAREST; |
| 233 | MSLChromaLocation x_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; |
| 234 | MSLChromaLocation y_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; |
| 235 | MSLComponentSwizzle swizzle[4]; // IDENTITY, IDENTITY, IDENTITY, IDENTITY |
| 236 | MSLSamplerYCbCrModelConversion ycbcr_model = MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY; |
| 237 | MSLSamplerYCbCrRange ycbcr_range = MSL_SAMPLER_YCBCR_RANGE_ITU_FULL; |
| 238 | uint32_t bpc = 8; |
| 239 | |
| 240 | bool compare_enable = false; |
| 241 | bool lod_clamp_enable = false; |
| 242 | bool anisotropy_enable = false; |
| 243 | bool ycbcr_conversion_enable = false; |
| 244 | |
| 245 | MSLConstexprSampler() |
| 246 | { |
| 247 | for (uint32_t i = 0; i < 4; i++) |
| 248 | swizzle[i] = MSL_COMPONENT_SWIZZLE_IDENTITY; |
| 249 | } |
| 250 | bool swizzle_is_identity() const |
| 251 | { |
| 252 | return (swizzle[0] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[1] == MSL_COMPONENT_SWIZZLE_IDENTITY && |
| 253 | swizzle[2] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[3] == MSL_COMPONENT_SWIZZLE_IDENTITY); |
| 254 | } |
| 255 | bool swizzle_has_one_or_zero() const |
| 256 | { |
| 257 | return (swizzle[0] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[0] == MSL_COMPONENT_SWIZZLE_ONE || |
| 258 | swizzle[1] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[1] == MSL_COMPONENT_SWIZZLE_ONE || |
| 259 | swizzle[2] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[2] == MSL_COMPONENT_SWIZZLE_ONE || |
| 260 | swizzle[3] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[3] == MSL_COMPONENT_SWIZZLE_ONE); |
| 261 | } |
| 262 | }; |
| 263 | |
| 264 | // Special constant used in a MSLResourceBinding desc_set |
| 265 | // element to indicate the bindings for the push constants. |
| 266 | // Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly. |
| 267 | static const uint32_t kPushConstDescSet = ResourceBindingPushConstantDescriptorSet; |
| 268 | |
| 269 | // Special constant used in a MSLResourceBinding binding |
| 270 | // element to indicate the bindings for the push constants. |
| 271 | // Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly. |
| 272 | static const uint32_t kPushConstBinding = ResourceBindingPushConstantBinding; |
| 273 | |
| 274 | // Special constant used in a MSLResourceBinding binding |
| 275 | // element to indicate the buffer binding for swizzle buffers. |
| 276 | static const uint32_t kSwizzleBufferBinding = ~(1u); |
| 277 | |
| 278 | // Special constant used in a MSLResourceBinding binding |
| 279 | // element to indicate the buffer binding for buffer size buffers to support OpArrayLength. |
| 280 | static const uint32_t kBufferSizeBufferBinding = ~(2u); |
| 281 | |
| 282 | // Special constant used in a MSLResourceBinding binding |
| 283 | // element to indicate the buffer binding used for the argument buffer itself. |
| 284 | // This buffer binding should be kept as small as possible as all automatic bindings for buffers |
| 285 | // will start at max(kArgumentBufferBinding) + 1. |
| 286 | static const uint32_t kArgumentBufferBinding = ~(3u); |
| 287 | |
| 288 | static const uint32_t kMaxArgumentBuffers = 8; |
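|  | // Illustrative sketch (an assumption about typical usage, not a statement of the API contract): |
|  | // the sentinel values above stand in for a real binding number when the calling API wants to pin |
|  | // the Metal buffer index of one of these implicit buffers. For example, to place the argument |
|  | // buffer for descriptor set 0 at [[buffer(10)]]: |
|  | // |
|  | //     MSLResourceBinding ab; |
|  | //     ab.stage = spv::ExecutionModelFragment; |
|  | //     ab.desc_set = 0; |
|  | //     ab.binding = kArgumentBufferBinding; // sentinel, not a SPIR-V binding |
|  | //     ab.msl_buffer = 10; |
|  | //     compiler.add_msl_resource_binding(ab); |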
| 289 | |
| 290 | // Decompiles SPIR-V to Metal Shading Language |
| 291 | class CompilerMSL : public CompilerGLSL |
| 292 | { |
| 293 | public: |
| 294 | // Options for compiling to Metal Shading Language |
| 295 | struct Options |
| 296 | { |
| 297 | typedef enum |
| 298 | { |
| 299 | iOS = 0, |
| 300 | macOS = 1 |
| 301 | } Platform; |
| 302 | |
| 303 | Platform platform = macOS; |
| 304 | uint32_t msl_version = make_msl_version(1, 2); |
| 305 | uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers |
| 306 | uint32_t r32ui_linear_texture_alignment = 4; |
| 307 | uint32_t r32ui_alignment_constant_id = 65535; |
| 308 | uint32_t swizzle_buffer_index = 30; |
| 309 | uint32_t indirect_params_buffer_index = 29; |
| 310 | uint32_t shader_output_buffer_index = 28; |
| 311 | uint32_t shader_patch_output_buffer_index = 27; |
| 312 | uint32_t shader_tess_factor_buffer_index = 26; |
| 313 | uint32_t buffer_size_buffer_index = 25; |
| 314 | uint32_t view_mask_buffer_index = 24; |
| 315 | uint32_t dynamic_offsets_buffer_index = 23; |
| 316 | uint32_t shader_input_buffer_index = 22; |
| 317 | uint32_t shader_index_buffer_index = 21; |
| 318 | uint32_t shader_patch_input_buffer_index = 20; |
| 319 | uint32_t shader_input_wg_index = 0; |
| 320 | uint32_t device_index = 0; |
| 321 | uint32_t enable_frag_output_mask = 0xffffffff; |
| 322 | // Metal doesn't allow setting a fixed sample mask directly in the pipeline. |
| 323 | // We can evade this restriction by ANDing the internal sample_mask output |
| 324 | // of the shader with the additional fixed sample mask. |
| 325 | uint32_t additional_fixed_sample_mask = 0xffffffff; |
| 326 | bool enable_point_size_builtin = true; |
| 327 | bool enable_frag_depth_builtin = true; |
| 328 | bool enable_frag_stencil_ref_builtin = true; |
| 329 | bool disable_rasterization = false; |
| 330 | bool capture_output_to_buffer = false; |
| 331 | bool swizzle_texture_samples = false; |
| 332 | bool tess_domain_origin_lower_left = false; |
| 333 | bool multiview = false; |
| 334 | bool multiview_layered_rendering = true; |
| 335 | bool view_index_from_device_index = false; |
| 336 | bool dispatch_base = false; |
| 337 | bool texture_1D_as_2D = false; |
| 338 | |
| 339 | // Enable use of Metal argument buffers. |
| 340 | // MSL 2.0 must also be enabled. |
| 341 | bool argument_buffers = false; |
| 342 | |
| 343 | // Defines Metal argument buffer tier levels. |
| 344 | // Uses same values as Metal MTLArgumentBuffersTier enumeration. |
| 345 | enum class ArgumentBuffersTier |
| 346 | { |
| 347 | Tier1 = 0, |
| 348 | Tier2 = 1, |
| 349 | }; |
| 350 | |
| 351 | // When using Metal argument buffers, indicates the Metal argument buffer tier level supported by the Metal platform. |
| 352 | // Ignored when Options::argument_buffers is disabled. |
| 353 | // - Tier1 supports writable images on macOS, but not on iOS. |
| 354 | // - Tier2 supports writable images on macOS and iOS, and higher resource count limits. |
| 355 | // Tier capabilities based on recommendations from Apple engineering. |
| 356 | ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1; |
| 357 | |
| 358 | // Enables a specific argument buffer format with extra information to track the SSBO length |
| 359 | bool runtime_array_rich_descriptor = false; |
| 360 | |
| 361 | // Ensures vertex and instance indices start at zero. This reflects the behavior of HLSL with SV_VertexID and SV_InstanceID. |
| 362 | bool enable_base_index_zero = false; |
| 363 | |
| 364 | // Fragment output in MSL must have at least as many components as the render pass. |
| 365 | // Adds support for explicitly padding out components. |
| 366 | bool pad_fragment_output_components = false; |
| 367 | |
| 368 | // Specifies whether the iOS target version supports the [[base_vertex]] and [[base_instance]] attributes. |
| 369 | bool ios_support_base_vertex_instance = false; |
| 370 | |
| 371 | // Use Metal's native frame-buffer fetch API for subpass inputs. |
| 372 | bool use_framebuffer_fetch_subpasses = false; |
| 373 | |
| 374 | // Enables use of "fma" intrinsic for invariant float math |
| 375 | bool invariant_float_math = false; |
| 376 | |
| 377 | // Emulate texturecube_array with texture2d_array for iOS where this type is not available |
| 378 | bool emulate_cube_array = false; |
| 379 | |
| 380 | // Allow user to enable decoration binding |
| 381 | bool enable_decoration_binding = false; |
| 382 | |
| 383 | // Requires MSL 2.1, use the native support for texel buffers. |
| 384 | bool texture_buffer_native = false; |
| 385 | |
| 386 | // Forces all resources which are part of an argument buffer to be considered active. |
| 387 | // This ensures ABI compatibility between shaders where some resources might be unused, |
| 388 | // and would otherwise declare a different IAB. |
| 389 | bool force_active_argument_buffer_resources = false; |
| 390 | |
| 391 | // Aligns each resource in an argument buffer to its assigned index value, id(N), |
| 392 | // by adding synthetic padding members in the argument buffer struct for any resources |
| 393 | // in the argument buffer that are not defined and used by the shader. This allows |
| 394 | // the shader to index into the correct argument in a descriptor set argument buffer |
| 395 | // that is shared across shaders, where not all resources in the argument buffer are |
| 396 | // defined in each shader. For this to work, an MSLResourceBinding must be provided for |
| 397 | // all descriptors in any descriptor set held in an argument buffer in the shader, and |
| 398 | // that MSLResourceBinding must have the basetype and count members populated correctly. |
| 399 | // The implementation here assumes any inline blocks in the argument buffer are provided |
| 400 | // in a Metal buffer, and doesn't take into consideration inline blocks that are |
| 401 | // optionally embedded directly into the argument buffer via add_inline_uniform_block(). |
| 402 | bool pad_argument_buffer_resources = false; |
| 403 | |
| 404 | // Forces the use of plain arrays, which works around certain driver bugs on certain versions |
| 405 | // of Intel Macbooks. See https://github.com/KhronosGroup/SPIRV-Cross/issues/1210. |
| 406 | // May reduce performance in scenarios where arrays are copied around as value-types. |
| 407 | bool force_native_arrays = false; |
| 408 | |
| 409 | // If a shader writes clip distance, also emit user varyings which |
| 410 | // can be read in subsequent stages. |
| 411 | bool enable_clip_distance_user_varying = true; |
| 412 | |
| 413 | // In a tessellation control shader, assume that more than one patch can be processed in a |
| 414 | // single workgroup. This requires changes to the way the InvocationId and PrimitiveId |
| 415 | // builtins are processed, but should result in more efficient usage of the GPU. |
| 416 | bool multi_patch_workgroup = false; |
| 417 | |
| 418 | // Use storage buffers instead of vertex-style attributes for tessellation evaluation |
| 419 | // input. This may require conversion of inputs in the generated post-tessellation |
| 420 | // vertex shader, but allows the use of nested arrays. |
| 421 | bool raw_buffer_tese_input = false; |
| 422 | |
| 423 | // If set, a vertex shader will be compiled as part of a tessellation pipeline. |
| 424 | // It will be translated as a compute kernel, so it can use the global invocation ID |
| 425 | // to index the output buffer. |
| 426 | bool vertex_for_tessellation = false; |
| 427 | |
| 428 | // Assume that SubpassData images have multiple layers. Layered input attachments |
| 429 | // are addressed relative to the Layer output from the vertex pipeline. This option |
| 430 | // has no effect with multiview, since all input attachments are assumed to be layered |
| 431 | // and will be addressed using the current ViewIndex. |
| 432 | bool arrayed_subpass_input = false; |
| 433 | |
| 434 | // Whether to use SIMD-group or quadgroup functions to implement group non-uniform |
| 435 | // operations. Some GPUs on iOS do not support the SIMD-group functions, only the |
| 436 | // quadgroup functions. |
| 437 | bool ios_use_simdgroup_functions = false; |
| 438 | |
| 439 | // If set, the subgroup size will be assumed to be one, and subgroup-related |
| 440 | // builtins and operations will be emitted accordingly. This mode is intended to |
| 441 | // be used by MoltenVK on hardware/software configurations which do not provide |
| 442 | // sufficient support for subgroups. |
| 443 | bool emulate_subgroups = false; |
| 444 | |
| 445 | // If nonzero, a fixed subgroup size to assume. Metal, similarly to VK_EXT_subgroup_size_control, |
| 446 | // allows the SIMD-group size (aka thread execution width) to vary depending on |
| 447 | // register usage and requirements. In certain circumstances--for example, a pipeline |
| 448 | // in MoltenVK without VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT-- |
| 449 | // this is undesirable. This fixes the value of the SubgroupSize builtin, instead of |
| 450 | // mapping it to the Metal builtin [[thread_execution_width]]. If the thread |
| 451 | // execution width is reduced, the extra invocations will appear to be inactive. |
| 452 | // If zero, the SubgroupSize will be allowed to vary, and the builtin will be mapped |
| 453 | // to the Metal [[thread_execution_width]] builtin. |
| 454 | uint32_t fixed_subgroup_size = 0; |
| 455 | |
| 456 | enum class IndexType |
| 457 | { |
| 458 | None = 0, |
| 459 | UInt16 = 1, |
| 460 | UInt32 = 2 |
| 461 | }; |
| 462 | |
| 463 | // The type of index in the index buffer, if present. For a compute shader, Metal |
| 464 | // requires specifying the indexing at pipeline creation, rather than at draw time |
| 465 | // as with graphics pipelines. This means we must create three different pipelines, |
| 466 | // for no indexing, 16-bit indices, and 32-bit indices. Each requires different |
| 467 | // handling for the gl_VertexIndex builtin. We may as well, then, create three |
| 468 | // different shaders for these three scenarios. |
| 469 | IndexType vertex_index_type = IndexType::None; |
| 470 | |
| 471 | // If set, a dummy [[sample_id]] input is added to a fragment shader if none is present. |
| 472 | // This will force the shader to run at sample rate, assuming Metal does not optimize |
| 473 | // the extra threads away. |
| 474 | bool force_sample_rate_shading = false; |
| 475 | |
| 476 | // If set, gl_HelperInvocation will be set manually whenever a fragment is discarded. |
| 477 | // Some Metal devices have a bug where simd_is_helper_thread() does not return true |
| 478 | // after a fragment has been discarded. This is a workaround that is only expected to be needed |
| 479 | // until the bug is fixed in Metal; it is provided as an option to allow disabling it when that occurs. |
| 480 | bool manual_helper_invocation_updates = true; |
| 481 | |
| 482 | // If set, extra checks will be emitted in fragment shaders to prevent writes |
| 483 | // from discarded fragments. Some Metal devices have a bug where writes to storage resources |
| 484 | // from discarded fragment threads continue to occur, despite the fragment being |
| 485 | // discarded. This is a workaround that is only expected to be needed until the |
| 486 | // bug is fixed in Metal; it is provided as an option so it can be enabled |
| 487 | // only when the bug is present. |
| 488 | bool check_discarded_frag_stores = false; |
| 489 | |
| 490 | // If set, Lod operands to OpImageSample*DrefExplicitLod for 1D and 2D array images |
| 491 | // will be implemented using a gradient instead of passing the level operand directly. |
| 492 | // Some Metal devices have a bug where the level() argument to depth2d_array<T>::sample_compare() |
| 493 | // in a fragment shader is biased by some unknown amount, possibly dependent on the |
| 494 | // partial derivatives of the texture coordinates. This is a workaround that is only |
| 495 | // expected to be needed until the bug is fixed in Metal; it is provided as an option |
| 496 | // so it can be enabled only when the bug is present. |
| 497 | bool sample_dref_lod_array_as_grad = false; |
| 498 | |
| 499 | // MSL doesn't guarantee coherence between writes and subsequent reads of read_write textures. |
| 500 | // This inserts fences before each read of a read_write texture to ensure coherency. |
| 501 | // If you're sure you never rely on this, you can set this to false for a possible performance improvement. |
| 502 | // Note: Only Apple's GPU compiler takes advantage of the lack of coherency, so make sure to test on Apple GPUs if you disable this. |
| 503 | bool readwrite_texture_fences = true; |
| 504 | |
| 505 | // Metal 3.1 introduced a Metal regression bug which causes infinite recursion during |
| 506 | // Metal's analysis of an entry point input structure that is itself recursive. Enabling |
| 507 | // this option will replace the recursive input declaration with an alternate variable of |
| 508 | // type void*, and then cast to the correct type at the top of the entry point function. |
| 509 | // The bug has been reported to Apple, and will hopefully be fixed in future releases. |
| 510 | bool replace_recursive_inputs = false; |
| 511 | |
| 512 | // If set, manual fixups of gradient vectors for cube texture lookups will be performed. |
| 513 | // All released Apple Silicon GPUs to date behave incorrectly when sampling a cube texture |
| 514 | // with explicit gradients. They will ignore one of the three partial derivatives based |
| 515 | // on the selected major axis, and expect the remaining derivatives to be partially |
| 516 | // transformed. |
| 517 | bool agx_manual_cube_grad_fixup = false; |
| 518 | |
| 519 | // Under certain circumstances, Metal will prematurely discard fragments that have side effects. |
| 520 | // Example: CTS test dEQP-VK.fragment_operations.early_fragment.discard_no_early_fragment_tests_depth |
| 521 | // Test will render a full screen quad with varying depth [0,1] for each fragment. |
| 522 | // Each fragment will do an operation with side effects, modify the depth value and |
| 523 | // discard the fragment. The test expects the fragment to be run due to: |
| 524 | // https://registry.khronos.org/vulkan/specs/1.0-extensions/html/vkspec.html#fragops-shader-depthreplacement |
| 525 | // which states that the fragment shader must be run due to replacing the depth in shader. |
| 526 | // However, Metal may prematurely discard fragments without executing them |
| 527 | // (I believe this to be due to a greedy optimization on their end), making the test fail. |
| 528 | // This option enforces fragment execution for such cases where the fragment has operations |
| 529 | // with side effects. Provided as an option hoping Metal will fix this issue in the future. |
| 530 | bool force_fragment_with_side_effects_execution = false; |
| 531 | |
| 532 | // If set, adds a depth pass-through statement to circumvent the following issue: |
| 533 | // When the same depth/stencil is used as input and depth/stencil attachment, we need to |
| 534 | // force Metal to perform the depth/stencil write after fragment execution. Otherwise, |
| 535 | // Metal will write to the depth attachment before fragment execution. This happens |
| 536 | // if the fragment does not modify the depth value. |
| 537 | bool input_attachment_is_ds_attachment = false; |
| 538 | |
| 539 | bool is_ios() const |
| 540 | { |
| 541 | return platform == iOS; |
| 542 | } |
| 543 | |
| 544 | bool is_macos() const |
| 545 | { |
| 546 | return platform == macOS; |
| 547 | } |
| 548 | |
| 549 | bool use_quadgroup_operation() const |
| 550 | { |
| 551 | return is_ios() && !ios_use_simdgroup_functions; |
| 552 | } |
| 553 | |
| 554 | void set_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) |
| 555 | { |
| 556 | msl_version = make_msl_version(major, minor, patch); |
| 557 | } |
| 558 | |
| 559 | bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) const |
| 560 | { |
| 561 | return msl_version >= make_msl_version(major, minor, patch); |
| 562 | } |
| 563 | |
| 564 | static uint32_t make_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) |
| 565 | { |
| 566 | return (major * 10000) + (minor * 100) + patch; |
| 567 | } |
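|  | // For example (worked through from the formula above): make_msl_version(2, 1) yields |
|  | // 2 * 10000 + 1 * 100 + 0 = 20100, so after set_msl_version(2, 1), |
|  | // supports_msl_version(2) and supports_msl_version(2, 1) return true, |
|  | // while supports_msl_version(2, 2) returns false. |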
| 568 | }; |
| 569 | |
| 570 | const Options &get_msl_options() const |
| 571 | { |
| 572 | return msl_options; |
| 573 | } |
| 574 | |
| 575 | void set_msl_options(const Options &opts) |
| 576 | { |
| 577 | msl_options = opts; |
| 578 | } |
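|  | // A minimal usage sketch (hypothetical variable names, assuming a valid SPIR-V binary): |
|  | // |
|  | //     CompilerMSL msl(std::move(spirv_binary)); |
|  | //     CompilerMSL::Options opts = msl.get_msl_options(); |
|  | //     opts.platform = CompilerMSL::Options::iOS; |
|  | //     opts.set_msl_version(2, 1); |
|  | //     opts.argument_buffers = true; |
|  | //     msl.set_msl_options(opts); |
|  | //     std::string msl_source = msl.compile(); |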
| 579 | |
| 580 | // Provide feedback to calling API to allow runtime to disable pipeline |
| 581 | // rasterization if vertex shader requires rasterization to be disabled. |
| 582 | bool get_is_rasterization_disabled() const |
| 583 | { |
| 584 | return is_rasterization_disabled && (get_entry_point().model == spv::ExecutionModelVertex || |
| 585 | get_entry_point().model == spv::ExecutionModelTessellationControl || |
| 586 | get_entry_point().model == spv::ExecutionModelTessellationEvaluation); |
| 587 | } |
| 588 | |
| 589 | // Provide feedback to calling API to allow it to pass an auxiliary |
| 590 | // swizzle buffer if the shader needs it. |
| 591 | bool needs_swizzle_buffer() const |
| 592 | { |
| 593 | return used_swizzle_buffer; |
| 594 | } |
| 595 | |
| 596 | // Provide feedback to calling API to allow it to pass a buffer |
| 597 | // containing STORAGE_BUFFER buffer sizes to support OpArrayLength. |
| 598 | bool needs_buffer_size_buffer() const |
| 599 | { |
| 600 | return !buffers_requiring_array_length.empty(); |
| 601 | } |
| 602 | |
| 603 | bool buffer_requires_array_length(VariableID id) const |
| 604 | { |
| 605 | return buffers_requiring_array_length.count(id) != 0; |
| 606 | } |
| 607 | |
| 608 | // Provide feedback to calling API to allow it to pass a buffer |
| 609 | // containing the view mask for the current multiview subpass. |
| 610 | bool needs_view_mask_buffer() const |
| 611 | { |
| 612 | return msl_options.multiview && !msl_options.view_index_from_device_index; |
| 613 | } |
| 614 | |
| 615 | // Provide feedback to calling API to allow it to pass a buffer |
| 616 | // containing the dispatch base workgroup ID. |
| 617 | bool needs_dispatch_base_buffer() const |
| 618 | { |
| 619 | return msl_options.dispatch_base && !msl_options.supports_msl_version(1, 2); |
| 620 | } |
| 621 | |
| 622 | // Provide feedback to calling API to allow it to pass an output |
| 623 | // buffer if the shader needs it. |
| 624 | bool needs_output_buffer() const |
| 625 | { |
| 626 | return capture_output_to_buffer && stage_out_var_id != ID(0); |
| 627 | } |
| 628 | |
| 629 | // Provide feedback to calling API to allow it to pass a patch output |
| 630 | // buffer if the shader needs it. |
| 631 | bool needs_patch_output_buffer() const |
| 632 | { |
| 633 | return capture_output_to_buffer && patch_stage_out_var_id != ID(0); |
| 634 | } |
| 635 | |
| 636 | // Provide feedback to calling API to allow it to pass an input threadgroup |
| 637 | // buffer if the shader needs it. |
| 638 | bool needs_input_threadgroup_mem() const |
| 639 | { |
| 640 | return capture_output_to_buffer && stage_in_var_id != ID(0); |
| 641 | } |
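|  | // Sketch of the feedback pattern described above (hypothetical names; which queries matter |
|  | // depends on the shader and on the options that were set): |
|  | // |
|  | //     std::string source = msl.compile(); |
|  | //     if (msl.needs_swizzle_buffer()) |
|  | //         { /* bind a swizzle buffer at msl.get_msl_options().swizzle_buffer_index */ } |
|  | //     if (msl.needs_buffer_size_buffer()) |
|  | //         { /* bind a buffer-size buffer at msl.get_msl_options().buffer_size_buffer_index */ } |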
| 642 | |
| 643 | explicit CompilerMSL(std::vector<uint32_t> spirv); |
| 644 | CompilerMSL(const uint32_t *ir, size_t word_count); |
| 645 | explicit CompilerMSL(const ParsedIR &ir); |
| 646 | explicit CompilerMSL(ParsedIR &&ir); |
| 647 | |
| 648 | // input is a shader interface variable description used to fix up shader input variables. |
| 649 | // If shader inputs are provided, is_msl_shader_input_used() will return true after |
| 650 | // calling ::compile() if the location was used by the MSL code. |
| 651 | void add_msl_shader_input(const MSLShaderInterfaceVariable &input); |
| 652 | |
| 653 | // output is a shader interface variable description used to fix up shader output variables. |
| 654 | // If shader outputs are provided, is_msl_shader_output_used() will return true after |
| 655 | // calling ::compile() if the location was used by the MSL code. |
| 656 | void add_msl_shader_output(const MSLShaderInterfaceVariable &output); |
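|  | // Illustrative sketch (hypothetical values): describing a vertex input at location 0 that the |
|  | // application provides as 16-bit unsigned integers, then checking after compile() whether the |
|  | // shader actually consumed that location: |
|  | // |
|  | //     MSLShaderInterfaceVariable in_var; |
|  | //     in_var.location = 0; |
|  | //     in_var.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; |
|  | //     msl.add_msl_shader_input(in_var); |
|  | //     std::string source = msl.compile(); |
|  | //     bool used = msl.is_msl_shader_input_used(0); |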
| 657 | |
| 658 | // resource is a resource binding to indicate the MSL buffer, |
| 659 | // texture or sampler index to use for a particular SPIR-V descriptor set |
| 660 | // and binding. If resource bindings are provided, |
| 661 | // is_msl_resource_binding_used() will return true after calling ::compile() if |
| 662 | // the set/binding combination was used by the MSL code. |
| 663 | void add_msl_resource_binding(const MSLResourceBinding &resource); |
| 664 | |
| 665 | // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource |
| 666 | // in this shader. index is the index within the dynamic offset buffer to use. This |
| 667 | // function marks that resource as using a dynamic offset (VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC |
| 668 | // or VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC). This function only has any effect if argument buffers |
| 669 | // are enabled. If so, the buffer will have its address adjusted at the beginning of the shader with |
| 670 | // an offset taken from the dynamic offset buffer. |
| 671 | void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index); |
| 672 | |
| 673 | // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource |
| 674 | // in this shader. This function marks that resource as an inline uniform block |
| 675 | // (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT). This function only has any effect if argument buffers |
| 676 | // are enabled. If so, the buffer block will be directly embedded into the argument |
| 677 | // buffer, instead of being referenced indirectly via pointer. |
| 678 | void add_inline_uniform_block(uint32_t desc_set, uint32_t binding); |
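|  | // Illustrative sketch (hypothetical set/binding numbers; only meaningful with argument buffers |
|  | // enabled, as noted above): |
|  | // |
|  | //     msl.add_dynamic_buffer(0, 2, 0); // set 0, binding 2 uses slot 0 of the dynamic offset buffer |
|  | //     msl.add_inline_uniform_block(0, 3); // set 0, binding 3 is embedded directly in the argument buffer |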
| 679 | |
| 680 | // When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets. |
| 681 | // This corresponds to VK_KHR_push_descriptor in Vulkan. |
| 682 | void add_discrete_descriptor_set(uint32_t desc_set); |
| 683 | |
| 684 | // If an argument buffer is large enough, it may need to be in the device storage space rather than |
| 685 | // constant. Opt-in to this behavior here on a per set basis. |
| 686 | void set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage); |
| 687 | |
| 688 | // Query after compilation is done. This allows you to check if an input location was used by the shader. |
| 689 | bool is_msl_shader_input_used(uint32_t location); |
| 690 | |
| 691 | // Query after compilation is done. This allows you to check if an output location was used by the shader. |
| 692 | bool is_msl_shader_output_used(uint32_t location); |
| 693 | |
| 694 | // If not using add_msl_shader_input, it's possible |
| 695 | // that certain builtin attributes need to be automatically assigned locations. |
| 696 | // This is typical for tessellation builtin inputs such as tess levels, gl_Position, etc. |
| 697 | // This returns k_unknown_location if the location was explicitly assigned with |
| 698 | // add_msl_shader_input or the builtin is not used, otherwise returns N in [[attribute(N)]]. |
| 699 | uint32_t get_automatic_builtin_input_location(spv::BuiltIn builtin) const; |
| 700 | |
| 701 | // If not using add_msl_shader_output, it's possible |
| 702 | // that certain builtin attributes need to be automatically assigned locations. |
| 703 | // This is typical for tessellation builtin outputs such as tess levels, gl_Position, etc. |
| 704 | // This returns k_unknown_location if the location was explicitly assigned with |
| 705 | // add_msl_shader_output or the builtin is not used, otherwise returns N in [[attribute(N)]]. |
| 706 | uint32_t get_automatic_builtin_output_location(spv::BuiltIn builtin) const; |
| 707 | |
| 708 | // NOTE: Only resources which are remapped using add_msl_resource_binding will be reported here. |
| 709 | // Constexpr samplers are always assumed to be emitted. |
| 710 | // No specific MSLResourceBinding remapping is required for constexpr samplers as long as they are remapped |
| 711 | // by remap_constexpr_sampler(_by_binding). |
| 712 | bool is_msl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding) const; |
| 713 | |
| 714 | // This must only be called after a successful call to CompilerMSL::compile(). |
| 715 | // For a variable resource ID obtained through reflection API, report the automatically assigned resource index. |
| 716 | // If the descriptor set was part of an argument buffer, report the [[id(N)]], |
| 717 | // or [[buffer/texture/sampler]] binding for other resources. |
| 718 | // If the resource was a combined image sampler, report the image binding here, |
| 719 | // use the _secondary version of this call to query the sampler half of the resource. |
| 720 | // If no binding exists, uint32_t(-1) is returned. |
| 721 | uint32_t get_automatic_msl_resource_binding(uint32_t id) const; |
| 722 | |
| 723 | // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers, in which case the |
| 724 | // sampler's binding is returned instead. For any other resource type, -1 is returned. |
| 725 | // Secondary bindings are also used for the auxiliary image atomic buffer. |
| 726 | uint32_t get_automatic_msl_resource_binding_secondary(uint32_t id) const; |
| 727 | |
| 728 | // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for multiplanar images, |
| 729 | // in which case the second plane's binding is returned instead. For any other resource type, -1 is returned. |
| 730 | uint32_t get_automatic_msl_resource_binding_tertiary(uint32_t id) const; |
| 731 | |
| 732 | // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for triplanar images, |
| 733 | // in which case the third plane's binding is returned instead. For any other resource type, -1 is returned. |
| 734 | uint32_t get_automatic_msl_resource_binding_quaternary(uint32_t id) const; |
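|  | // Sketch of querying assigned indices after compilation (hypothetical names; get_shader_resources() |
|  | // is the reflection entry point inherited from the base Compiler class): |
|  | // |
|  | //     std::string source = msl.compile(); |
|  | //     ShaderResources res = msl.get_shader_resources(); |
|  | //     for (const Resource &img : res.sampled_images) |
|  | //     { |
|  | //         uint32_t tex_slot = msl.get_automatic_msl_resource_binding(img.id); |
|  | //         uint32_t smp_slot = msl.get_automatic_msl_resource_binding_secondary(img.id); |
|  | //     } |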
| 735 | |
| 736 | // Compiles the SPIR-V code into Metal Shading Language. |
| 737 | std::string compile() override; |
| 738 | |
| 739 | // Remap a sampler with ID to a constexpr sampler. |
| 740 | // Older iOS targets must use constexpr samplers in certain cases (PCF), |
| 741 | // so a static sampler must be used. |
| 742 | // The sampler will not consume a binding, but be declared in the entry point as a constexpr sampler. |
| 743 | // This can be used on both combined image/samplers (sampler2D) or standalone samplers. |
| 744 | // The remapped sampler must not be an array of samplers. |
| 745 | // Prefer remap_constexpr_sampler_by_binding unless you're also doing reflection anyway. |
| 746 | void remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler); |
| 747 | |
| 748 | // Same as remap_constexpr_sampler, except you provide set/binding, rather than variable ID. |
| 749 | // Remaps based on ID take priority over set/binding remaps. |
| 750 | void remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding, const MSLConstexprSampler &sampler); |
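|  | // Illustrative sketch (hypothetical set/binding; field values chosen for a typical PCF shadow |
|  | // sampler of the kind mentioned above): |
|  | // |
|  | //     MSLConstexprSampler shadow_sampler; |
|  | //     shadow_sampler.min_filter = MSL_SAMPLER_FILTER_LINEAR; |
|  | //     shadow_sampler.mag_filter = MSL_SAMPLER_FILTER_LINEAR; |
|  | //     shadow_sampler.compare_enable = true; |
|  | //     shadow_sampler.compare_func = MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL; |
|  | //     msl.remap_constexpr_sampler_by_binding(0, 4, shadow_sampler); |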
| 751 | |
| 752 | // If using CompilerMSL::Options::pad_fragment_output_components, override the number of components we expect |
| 753 | // to use for a particular location. The default is 4 if number of components is not overridden. |
| 754 | void set_fragment_output_components(uint32_t location, uint32_t components); |
| 755 | |
| 756 | void set_combined_sampler_suffix(const char *suffix); |
| 757 | const char *get_combined_sampler_suffix() const; |
| 758 | |
| 759 | protected: |
| 760 | // An enum of SPIR-V functions that are implemented in additional |
| 761 | // source code that is added to the shader if necessary. |
| 762 | enum SPVFuncImpl : uint8_t |
| 763 | { |
| 764 | SPVFuncImplNone, |
| 765 | SPVFuncImplMod, |
| 766 | SPVFuncImplRadians, |
| 767 | SPVFuncImplDegrees, |
| 768 | SPVFuncImplFindILsb, |
| 769 | SPVFuncImplFindSMsb, |
| 770 | SPVFuncImplFindUMsb, |
| 771 | SPVFuncImplSSign, |
| 772 | SPVFuncImplArrayCopy, |
| 773 | SPVFuncImplArrayCopyMultidim, |
| 774 | SPVFuncImplTexelBufferCoords, |
| 775 | SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations |
| 776 | SPVFuncImplGradientCube, |
| 777 | SPVFuncImplFMul, |
| 778 | SPVFuncImplFAdd, |
| 779 | SPVFuncImplFSub, |
| 780 | SPVFuncImplQuantizeToF16, |
| 781 | SPVFuncImplCubemapTo2DArrayFace, |
| 782 | SPVFuncImplUnsafeArray, // Allow Metal to use the array<T> template to make arrays a value type |
| 783 | SPVFuncImplStorageMatrix, // Allow threadgroup construction of matrices |
| 784 | SPVFuncImplInverse4x4, |
| 785 | SPVFuncImplInverse3x3, |
| 786 | SPVFuncImplInverse2x2, |
| 787 | // It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's |
| 788 | // emitted before them. |
| 789 | SPVFuncImplForwardArgs, |
| 790 | // Likewise, this must come before *Swizzle. |
| 791 | SPVFuncImplGetSwizzle, |
| 792 | SPVFuncImplTextureSwizzle, |
| 793 | SPVFuncImplGatherSwizzle, |
| 794 | SPVFuncImplGatherCompareSwizzle, |
| 795 | SPVFuncImplGatherConstOffsets, |
| 796 | SPVFuncImplGatherCompareConstOffsets, |
| 797 | SPVFuncImplSubgroupBroadcast, |
| 798 | SPVFuncImplSubgroupBroadcastFirst, |
| 799 | SPVFuncImplSubgroupBallot, |
| 800 | SPVFuncImplSubgroupBallotBitExtract, |
| 801 | SPVFuncImplSubgroupBallotFindLSB, |
| 802 | SPVFuncImplSubgroupBallotFindMSB, |
| 803 | SPVFuncImplSubgroupBallotBitCount, |
| 804 | SPVFuncImplSubgroupAllEqual, |
| 805 | SPVFuncImplSubgroupShuffle, |
| 806 | SPVFuncImplSubgroupShuffleXor, |
| 807 | SPVFuncImplSubgroupShuffleUp, |
| 808 | SPVFuncImplSubgroupShuffleDown, |
| 809 | SPVFuncImplQuadBroadcast, |
| 810 | SPVFuncImplQuadSwap, |
| 811 | SPVFuncImplReflectScalar, |
| 812 | SPVFuncImplRefractScalar, |
| 813 | SPVFuncImplFaceForwardScalar, |
| 814 | SPVFuncImplChromaReconstructNearest2Plane, |
| 815 | SPVFuncImplChromaReconstructNearest3Plane, |
| 816 | SPVFuncImplChromaReconstructLinear422CositedEven2Plane, |
| 817 | SPVFuncImplChromaReconstructLinear422CositedEven3Plane, |
| 818 | SPVFuncImplChromaReconstructLinear422Midpoint2Plane, |
| 819 | SPVFuncImplChromaReconstructLinear422Midpoint3Plane, |
| 820 | SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane, |
| 821 | SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane, |
| 822 | SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane, |
| 823 | SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane, |
| 824 | SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane, |
| 825 | SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane, |
| 826 | SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane, |
| 827 | SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane, |
| 828 | SPVFuncImplExpandITUFullRange, |
| 829 | SPVFuncImplExpandITUNarrowRange, |
| 830 | SPVFuncImplConvertYCbCrBT709, |
| 831 | SPVFuncImplConvertYCbCrBT601, |
| 832 | SPVFuncImplConvertYCbCrBT2020, |
| 833 | SPVFuncImplDynamicImageSampler, |
| 834 | SPVFuncImplRayQueryIntersectionParams, |
| 835 | SPVFuncImplVariableDescriptor, |
| 836 | SPVFuncImplVariableSizedDescriptor, |
| 837 | SPVFuncImplVariableDescriptorArray, |
| 838 | SPVFuncImplPaddedStd140, |
| 839 | SPVFuncImplReduceAdd, |
| 840 | SPVFuncImplImageFence, |
| 841 | SPVFuncImplTextureCast, |
| 842 | SPVFuncImplMulExtended, |
| 843 | SPVFuncImplSetMeshOutputsEXT, |
| 844 | }; |
| 845 | |
| 846 | // If the underlying resource has been used for comparison then duplicate loads of that resource must be too |
| 848 | void emit_texture_op(const Instruction &i, bool sparse) override; |
| 849 | void emit_binary_ptr_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); |
| 850 | std::string to_ptr_expression(uint32_t id, bool register_expression_read = true); |
| 851 | void emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); |
| 852 | void emit_instruction(const Instruction &instr) override; |
| 853 | void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, |
| 854 | uint32_t count) override; |
| 855 | void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op, |
| 856 | const uint32_t *args, uint32_t count) override; |
| 857 | void emit_header() override; |
| 858 | void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; |
| 859 | void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; |
| 860 | void emit_subgroup_op(const Instruction &i) override; |
| 861 | std::string to_texture_op(const Instruction &i, bool sparse, bool *forward, |
| 862 | SmallVector<uint32_t> &inherited_expressions) override; |
| 863 | void emit_fixup() override; |
| 864 | std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, |
| 865 | const std::string &qualifier = ""); |
| 866 | void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, |
| 867 | const std::string &qualifier = "", uint32_t base_offset = 0) override; |
| 868 | void emit_struct_padding_target(const SPIRType &type) override; |
| 869 | std::string type_to_glsl(const SPIRType &type, uint32_t id, bool member); |
| 870 | std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; |
| 871 | void emit_block_hints(const SPIRBlock &block) override; |
| 872 | void emit_mesh_entry_point(); |
| 873 | void emit_mesh_outputs(); |
| 874 | void emit_mesh_tasks(SPIRBlock &block) override; |
| 875 | |
| 876 | // Allow Metal to use the array<T> template to make arrays a value type |
| 877 | std::string type_to_array_glsl(const SPIRType &type, uint32_t variable_id) override; |
| 878 | std::string constant_op_expression(const SPIRConstantOp &cop) override; |
| 879 | |
| 880 | bool variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const override; |
| 881 | |
| 882 | // GCC workaround of lambdas calling protected functions (for older GCC versions) |
| 883 | std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override; |
| 884 | |
| 885 | std::string image_type_glsl(const SPIRType &type, uint32_t id, bool member) override; |
| 886 | std::string sampler_type(const SPIRType &type, uint32_t id, bool member); |
| 887 | std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override; |
| 888 | std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; |
| 889 | std::string to_name(uint32_t id, bool allow_alias = true) const override; |
| 890 | std::string to_function_name(const TextureFunctionNameArguments &args) override; |
| 891 | std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward) override; |
| 892 | std::string to_initializer_expression(const SPIRVariable &var) override; |
| 893 | std::string to_zero_initialized_expression(uint32_t type_id) override; |
| 894 | |
| 895 | std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id, |
| 896 | bool is_packed, bool row_major) override; |
| 897 | |
| 898 | // Returns true for BuiltInSampleMask because gl_SampleMask[] is an array in SPIR-V, but [[sample_mask]] is a scalar in Metal. |
| 899 | bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override; |
| 900 | |
| 901 | std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; |
| 902 | bool emit_complex_bitcast(uint32_t result_id, uint32_t id, uint32_t op0) override; |
| 903 | bool skip_argument(uint32_t id) const override; |
| 904 | std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain_is_resolved) override; |
| 905 | std::string to_qualifiers_glsl(uint32_t id) override; |
| 906 | void replace_illegal_names() override; |
| 907 | void declare_constant_arrays(); |
| 908 | |
| 909 | void replace_illegal_entry_point_names(); |
| 910 | void sync_entry_point_aliases_and_names(); |
| 911 | |
| 912 | static const std::unordered_set<std::string> &get_reserved_keyword_set(); |
| 913 | static const std::unordered_set<std::string> &get_illegal_func_names(); |
| 914 | |
| 915 | // Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries |
| 916 | void declare_complex_constant_arrays(); |
| 917 | |
| 918 | bool is_patch_block(const SPIRType &type); |
| 919 | bool is_non_native_row_major_matrix(uint32_t id) override; |
| 920 | bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override; |
| 921 | std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, uint32_t physical_type_id, |
| 922 | bool is_packed, bool relaxed) override; |
| 923 | |
| 924 | bool is_tesc_shader() const; |
| 925 | bool is_tese_shader() const; |
| 926 | bool is_mesh_shader() const; |
| 927 | |
| 928 | void preprocess_op_codes(); |
| 929 | void localize_global_variables(); |
| 930 | void extract_global_variables_from_functions(); |
| 931 | void mark_packable_structs(); |
| 932 | void mark_as_packable(SPIRType &type); |
| 933 | void mark_as_workgroup_struct(SPIRType &type); |
| 934 | |
| 935 | std::unordered_map<uint32_t, std::set<uint32_t>> function_global_vars; |
| 936 | void extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids, |
| 937 | std::unordered_set<uint32_t> &global_var_ids, |
| 938 | std::unordered_set<uint32_t> &processed_func_ids); |
| 939 | uint32_t add_interface_block(spv::StorageClass storage, bool patch = false); |
| 940 | uint32_t add_interface_block_pointer(uint32_t ib_var_id, spv::StorageClass storage); |
| 941 | uint32_t add_meshlet_block(bool per_primitive); |
| 942 | |
| 943 | struct InterfaceBlockMeta |
| 944 | { |
| 945 | struct LocationMeta |
| 946 | { |
| 947 | uint32_t base_type_id = 0; |
| 948 | uint32_t num_components = 0; |
| 949 | bool flat = false; |
| 950 | bool noperspective = false; |
| 951 | bool centroid = false; |
| 952 | bool sample = false; |
| 953 | }; |
| 954 | std::unordered_map<uint32_t, LocationMeta> location_meta; |
| 955 | bool strip_array = false; |
| 956 | bool allow_local_declaration = false; |
| 957 | }; |
| 958 | |
| 959 | std::string to_tesc_invocation_id(); |
| 960 | void emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array); |
| 961 | void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type, |
| 962 | SPIRVariable &var, InterfaceBlockMeta &meta); |
| 963 | void add_composite_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, |
| 964 | SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta); |
| 965 | void add_plain_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, |
| 966 | SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta); |
| 967 | bool add_component_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, |
| 968 | SPIRVariable &var, const SPIRType &type, |
| 969 | InterfaceBlockMeta &meta); |
| 970 | void add_plain_member_variable_to_interface_block(spv::StorageClass storage, |
| 971 | const std::string &ib_var_ref, SPIRType &ib_type, |
| 972 | SPIRVariable &var, SPIRType &var_type, |
| 973 | uint32_t mbr_idx, InterfaceBlockMeta &meta, |
| 974 | const std::string &mbr_name_qual, |
| 975 | const std::string &var_chain_qual, |
| 976 | uint32_t &location, uint32_t &var_mbr_idx); |
| 977 | void add_composite_member_variable_to_interface_block(spv::StorageClass storage, |
| 978 | const std::string &ib_var_ref, SPIRType &ib_type, |
| 979 | SPIRVariable &var, SPIRType &var_type, |
| 980 | uint32_t mbr_idx, InterfaceBlockMeta &meta, |
| 981 | const std::string &mbr_name_qual, |
| 982 | const std::string &var_chain_qual, |
| 983 | uint32_t &location, uint32_t &var_mbr_idx, |
| 984 | const Bitset &interpolation_qual); |
| 985 | void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var); |
| 986 | void add_tess_level_input(const std::string &base_ref, const std::string &mbr_name, SPIRVariable &var); |
| 987 | |
| 988 | void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id); |
| 989 | |
| 990 | void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, |
| 991 | spv::StorageClass storage, bool fallback = false); |
uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin);
uint32_t ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t component,
uint32_t num_components, bool strip_array);

void emit_custom_templates();
void emit_custom_functions();
void emit_resources();
void emit_specialization_constants_and_structs();
void emit_interface_block(uint32_t ib_var_id);
bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs);
bool is_var_runtime_size_array(const SPIRVariable &var) const;
uint32_t get_resource_array_size(const SPIRType &type, uint32_t id) const;

void fix_up_shader_inputs_outputs();

std::string func_type_decl(SPIRType &type);
std::string entry_point_args_classic(bool append_comma);
std::string entry_point_args_argument_buffer(bool append_comma);
std::string entry_point_arg_stage_in();
void entry_point_args_builtin(std::string &args);
void entry_point_args_discrete_descriptors(std::string &args);
std::string append_member_name(const std::string &qualifier, const SPIRType &type, uint32_t index);
std::string ensure_valid_name(std::string name, std::string pfx);
std::string to_sampler_expression(uint32_t id);
std::string to_swizzle_expression(uint32_t id);
std::string to_buffer_size_expression(uint32_t id);
bool is_sample_rate() const;
bool is_intersection_query() const;
bool is_direct_input_builtin(spv::BuiltIn builtin);
std::string builtin_qualifier(spv::BuiltIn builtin);
std::string builtin_type_decl(spv::BuiltIn builtin, uint32_t id = 0);
std::string built_in_func_arg(spv::BuiltIn builtin, bool prefix_comma);
std::string member_attribute_qualifier(const SPIRType &type, uint32_t index);
std::string member_location_attribute_qualifier(const SPIRType &type, uint32_t index);
std::string argument_decl(const SPIRFunction::Parameter &arg);
const char *descriptor_address_space(uint32_t id, spv::StorageClass storage, const char *plain_address_space) const;
std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp);
uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane = 0);
uint32_t get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr) const;
uint32_t get_or_allocate_builtin_input_member_location(spv::BuiltIn builtin,
uint32_t type_id, uint32_t index, uint32_t *comp = nullptr);
uint32_t get_or_allocate_builtin_output_member_location(spv::BuiltIn builtin,
uint32_t type_id, uint32_t index, uint32_t *comp = nullptr);

uint32_t get_physical_tess_level_array_size(spv::BuiltIn builtin) const;

uint32_t get_physical_type_stride(const SPIRType &type) const override;

// MSL packing rules. These compute the effective packing rules as observed by the MSL compiler in the MSL output.
// These values can change depending on various extended decorations which control packing rules.
// We need to make these rules match up with SPIR-V declared rules.
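// For example (illustrative): a buffer member that SPIR-V declares as a tightly packed
// float3 array with a 12-byte stride cannot be expressed as MSL float3, which is 16-byte
// aligned, so the member is emitted as packed_float3 (or otherwise padded/packed) to keep
// the MSL offsets matching the SPIR-V declared layout.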
uint32_t get_declared_type_size_msl(const SPIRType &type, bool packed, bool row_major) const;
uint32_t get_declared_type_array_stride_msl(const SPIRType &type, bool packed, bool row_major) const;
uint32_t get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const;
uint32_t get_declared_type_alignment_msl(const SPIRType &type, bool packed, bool row_major) const;

uint32_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const;
uint32_t get_declared_struct_member_array_stride_msl(const SPIRType &struct_type, uint32_t index) const;
uint32_t get_declared_struct_member_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const;
uint32_t get_declared_struct_member_alignment_msl(const SPIRType &struct_type, uint32_t index) const;

uint32_t get_declared_input_size_msl(const SPIRType &struct_type, uint32_t index) const;
uint32_t get_declared_input_array_stride_msl(const SPIRType &struct_type, uint32_t index) const;
uint32_t get_declared_input_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const;
uint32_t get_declared_input_alignment_msl(const SPIRType &struct_type, uint32_t index) const;

const SPIRType &get_physical_member_type(const SPIRType &struct_type, uint32_t index) const;
SPIRType get_presumed_input_type(const SPIRType &struct_type, uint32_t index) const;

uint32_t get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment = false,
bool ignore_padding = false) const;

std::string to_component_argument(uint32_t id);
void align_struct(SPIRType &ib_type, std::unordered_set<uint32_t> &aligned_structs);
void mark_scalar_layout_structs(const SPIRType &ib_type);
void mark_struct_members_packed(const SPIRType &type);
void ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index);
bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const;
std::string get_argument_address_space(const SPIRVariable &argument);
std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false);
static bool decoration_flags_signal_volatile(const Bitset &flags);
const char *to_restrict(uint32_t id, bool space);
SPIRType &get_stage_in_struct_type();
SPIRType &get_stage_out_struct_type();
SPIRType &get_patch_stage_in_struct_type();
SPIRType &get_patch_stage_out_struct_type();
std::string get_tess_factor_struct_name();
SPIRType &get_uint_type();
uint32_t get_uint_type_id();
void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, spv::Op opcode,
uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0,
bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0);
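// Maps a SPIR-V memory semantics mask to an MSL memory_order token. Metal atomics only
// support relaxed ordering, so in practice this is expected to resolve to memory_order_relaxed.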
const char *get_memory_order(uint32_t spv_mem_sem);
void add_pragma_line(const std::string &line);
void add_typedef_line(const std::string &line);
void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
bool emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id,
spv::StorageClass lhs_storage, spv::StorageClass rhs_storage) override;
void build_implicit_builtins();
uint32_t build_constant_uint_array_pointer();
void emit_entry_point_declarations() override;
bool uses_explicit_early_fragment_test();

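// IDs of implicitly created builtin variables and auxiliary resources (swizzle buffer,
// buffer-size buffer, view-mask buffer, etc.), mostly populated by build_implicit_builtins().
// A value of 0 means the corresponding variable was not needed.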
uint32_t builtin_frag_coord_id = 0;
uint32_t builtin_sample_id_id = 0;
uint32_t builtin_sample_mask_id = 0;
uint32_t builtin_helper_invocation_id = 0;
uint32_t builtin_vertex_idx_id = 0;
uint32_t builtin_base_vertex_id = 0;
uint32_t builtin_instance_idx_id = 0;
uint32_t builtin_base_instance_id = 0;
uint32_t builtin_view_idx_id = 0;
uint32_t builtin_layer_id = 0;
uint32_t builtin_invocation_id_id = 0;
uint32_t builtin_primitive_id_id = 0;
uint32_t builtin_subgroup_invocation_id_id = 0;
uint32_t builtin_subgroup_size_id = 0;
uint32_t builtin_dispatch_base_id = 0;
uint32_t builtin_stage_input_size_id = 0;
uint32_t builtin_local_invocation_index_id = 0;
uint32_t builtin_workgroup_size_id = 0;
uint32_t builtin_mesh_primitive_indices_id = 0;
uint32_t builtin_mesh_sizes_id = 0;
uint32_t builtin_task_grid_id = 0;
uint32_t builtin_frag_depth_id = 0;
uint32_t swizzle_buffer_id = 0;
uint32_t buffer_size_buffer_id = 0;
uint32_t view_mask_buffer_id = 0;
uint32_t dynamic_offsets_buffer_id = 0;
uint32_t uint_type_id = 0;
uint32_t shared_uint_type_id = 0;
uint32_t meshlet_type_id = 0;
uint32_t argument_buffer_padding_buffer_type_id = 0;
uint32_t argument_buffer_padding_image_type_id = 0;
uint32_t argument_buffer_padding_sampler_type_id = 0;

bool does_shader_write_sample_mask = false;
bool frag_shader_needs_discard_checks = false;

void cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
void cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override;
void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override;

void analyze_sampled_image_usage();

bool access_chain_needs_stage_io_builtin_translation(uint32_t base) override;
bool prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage,
bool &is_packed) override;
void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length);
void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) override;

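// In MSL, tessellation control shaders run as compute kernels and their stage I/O goes through
// explicit buffers, so access chains into that I/O need rewriting. is_out_of_bounds_tessellation_level()
// guards writes to gl_TessLevel components that have no slot in Metal's tessellation-factor
// structures (e.g. the unused outer/inner levels for triangle domains).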
bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length);
bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr);
bool is_out_of_bounds_tessellation_level(uint32_t id_lhs);

void ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin);

void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id);

std::string convert_to_f32(const std::string &expr, uint32_t components);

Options msl_options;
std::set<SPVFuncImpl> spv_function_implementations;
// Must be ordered so that the generated interface declarations are emitted in a deterministic order.
std::map<LocationComponentPair, MSLShaderInterfaceVariable> inputs_by_location;
std::unordered_map<uint32_t, MSLShaderInterfaceVariable> inputs_by_builtin;
std::map<LocationComponentPair, MSLShaderInterfaceVariable> outputs_by_location;
std::unordered_map<uint32_t, MSLShaderInterfaceVariable> outputs_by_builtin;
std::unordered_set<uint32_t> location_inputs_in_use;
std::unordered_set<uint32_t> location_inputs_in_use_fallback;
std::unordered_set<uint32_t> location_outputs_in_use;
std::unordered_set<uint32_t> location_outputs_in_use_fallback;
std::unordered_map<uint32_t, uint32_t> fragment_output_components;
std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_input_location;
std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_output_location;
std::set<std::string> pragma_lines;
std::set<std::string> typedef_lines;
SmallVector<uint32_t> vars_needing_early_declaration;

std::unordered_map<StageSetBinding, std::pair<MSLResourceBinding, bool>, InternalHasher> resource_bindings;
std::unordered_map<StageSetBinding, uint32_t, InternalHasher> resource_arg_buff_idx_to_binding_number;

uint32_t next_metal_resource_index_buffer = 0;
uint32_t next_metal_resource_index_texture = 0;
uint32_t next_metal_resource_index_sampler = 0;
// Intentionally uninitialized, works around MSVC 2013 bug.
uint32_t next_metal_resource_ids[kMaxArgumentBuffers];

VariableID stage_in_var_id = 0;
VariableID stage_out_var_id = 0;
VariableID patch_stage_in_var_id = 0;
VariableID patch_stage_out_var_id = 0;
VariableID stage_in_ptr_var_id = 0;
VariableID stage_out_ptr_var_id = 0;
VariableID tess_level_inner_var_id = 0;
VariableID tess_level_outer_var_id = 0;
VariableID mesh_out_per_vertex = 0;
VariableID mesh_out_per_primitive = 0;
VariableID stage_out_masked_builtin_type_id = 0;

// Handle HLSL-style 0-based vertex/instance index.
enum class TriState
{
Neutral,
No,
Yes
};
TriState needs_base_vertex_arg = TriState::Neutral;
TriState needs_base_instance_arg = TriState::Neutral;

bool has_sampled_images = false;
bool builtin_declaration = false; // Handle HLSL-style 0-based vertex/instance index.

bool is_using_builtin_array = false; // Force the use of C style array declaration.
bool using_builtin_array() const;

bool is_rasterization_disabled = false;
bool capture_output_to_buffer = false;
bool needs_swizzle_buffer_def = false;
bool used_swizzle_buffer = false;
bool added_builtin_tess_level = false;
bool needs_subgroup_invocation_id = false;
bool needs_subgroup_size = false;
bool needs_sample_id = false;
bool needs_helper_invocation = false;
bool writes_to_depth = false;
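// Default names and suffixes used when synthesizing interface structs, auxiliary buffers and
// combined image-sampler resources. For example, the sampler split from a combined image
// sampler named "tex" is emitted as "texSmplr" by default.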
std::string qual_pos_var_name;
std::string stage_in_var_name = "in";
std::string stage_out_var_name = "out";
std::string patch_stage_in_var_name = "patchIn";
std::string patch_stage_out_var_name = "patchOut";
std::string sampler_name_suffix = "Smplr";
std::string swizzle_name_suffix = "Swzl";
std::string buffer_size_name_suffix = "BufferSize";
std::string plane_name_suffix = "Plane";
std::string input_wg_var_name = "gl_in";
std::string input_buffer_var_name = "spvIn";
std::string output_buffer_var_name = "spvOut";
std::string patch_input_buffer_var_name = "spvPatchIn";
std::string patch_output_buffer_var_name = "spvPatchOut";
std::string tess_factor_buffer_var_name = "spvTessLevel";
std::string index_buffer_var_name = "spvIndices";
spv::Op previous_instruction_opcode = spv::OpNop;

// Must be ordered so that constexpr sampler declarations are emitted in a deterministic order.
std::map<uint32_t, MSLConstexprSampler> constexpr_samplers_by_id;
std::unordered_map<SetBindingPair, MSLConstexprSampler, InternalHasher> constexpr_samplers_by_binding;
const MSLConstexprSampler *find_constexpr_sampler(uint32_t id) const;

std::unordered_set<uint32_t> buffers_requiring_array_length;
SmallVector<std::pair<uint32_t, uint32_t>> buffer_aliases_argument;
SmallVector<uint32_t> buffer_aliases_discrete;
std::unordered_set<uint32_t> atomic_image_vars_emulated; // Emulate texture2D atomic operations
std::unordered_set<uint32_t> pull_model_inputs;
std::unordered_set<uint32_t> recursive_inputs;

SmallVector<SPIRVariable *> entry_point_bindings;

// Must be ordered, since the entries correspond to positions in the dynamic-offset array.
std::map<SetBindingPair, std::pair<uint32_t, uint32_t>> buffers_requiring_dynamic_offset;

SmallVector<uint32_t> disabled_frag_outputs;

std::unordered_set<SetBindingPair, InternalHasher> inline_uniform_blocks;

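// Argument-buffer bookkeeping: argument_buffer_ids[] holds the synthesized buffer variable per
// descriptor set (indexed by set, bounded by kMaxArgumentBuffers), while the bitmasks below
// track which sets remain as discrete bindings and which argument buffers must be declared in
// the device (rather than constant) address space.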
uint32_t argument_buffer_ids[kMaxArgumentBuffers];
uint32_t argument_buffer_discrete_mask = 0;
uint32_t argument_buffer_device_storage_mask = 0;

void emit_argument_buffer_aliased_descriptor(const SPIRVariable &aliased_var,
const SPIRVariable &base_var);

void analyze_argument_buffers();
bool descriptor_set_is_argument_buffer(uint32_t desc_set) const;
const MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx) const;
void add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);
void add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);
void add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);
void add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, uint32_t count);

uint32_t get_target_components_for_fragment_location(uint32_t location) const;
uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components,
SPIRType::BaseType basetype = SPIRType::Unknown);
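// For pull-model interpolation, fragment inputs are wrapped in Metal's interpolant<T, P>
// template (MSL 2.3+), roughly interpolant<float4, interpolation::perspective>, so that
// interpolate_at_centroid/sample/offset can be called on them.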
uint32_t build_msl_interpolant_type(uint32_t type_id, bool is_noperspective);

bool suppress_missing_prototypes = false;
bool suppress_incompatible_pointer_types_discard_qualifiers = false;

void add_spv_func_and_recompile(SPVFuncImpl spv_func);

void activate_argument_buffer_resources();

bool type_is_msl_framebuffer_fetch(const SPIRType &type) const;
bool is_supported_argument_buffer_type(const SPIRType &type) const;

bool variable_storage_requires_stage_io(spv::StorageClass storage) const;

bool needs_manual_helper_invocation_updates() const
{
return msl_options.manual_helper_invocation_updates && msl_options.supports_msl_version(2, 3);
}
bool needs_frag_discard_checks() const
{
return get_execution_model() == spv::ExecutionModelFragment && msl_options.supports_msl_version(2, 3) &&
msl_options.check_discarded_frag_stores && frag_shader_needs_discard_checks;
}

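// When additional_fixed_sample_mask is anything other than 0xffffffff, the fixed mask is
// ANDed into the fragment shader's sample-mask output (creating that output if the shader
// does not write gl_SampleMask itself).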
bool has_additional_fixed_sample_mask() const { return msl_options.additional_fixed_sample_mask != 0xffffffff; }
std::string additional_fixed_sample_mask_str() const;

// OpcodeHandler that performs several MSL-specific preprocessing passes ahead of code generation.
struct OpCodePreprocessor : OpcodeHandler
{
OpCodePreprocessor(CompilerMSL &compiler_)
: compiler(compiler_)
{
}

bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override;
CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args);
void check_resource_write(uint32_t var_id);

CompilerMSL &compiler;
std::unordered_map<uint32_t, uint32_t> result_types;
std::unordered_map<uint32_t, uint32_t> image_pointers_emulated; // Emulate texture2D atomic operations
bool suppress_missing_prototypes = false;
bool uses_atomics = false;
bool uses_image_write = false;
bool uses_buffer_write = false;
bool uses_discard = false;
bool needs_subgroup_invocation_id = false;
bool needs_subgroup_size = false;
bool needs_sample_id = false;
bool needs_helper_invocation = false;
};

// OpcodeHandler that scans for uses of sampled images.
struct SampledImageScanner : OpcodeHandler
{
SampledImageScanner(CompilerMSL &compiler_)
: compiler(compiler_)
{
}

bool handle(spv::Op opcode, const uint32_t *args, uint32_t) override;

CompilerMSL &compiler;
};

// Sorts the members of a SPIRType and its associated Meta info based on a configurable
// sort aspect, which selects the member property used for ordering.
// Regardless of the sort aspect, built-in members always appear at the end of the struct.
struct MemberSorter
{
enum SortAspect
{
LocationThenBuiltInType,
Offset
};

void sort();
bool operator()(uint32_t mbr_idx1, uint32_t mbr_idx2);
MemberSorter(SPIRType &t, Meta &m, SortAspect sa);

SPIRType &type;
Meta &meta;
SortAspect sort_aspect;
};
};
} // namespace SPIRV_CROSS_NAMESPACE

#endif
