/*
 * Copyright 2016-2021 The Brenwill Workshop Ltd.
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#ifndef SPIRV_CROSS_MSL_HPP
#define SPIRV_CROSS_MSL_HPP

#include "spirv_glsl.hpp"
#include <map>
#include <set>
#include <stddef.h>
#include <unordered_map>
#include <unordered_set>

namespace SPIRV_CROSS_NAMESPACE
{

// Indicates the format of a shader interface variable. Currently limited to specifying
// if the input is an 8-bit unsigned integer, 16-bit unsigned integer, or
// some other format.
enum MSLShaderVariableFormat
{
	MSL_SHADER_VARIABLE_FORMAT_OTHER = 0,
	MSL_SHADER_VARIABLE_FORMAT_UINT8 = 1,
	MSL_SHADER_VARIABLE_FORMAT_UINT16 = 2,
	MSL_SHADER_VARIABLE_FORMAT_ANY16 = 3,
	MSL_SHADER_VARIABLE_FORMAT_ANY32 = 4,

	// Deprecated aliases.
	MSL_VERTEX_FORMAT_OTHER = MSL_SHADER_VARIABLE_FORMAT_OTHER,
	MSL_VERTEX_FORMAT_UINT8 = MSL_SHADER_VARIABLE_FORMAT_UINT8,
	MSL_VERTEX_FORMAT_UINT16 = MSL_SHADER_VARIABLE_FORMAT_UINT16,
	MSL_SHADER_INPUT_FORMAT_OTHER = MSL_SHADER_VARIABLE_FORMAT_OTHER,
	MSL_SHADER_INPUT_FORMAT_UINT8 = MSL_SHADER_VARIABLE_FORMAT_UINT8,
	MSL_SHADER_INPUT_FORMAT_UINT16 = MSL_SHADER_VARIABLE_FORMAT_UINT16,
	MSL_SHADER_INPUT_FORMAT_ANY16 = MSL_SHADER_VARIABLE_FORMAT_ANY16,
	MSL_SHADER_INPUT_FORMAT_ANY32 = MSL_SHADER_VARIABLE_FORMAT_ANY32,

	MSL_SHADER_VARIABLE_FORMAT_INT_MAX = 0x7fffffff
};

// Indicates the rate at which a variable changes value, one of: per-vertex,
// per-primitive, or per-patch.
enum MSLShaderVariableRate
{
	MSL_SHADER_VARIABLE_RATE_PER_VERTEX = 0,
	MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE = 1,
	MSL_SHADER_VARIABLE_RATE_PER_PATCH = 2,

	MSL_SHADER_VARIABLE_RATE_INT_MAX = 0x7fffffff,
};

// Defines MSL characteristics of a shader interface variable at a particular location.
// After compilation, it is possible to query whether or not this location was used.
// If vecsize is nonzero, it must be greater than or equal to the vecsize declared in the shader,
// or behavior is undefined.
struct MSLShaderInterfaceVariable
{
	uint32_t location = 0;
	uint32_t component = 0;
	MSLShaderVariableFormat format = MSL_SHADER_VARIABLE_FORMAT_OTHER;
	spv::BuiltIn builtin = spv::BuiltInMax;
	uint32_t vecsize = 0;
	MSLShaderVariableRate rate = MSL_SHADER_VARIABLE_RATE_PER_VERTEX;
};
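
// Illustrative sketch (not part of this header): describing a vertex input at location 0 that the
// application feeds as 16-bit data, then registering it with the compiler. The variable name
// `msl_compiler` is hypothetical; see CompilerMSL::add_msl_shader_input() below.
//
//   MSLShaderInterfaceVariable position_input;
//   position_input.location = 0;
//   position_input.format = MSL_SHADER_VARIABLE_FORMAT_ANY16;
//   position_input.vecsize = 4; // at least the vecsize declared in the shader
//   msl_compiler.add_msl_shader_input(position_input);
//
// After msl_compiler.compile(), msl_compiler.is_msl_shader_input_used(0) reports whether
// location 0 was actually consumed by the generated MSL.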

// Matches the binding index of an MSL resource for a binding within a descriptor set.
// Taken together, the stage, desc_set and binding combine to form a reference to a resource
// descriptor used in a particular shading stage. The count field indicates the number of
// resources consumed by this binding, if the binding represents an array of resources.
// If the resource array is a run-time-sized array, which is legal in GLSL or SPIR-V, this value
// will be used to declare the array size in MSL, which does not support run-time-sized arrays.
// If pad_argument_buffer_resources is enabled, the basetype and count values are used to
// specify the base type and array size of the resource in the argument buffer, if that resource
// is not defined and used by the shader. With pad_argument_buffer_resources enabled, this
// information will be used to pad the argument buffer structure, in order to align that
// structure consistently for all uses, across all shaders, of the descriptor set represented
// by the argument buffer. If pad_argument_buffer_resources is disabled, basetype does not
// need to be populated, and if the resource is also not a run-time-sized array, the count
// field does not need to be populated.
// If using MSL 2.0 argument buffers, the descriptor set is not marked as a discrete descriptor set,
// and (for iOS only) the resource is not a storage image (sampled != 2), the binding reference we
// remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure.
// For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will
// become a [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used.
struct MSLResourceBinding
{
	spv::ExecutionModel stage = spv::ExecutionModelMax;
	SPIRType::BaseType basetype = SPIRType::Unknown;
	uint32_t desc_set = 0;
	uint32_t binding = 0;
	uint32_t count = 0;
	uint32_t msl_buffer = 0;
	uint32_t msl_texture = 0;
	uint32_t msl_sampler = 0;
};
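
// Illustrative sketch (not part of this header): remapping descriptor set 0, binding 1 in a fragment
// shader to Metal texture slot 2 and sampler slot 2 for a combined image sampler. The variable name
// `msl_compiler` is hypothetical; see CompilerMSL::add_msl_resource_binding() below. basetype and
// count only need to be populated for padded argument buffers or run-time-sized arrays, as noted above.
//
//   MSLResourceBinding tex_binding;
//   tex_binding.stage = spv::ExecutionModelFragment;
//   tex_binding.desc_set = 0;
//   tex_binding.binding = 1;
//   tex_binding.basetype = SPIRType::SampledImage;
//   tex_binding.count = 1;
//   tex_binding.msl_texture = 2;
//   tex_binding.msl_sampler = 2;
//   msl_compiler.add_msl_resource_binding(tex_binding);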

enum MSLSamplerCoord
{
	MSL_SAMPLER_COORD_NORMALIZED = 0,
	MSL_SAMPLER_COORD_PIXEL = 1,
	MSL_SAMPLER_INT_MAX = 0x7fffffff
};

enum MSLSamplerFilter
{
	MSL_SAMPLER_FILTER_NEAREST = 0,
	MSL_SAMPLER_FILTER_LINEAR = 1,
	MSL_SAMPLER_FILTER_INT_MAX = 0x7fffffff
};

enum MSLSamplerMipFilter
{
	MSL_SAMPLER_MIP_FILTER_NONE = 0,
	MSL_SAMPLER_MIP_FILTER_NEAREST = 1,
	MSL_SAMPLER_MIP_FILTER_LINEAR = 2,
	MSL_SAMPLER_MIP_FILTER_INT_MAX = 0x7fffffff
};

enum MSLSamplerAddress
{
	MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO = 0,
	MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE = 1,
	MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER = 2,
	MSL_SAMPLER_ADDRESS_REPEAT = 3,
	MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT = 4,
	MSL_SAMPLER_ADDRESS_INT_MAX = 0x7fffffff
};

enum MSLSamplerCompareFunc
{
	MSL_SAMPLER_COMPARE_FUNC_NEVER = 0,
	MSL_SAMPLER_COMPARE_FUNC_LESS = 1,
	MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL = 2,
	MSL_SAMPLER_COMPARE_FUNC_GREATER = 3,
	MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL = 4,
	MSL_SAMPLER_COMPARE_FUNC_EQUAL = 5,
	MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL = 6,
	MSL_SAMPLER_COMPARE_FUNC_ALWAYS = 7,
	MSL_SAMPLER_COMPARE_FUNC_INT_MAX = 0x7fffffff
};

enum MSLSamplerBorderColor
{
	MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK = 0,
	MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK = 1,
	MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE = 2,
	MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff
};

enum MSLFormatResolution
{
	MSL_FORMAT_RESOLUTION_444 = 0,
	MSL_FORMAT_RESOLUTION_422,
	MSL_FORMAT_RESOLUTION_420,
	MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff
};

enum MSLChromaLocation
{
	MSL_CHROMA_LOCATION_COSITED_EVEN = 0,
	MSL_CHROMA_LOCATION_MIDPOINT,
	MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff
};

enum MSLComponentSwizzle
{
	MSL_COMPONENT_SWIZZLE_IDENTITY = 0,
	MSL_COMPONENT_SWIZZLE_ZERO,
	MSL_COMPONENT_SWIZZLE_ONE,
	MSL_COMPONENT_SWIZZLE_R,
	MSL_COMPONENT_SWIZZLE_G,
	MSL_COMPONENT_SWIZZLE_B,
	MSL_COMPONENT_SWIZZLE_A,
	MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff
};

enum MSLSamplerYCbCrModelConversion
{
	MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0,
	MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY,
	MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709,
	MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601,
	MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020,
	MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff
};

enum MSLSamplerYCbCrRange
{
	MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0,
	MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW,
	MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff
};

struct MSLConstexprSampler
{
	MSLSamplerCoord coord = MSL_SAMPLER_COORD_NORMALIZED;
	MSLSamplerFilter min_filter = MSL_SAMPLER_FILTER_NEAREST;
	MSLSamplerFilter mag_filter = MSL_SAMPLER_FILTER_NEAREST;
	MSLSamplerMipFilter mip_filter = MSL_SAMPLER_MIP_FILTER_NONE;
	MSLSamplerAddress s_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE;
	MSLSamplerAddress t_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE;
	MSLSamplerAddress r_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE;
	MSLSamplerCompareFunc compare_func = MSL_SAMPLER_COMPARE_FUNC_NEVER;
	MSLSamplerBorderColor border_color = MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK;
	float lod_clamp_min = 0.0f;
	float lod_clamp_max = 1000.0f;
	int max_anisotropy = 1;

	// Sampler Y'CbCr conversion parameters
	uint32_t planes = 0;
	MSLFormatResolution resolution = MSL_FORMAT_RESOLUTION_444;
	MSLSamplerFilter chroma_filter = MSL_SAMPLER_FILTER_NEAREST;
	MSLChromaLocation x_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN;
	MSLChromaLocation y_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN;
	MSLComponentSwizzle swizzle[4]; // IDENTITY, IDENTITY, IDENTITY, IDENTITY
	MSLSamplerYCbCrModelConversion ycbcr_model = MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY;
	MSLSamplerYCbCrRange ycbcr_range = MSL_SAMPLER_YCBCR_RANGE_ITU_FULL;
	uint32_t bpc = 8;

	bool compare_enable = false;
	bool lod_clamp_enable = false;
	bool anisotropy_enable = false;
	bool ycbcr_conversion_enable = false;

	MSLConstexprSampler()
	{
		for (uint32_t i = 0; i < 4; i++)
			swizzle[i] = MSL_COMPONENT_SWIZZLE_IDENTITY;
	}
	bool swizzle_is_identity() const
	{
		return (swizzle[0] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[1] == MSL_COMPONENT_SWIZZLE_IDENTITY &&
		        swizzle[2] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[3] == MSL_COMPONENT_SWIZZLE_IDENTITY);
	}
	bool swizzle_has_one_or_zero() const
	{
		return (swizzle[0] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[0] == MSL_COMPONENT_SWIZZLE_ONE ||
		        swizzle[1] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[1] == MSL_COMPONENT_SWIZZLE_ONE ||
		        swizzle[2] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[2] == MSL_COMPONENT_SWIZZLE_ONE ||
		        swizzle[3] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[3] == MSL_COMPONENT_SWIZZLE_ONE);
	}
};
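
// Illustrative sketch (not part of this header): a comparison (PCF) sampler remapped to a constexpr
// sampler, as mentioned for remap_constexpr_sampler() below. `msl_compiler` and the set/binding
// values are hypothetical.
//
//   MSLConstexprSampler shadow_sampler;
//   shadow_sampler.min_filter = MSL_SAMPLER_FILTER_LINEAR;
//   shadow_sampler.mag_filter = MSL_SAMPLER_FILTER_LINEAR;
//   shadow_sampler.compare_enable = true;
//   shadow_sampler.compare_func = MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL;
//   msl_compiler.remap_constexpr_sampler_by_binding(0, 3, shadow_sampler);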

// Special constant used in an MSLResourceBinding desc_set
// element to indicate the bindings for the push constants.
// Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly.
static const uint32_t kPushConstDescSet = ResourceBindingPushConstantDescriptorSet;

// Special constant used in an MSLResourceBinding binding
// element to indicate the bindings for the push constants.
// Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly.
static const uint32_t kPushConstBinding = ResourceBindingPushConstantBinding;

// Special constant used in an MSLResourceBinding binding
// element to indicate the buffer binding for swizzle buffers.
static const uint32_t kSwizzleBufferBinding = ~(1u);

// Special constant used in an MSLResourceBinding binding
// element to indicate the buffer binding for buffer size buffers to support OpArrayLength.
static const uint32_t kBufferSizeBufferBinding = ~(2u);

// Special constant used in an MSLResourceBinding binding
// element to indicate the buffer binding used for the argument buffer itself.
// This buffer binding should be kept as small as possible as all automatic bindings for buffers
// will start at max(kArgumentBufferBinding) + 1.
static const uint32_t kArgumentBufferBinding = ~(3u);

static const uint32_t kMaxArgumentBuffers = 8;
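
// Illustrative sketch (not part of this header): the special constants above are meant to be used as
// the `binding` element of an MSLResourceBinding to place the corresponding internal buffer. The
// `msl_compiler` name and the desc_set value are assumptions for illustration; consult the
// implementation for the exact lookup rules.
//
//   MSLResourceBinding swizzle_buf;
//   swizzle_buf.stage = spv::ExecutionModelFragment;
//   swizzle_buf.desc_set = 0;                    // assumption: per-set placement when using argument buffers
//   swizzle_buf.binding = kSwizzleBufferBinding; // special constant, not a SPIR-V binding
//   swizzle_buf.msl_buffer = 30;
//   msl_compiler.add_msl_resource_binding(swizzle_buf);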

// Decompiles SPIR-V to Metal Shading Language
class CompilerMSL : public CompilerGLSL
{
public:
	// Options for compiling to Metal Shading Language
	struct Options
	{
		typedef enum
		{
			iOS = 0,
			macOS = 1
		} Platform;

		Platform platform = macOS;
		uint32_t msl_version = make_msl_version(1, 2);
		uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers
		uint32_t r32ui_linear_texture_alignment = 4;
		uint32_t r32ui_alignment_constant_id = 65535;
		uint32_t swizzle_buffer_index = 30;
		uint32_t indirect_params_buffer_index = 29;
		uint32_t shader_output_buffer_index = 28;
		uint32_t shader_patch_output_buffer_index = 27;
		uint32_t shader_tess_factor_buffer_index = 26;
		uint32_t buffer_size_buffer_index = 25;
		uint32_t view_mask_buffer_index = 24;
		uint32_t dynamic_offsets_buffer_index = 23;
		uint32_t shader_input_buffer_index = 22;
		uint32_t shader_index_buffer_index = 21;
		uint32_t shader_patch_input_buffer_index = 20;
		uint32_t shader_input_wg_index = 0;
		uint32_t device_index = 0;
		uint32_t enable_frag_output_mask = 0xffffffff;
		// Metal doesn't allow setting a fixed sample mask directly in the pipeline.
		// We can evade this restriction by ANDing the internal sample_mask output
		// of the shader with the additional fixed sample mask.
		uint32_t additional_fixed_sample_mask = 0xffffffff;
		bool enable_point_size_builtin = true;
		bool enable_frag_depth_builtin = true;
		bool enable_frag_stencil_ref_builtin = true;
		bool disable_rasterization = false;
		bool capture_output_to_buffer = false;
		bool swizzle_texture_samples = false;
		bool tess_domain_origin_lower_left = false;
		bool multiview = false;
		bool multiview_layered_rendering = true;
		bool view_index_from_device_index = false;
		bool dispatch_base = false;
		bool texture_1D_as_2D = false;

		// Enable use of Metal argument buffers.
		// MSL 2.0 must also be enabled.
		bool argument_buffers = false;

		// Defines Metal argument buffer tier levels.
		// Uses same values as Metal MTLArgumentBuffersTier enumeration.
		enum class ArgumentBuffersTier
		{
			Tier1 = 0,
			Tier2 = 1,
		};

		// When using Metal argument buffers, indicates the Metal argument buffer tier level supported by the Metal platform.
		// Ignored when Options::argument_buffers is disabled.
		// - Tier1 supports writable images on macOS, but not on iOS.
		// - Tier2 supports writable images on macOS and iOS, and higher resource count limits.
		// Tier capabilities based on recommendations from Apple engineering.
		ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1;

		// Enables a specific argument buffer format with extra information to track SSBO length.
		bool runtime_array_rich_descriptor = false;

		// Ensures vertex and instance indices start at zero. This reflects the behavior of HLSL with SV_VertexID and SV_InstanceID.
		bool enable_base_index_zero = false;

		// Fragment output in MSL must have at least as many components as the render pass.
		// Adds support for explicitly padding out components.
		bool pad_fragment_output_components = false;

		// Specifies whether the iOS target version supports the [[base_vertex]] and [[base_instance]] attributes.
		bool ios_support_base_vertex_instance = false;

		// Use Metal's native frame-buffer fetch API for subpass inputs.
		bool use_framebuffer_fetch_subpasses = false;

		// Enables use of "fma" intrinsic for invariant float math
		bool invariant_float_math = false;

		// Emulate texturecube_array with texture2d_array for iOS where this type is not available
		bool emulate_cube_array = false;

		// Allow user to enable decoration binding
		bool enable_decoration_binding = false;

		// Requires MSL 2.1, use the native support for texel buffers.
		bool texture_buffer_native = false;

		// Forces all resources which are part of an argument buffer to be considered active.
		// This ensures ABI compatibility between shaders where some resources might be unused,
		// and would otherwise declare a different IAB.
		bool force_active_argument_buffer_resources = false;

		// Aligns each resource in an argument buffer to its assigned index value, id(N),
		// by adding synthetic padding members in the argument buffer struct for any resources
		// in the argument buffer that are not defined and used by the shader. This allows
		// the shader to index into the correct argument in a descriptor set argument buffer
		// that is shared across shaders, where not all resources in the argument buffer are
		// defined in each shader. For this to work, an MSLResourceBinding must be provided for
		// all descriptors in any descriptor set held in an argument buffer in the shader, and
		// that MSLResourceBinding must have the basetype and count members populated correctly.
		// The implementation here assumes any inline blocks in the argument buffer are provided
		// in a Metal buffer, and doesn't take into consideration inline blocks that are
		// optionally embedded directly into the argument buffer via add_inline_uniform_block().
		bool pad_argument_buffer_resources = false;

		// Forces the use of plain arrays, which works around certain driver bugs on certain versions
		// of Intel MacBooks. See https://github.com/KhronosGroup/SPIRV-Cross/issues/1210.
		// May reduce performance in scenarios where arrays are copied around as value-types.
		bool force_native_arrays = false;

		// If a shader writes clip distance, also emit user varyings which
		// can be read in subsequent stages.
		bool enable_clip_distance_user_varying = true;

		// In a tessellation control shader, assume that more than one patch can be processed in a
		// single workgroup. This requires changes to the way the InvocationId and PrimitiveId
		// builtins are processed, but should result in more efficient usage of the GPU.
		bool multi_patch_workgroup = false;

		// Use storage buffers instead of vertex-style attributes for tessellation evaluation
		// input. This may require conversion of inputs in the generated post-tessellation
		// vertex shader, but allows the use of nested arrays.
		bool raw_buffer_tese_input = false;

		// If set, a vertex shader will be compiled as part of a tessellation pipeline.
		// It will be translated as a compute kernel, so it can use the global invocation ID
		// to index the output buffer.
		bool vertex_for_tessellation = false;

		// Assume that SubpassData images have multiple layers. Layered input attachments
		// are addressed relative to the Layer output from the vertex pipeline. This option
		// has no effect with multiview, since all input attachments are assumed to be layered
		// and will be addressed using the current ViewIndex.
		bool arrayed_subpass_input = false;

		// Whether to use SIMD-group or quadgroup functions to implement group non-uniform
		// operations. Some GPUs on iOS do not support the SIMD-group functions, only the
		// quadgroup functions.
		bool ios_use_simdgroup_functions = false;

		// If set, the subgroup size will be assumed to be one, and subgroup-related
		// builtins and operations will be emitted accordingly. This mode is intended to
		// be used by MoltenVK on hardware/software configurations which do not provide
		// sufficient support for subgroups.
		bool emulate_subgroups = false;

		// If nonzero, a fixed subgroup size to assume. Metal, similarly to VK_EXT_subgroup_size_control,
		// allows the SIMD-group size (aka thread execution width) to vary depending on
		// register usage and requirements. In certain circumstances--for example, a pipeline
		// in MoltenVK without VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT--
		// this is undesirable. This fixes the value of the SubgroupSize builtin, instead of
		// mapping it to the Metal builtin [[thread_execution_width]]. If the thread
		// execution width is reduced, the extra invocations will appear to be inactive.
		// If zero, the SubgroupSize will be allowed to vary, and the builtin will be mapped
		// to the Metal [[thread_execution_width]] builtin.
		uint32_t fixed_subgroup_size = 0;

		enum class IndexType
		{
			None = 0,
			UInt16 = 1,
			UInt32 = 2
		};

		// The type of index in the index buffer, if present. For a compute shader, Metal
		// requires specifying the indexing at pipeline creation, rather than at draw time
		// as with graphics pipelines. This means we must create three different pipelines,
		// for no indexing, 16-bit indices, and 32-bit indices. Each requires different
		// handling for the gl_VertexIndex builtin. We may as well, then, create three
		// different shaders for these three scenarios.
		IndexType vertex_index_type = IndexType::None;

		// If set, a dummy [[sample_id]] input is added to a fragment shader if none is present.
		// This will force the shader to run at sample rate, assuming Metal does not optimize
		// the extra threads away.
		bool force_sample_rate_shading = false;

		// If set, gl_HelperInvocation will be set manually whenever a fragment is discarded.
		// Some Metal devices have a bug where simd_is_helper_thread() does not return true
		// after a fragment has been discarded. This is a workaround that is only expected to be needed
		// until the bug is fixed in Metal; it is provided as an option to allow disabling it when that occurs.
		bool manual_helper_invocation_updates = true;

		// If set, extra checks will be emitted in fragment shaders to prevent writes
		// from discarded fragments. Some Metal devices have a bug where writes to storage resources
		// from discarded fragment threads continue to occur, despite the fragment being
		// discarded. This is a workaround that is only expected to be needed until the
		// bug is fixed in Metal; it is provided as an option so it can be enabled
		// only when the bug is present.
		bool check_discarded_frag_stores = false;

		// If set, Lod operands to OpImageSample*DrefExplicitLod for 1D and 2D array images
		// will be implemented using a gradient instead of passing the level operand directly.
		// Some Metal devices have a bug where the level() argument to depth2d_array<T>::sample_compare()
		// in a fragment shader is biased by some unknown amount, possibly dependent on the
		// partial derivatives of the texture coordinates. This is a workaround that is only
		// expected to be needed until the bug is fixed in Metal; it is provided as an option
		// so it can be enabled only when the bug is present.
		bool sample_dref_lod_array_as_grad = false;

		// MSL doesn't guarantee coherence between writes and subsequent reads of read_write textures.
		// This inserts fences before each read of a read_write texture to ensure coherency.
		// If you're sure you never rely on this, you can set this to false for a possible performance improvement.
		// Note: Only Apple's GPU compiler takes advantage of the lack of coherency, so make sure to test on Apple GPUs if you disable this.
		bool readwrite_texture_fences = true;

		// Metal 3.1 introduced a regression bug which causes infinite recursion during
		// Metal's analysis of an entry point input structure that is itself recursive. Enabling
		// this option will replace the recursive input declaration with an alternate variable of
		// type void*, and then cast to the correct type at the top of the entry point function.
		// The bug has been reported to Apple, and will hopefully be fixed in future releases.
		bool replace_recursive_inputs = false;

		// If set, manual fixups of gradient vectors for cube texture lookups will be performed.
		// All released Apple Silicon GPUs to date behave incorrectly when sampling a cube texture
		// with explicit gradients. They will ignore one of the three partial derivatives based
		// on the selected major axis, and expect the remaining derivatives to be partially
		// transformed.
		bool agx_manual_cube_grad_fixup = false;

		// Metal will prematurely discard fragments with side effects under certain circumstances.
		// Example: CTS test dEQP-VK.fragment_operations.early_fragment.discard_no_early_fragment_tests_depth
		// The test renders a full-screen quad with varying depth [0,1] for each fragment.
		// Each fragment performs an operation with side effects, modifies the depth value and
		// discards the fragment. The test expects the fragment to be run due to:
		// https://registry.khronos.org/vulkan/specs/1.0-extensions/html/vkspec.html#fragops-shader-depthreplacement
		// which states that the fragment shader must be run because it replaces the depth in the shader.
		// However, Metal may prematurely discard fragments without executing them
		// (I believe this to be due to a greedy optimization on their end), making the test fail.
		// This option enforces fragment execution for such cases where the fragment has operations
		// with side effects. Provided as an option hoping Metal will fix this issue in the future.
		bool force_fragment_with_side_effects_execution = false;

		// If set, adds a depth pass through statement to circumvent the following issue:
		// When the same depth/stencil is used as input and depth/stencil attachment, we need to
		// force Metal to perform the depth/stencil write after fragment execution. Otherwise,
		// Metal will write to the depth attachment before fragment execution. This happens
		// if the fragment does not modify the depth value.
		bool input_attachment_is_ds_attachment = false;

		bool is_ios() const
		{
			return platform == iOS;
		}

		bool is_macos() const
		{
			return platform == macOS;
		}

		bool use_quadgroup_operation() const
		{
			return is_ios() && !ios_use_simdgroup_functions;
		}

		void set_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0)
		{
			msl_version = make_msl_version(major, minor, patch);
		}

		bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) const
		{
			return msl_version >= make_msl_version(major, minor, patch);
		}

		static uint32_t make_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0)
		{
			return (major * 10000) + (minor * 100) + patch;
		}
	};
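
	// Illustrative sketch (not part of this header): configuring Options before compilation.
	// `msl_compiler` is hypothetical. make_msl_version() packs the version as
	// major * 10000 + minor * 100 + patch, e.g. MSL 2.1 -> 20100.
	//
	//   CompilerMSL::Options opts;
	//   opts.platform = CompilerMSL::Options::iOS;
	//   opts.set_msl_version(2, 1);   // stores 20100
	//   opts.argument_buffers = true; // requires MSL 2.0+
	//   msl_compiler.set_msl_options(opts);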

	const Options &get_msl_options() const
	{
		return msl_options;
	}

	void set_msl_options(const Options &opts)
	{
		msl_options = opts;
	}

	// Provide feedback to calling API to allow runtime to disable pipeline
	// rasterization if vertex shader requires rasterization to be disabled.
	bool get_is_rasterization_disabled() const
	{
		return is_rasterization_disabled && (get_entry_point().model == spv::ExecutionModelVertex ||
		                                     get_entry_point().model == spv::ExecutionModelTessellationControl ||
		                                     get_entry_point().model == spv::ExecutionModelTessellationEvaluation);
	}

	// Provide feedback to calling API to allow it to pass an auxiliary
	// swizzle buffer if the shader needs it.
	bool needs_swizzle_buffer() const
	{
		return used_swizzle_buffer;
	}

	// Provide feedback to calling API to allow it to pass a buffer
	// containing STORAGE_BUFFER buffer sizes to support OpArrayLength.
	bool needs_buffer_size_buffer() const
	{
		return !buffers_requiring_array_length.empty();
	}

	bool buffer_requires_array_length(VariableID id) const
	{
		return buffers_requiring_array_length.count(id) != 0;
	}

	// Provide feedback to calling API to allow it to pass a buffer
	// containing the view mask for the current multiview subpass.
	bool needs_view_mask_buffer() const
	{
		return msl_options.multiview && !msl_options.view_index_from_device_index;
	}

	// Provide feedback to calling API to allow it to pass a buffer
	// containing the dispatch base workgroup ID.
	bool needs_dispatch_base_buffer() const
	{
		return msl_options.dispatch_base && !msl_options.supports_msl_version(1, 2);
	}

	// Provide feedback to calling API to allow it to pass an output
	// buffer if the shader needs it.
	bool needs_output_buffer() const
	{
		return capture_output_to_buffer && stage_out_var_id != ID(0);
	}

	// Provide feedback to calling API to allow it to pass a patch output
	// buffer if the shader needs it.
	bool needs_patch_output_buffer() const
	{
		return capture_output_to_buffer && patch_stage_out_var_id != ID(0);
	}

	// Provide feedback to calling API to allow it to pass an input threadgroup
	// buffer if the shader needs it.
	bool needs_input_threadgroup_mem() const
	{
		return capture_output_to_buffer && stage_in_var_id != ID(0);
	}

	explicit CompilerMSL(std::vector<uint32_t> spirv);
	CompilerMSL(const uint32_t *ir, size_t word_count);
	explicit CompilerMSL(const ParsedIR &ir);
	explicit CompilerMSL(ParsedIR &&ir);

	// input is a shader interface variable description used to fix up shader input variables.
	// If shader inputs are provided, is_msl_shader_input_used() will return true after
	// calling ::compile() if the location was used by the MSL code.
	void add_msl_shader_input(const MSLShaderInterfaceVariable &input);

	// output is a shader interface variable description used to fix up shader output variables.
	// If shader outputs are provided, is_msl_shader_output_used() will return true after
	// calling ::compile() if the location was used by the MSL code.
	void add_msl_shader_output(const MSLShaderInterfaceVariable &output);

	// resource is a resource binding to indicate the MSL buffer,
	// texture or sampler index to use for a particular SPIR-V descriptor set
	// and binding. If resource bindings are provided,
	// is_msl_resource_binding_used() will return true after calling ::compile() if
	// the set/binding combination was used by the MSL code.
	void add_msl_resource_binding(const MSLResourceBinding &resource);

	// desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource
	// in this shader. index is the index within the dynamic offset buffer to use. This
	// function marks that resource as using a dynamic offset (VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC
	// or VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC). This function only has any effect if argument buffers
	// are enabled. If so, the buffer will have its address adjusted at the beginning of the shader with
	// an offset taken from the dynamic offset buffer.
	void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index);

	// desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource
	// in this shader. This function marks that resource as an inline uniform block
	// (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT). This function only has any effect if argument buffers
	// are enabled. If so, the buffer block will be directly embedded into the argument
	// buffer, instead of being referenced indirectly via pointer.
	void add_inline_uniform_block(uint32_t desc_set, uint32_t binding);

	// When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets.
	// This corresponds to VK_KHR_push_descriptor in Vulkan.
	void add_discrete_descriptor_set(uint32_t desc_set);

	// If an argument buffer is large enough, it may need to be in the device storage space rather than
	// constant. Opt-in to this behavior here on a per set basis.
	void set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage);
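
	// Illustrative sketch (not part of this header): per-descriptor-set argument buffer tweaks.
	// `msl_compiler` is hypothetical; these calls only take effect when Options::argument_buffers is enabled.
	//
	//   msl_compiler.add_dynamic_buffer(0, 2, 0);               // set 0, binding 2 uses dynamic offset slot 0
	//   msl_compiler.add_inline_uniform_block(0, 5);            // set 0, binding 5 is an inline uniform block
	//   msl_compiler.add_discrete_descriptor_set(1);            // set 1 keeps classic [[buffer/texture/sampler]] bindings
	//   msl_compiler.set_argument_buffer_device_address_space(0, true); // set 0's argument buffer lives in device space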

	// Query after compilation is done. This allows you to check if an input location was used by the shader.
	bool is_msl_shader_input_used(uint32_t location);

	// Query after compilation is done. This allows you to check if an output location was used by the shader.
	bool is_msl_shader_output_used(uint32_t location);

	// If not using add_msl_shader_input, it's possible
	// that certain builtin attributes need to be automatically assigned locations.
	// This is typical for tessellation builtin inputs such as tess levels, gl_Position, etc.
	// This returns k_unknown_location if the location was explicitly assigned with
	// add_msl_shader_input or the builtin is not used, otherwise returns N in [[attribute(N)]].
	uint32_t get_automatic_builtin_input_location(spv::BuiltIn builtin) const;

	// If not using add_msl_shader_output, it's possible
	// that certain builtin attributes need to be automatically assigned locations.
	// This is typical for tessellation builtin outputs such as tess levels, gl_Position, etc.
	// This returns k_unknown_location if the location was explicitly assigned with
	// add_msl_shader_output or the builtin is not used, otherwise returns N in [[attribute(N)]].
	uint32_t get_automatic_builtin_output_location(spv::BuiltIn builtin) const;

	// NOTE: Only resources which are remapped using add_msl_resource_binding will be reported here.
	// Constexpr samplers are always assumed to be emitted.
	// No specific MSLResourceBinding remapping is required for constexpr samplers as long as they are remapped
	// by remap_constexpr_sampler(_by_binding).
	bool is_msl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding) const;

	// This must only be called after a successful call to CompilerMSL::compile().
	// For a variable resource ID obtained through reflection API, report the automatically assigned resource index.
	// If the descriptor set was part of an argument buffer, report the [[id(N)]],
	// or [[buffer/texture/sampler]] binding for other resources.
	// If the resource was a combined image sampler, report the image binding here,
	// use the _secondary version of this call to query the sampler half of the resource.
	// If no binding exists, uint32_t(-1) is returned.
	uint32_t get_automatic_msl_resource_binding(uint32_t id) const;

	// Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers, in which case the
	// sampler's binding is returned instead. For any other resource type, -1 is returned.
	// Secondary bindings are also used for the auxiliary image atomic buffer.
	uint32_t get_automatic_msl_resource_binding_secondary(uint32_t id) const;

	// Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for multiplanar images,
	// in which case the second plane's binding is returned instead. For any other resource type, -1 is returned.
	uint32_t get_automatic_msl_resource_binding_tertiary(uint32_t id) const;

	// Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for triplanar images,
	// in which case the third plane's binding is returned instead. For any other resource type, -1 is returned.
	uint32_t get_automatic_msl_resource_binding_quaternary(uint32_t id) const;

	// Compiles the SPIR-V code into Metal Shading Language.
	std::string compile() override;

	// Remap a sampler with ID to a constexpr sampler.
	// Older iOS targets must use constexpr samplers in certain cases (PCF),
	// so a static sampler must be used.
	// The sampler will not consume a binding, but be declared in the entry point as a constexpr sampler.
	// This can be used on both combined image/samplers (sampler2D) or standalone samplers.
	// The remapped sampler must not be an array of samplers.
	// Prefer remap_constexpr_sampler_by_binding unless you're also doing reflection anyway.
	void remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler);

	// Same as remap_constexpr_sampler, except you provide set/binding, rather than variable ID.
	// Remaps based on ID take priority over set/binding remaps.
	void remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding, const MSLConstexprSampler &sampler);

	// If using CompilerMSL::Options::pad_fragment_output_components, override the number of components we expect
	// to use for a particular location. The default is 4 if number of components is not overridden.
	void set_fragment_output_components(uint32_t location, uint32_t components);

	void set_combined_sampler_suffix(const char *suffix);
	const char *get_combined_sampler_suffix() const;
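
	// Illustrative end-to-end sketch (not part of this header): compile and query feedback.
	// The SPIR-V word vector and `some_resource_id` are hypothetical.
	//
	//   CompilerMSL msl_compiler(std::move(spirv_words));
	//   CompilerMSL::Options opts;
	//   opts.platform = CompilerMSL::Options::macOS;
	//   opts.set_msl_version(2, 2);
	//   msl_compiler.set_msl_options(opts);
	//
	//   std::string msl_source = msl_compiler.compile();
	//
	//   // After compile(), feedback queries tell the runtime which auxiliary buffers to bind.
	//   bool wants_swizzle_buffer = msl_compiler.needs_swizzle_buffer();
	//   bool wants_buffer_sizes = msl_compiler.needs_buffer_size_buffer();
	//   uint32_t slot = msl_compiler.get_automatic_msl_resource_binding(some_resource_id);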

protected:
	// An enum of SPIR-V functions that are implemented in additional
	// source code that is added to the shader if necessary.
	enum SPVFuncImpl : uint8_t
	{
		SPVFuncImplNone,
		SPVFuncImplMod,
		SPVFuncImplRadians,
		SPVFuncImplDegrees,
		SPVFuncImplFindILsb,
		SPVFuncImplFindSMsb,
		SPVFuncImplFindUMsb,
		SPVFuncImplSSign,
		SPVFuncImplArrayCopy,
		SPVFuncImplArrayCopyMultidim,
		SPVFuncImplTexelBufferCoords,
		SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations
		SPVFuncImplGradientCube,
		SPVFuncImplFMul,
		SPVFuncImplFAdd,
		SPVFuncImplFSub,
		SPVFuncImplQuantizeToF16,
		SPVFuncImplCubemapTo2DArrayFace,
		SPVFuncImplUnsafeArray, // Allow Metal to use the array<T> template to make arrays a value type
		SPVFuncImplStorageMatrix, // Allow threadgroup construction of matrices
		SPVFuncImplInverse4x4,
		SPVFuncImplInverse3x3,
		SPVFuncImplInverse2x2,
		// It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's
		// emitted before them.
		SPVFuncImplForwardArgs,
		// Likewise, this must come before *Swizzle.
		SPVFuncImplGetSwizzle,
		SPVFuncImplTextureSwizzle,
		SPVFuncImplGatherSwizzle,
		SPVFuncImplGatherCompareSwizzle,
		SPVFuncImplGatherConstOffsets,
		SPVFuncImplGatherCompareConstOffsets,
		SPVFuncImplSubgroupBroadcast,
		SPVFuncImplSubgroupBroadcastFirst,
		SPVFuncImplSubgroupBallot,
		SPVFuncImplSubgroupBallotBitExtract,
		SPVFuncImplSubgroupBallotFindLSB,
		SPVFuncImplSubgroupBallotFindMSB,
		SPVFuncImplSubgroupBallotBitCount,
		SPVFuncImplSubgroupAllEqual,
		SPVFuncImplSubgroupShuffle,
		SPVFuncImplSubgroupShuffleXor,
		SPVFuncImplSubgroupShuffleUp,
		SPVFuncImplSubgroupShuffleDown,
		SPVFuncImplQuadBroadcast,
		SPVFuncImplQuadSwap,
		SPVFuncImplReflectScalar,
		SPVFuncImplRefractScalar,
		SPVFuncImplFaceForwardScalar,
		SPVFuncImplChromaReconstructNearest2Plane,
		SPVFuncImplChromaReconstructNearest3Plane,
		SPVFuncImplChromaReconstructLinear422CositedEven2Plane,
		SPVFuncImplChromaReconstructLinear422CositedEven3Plane,
		SPVFuncImplChromaReconstructLinear422Midpoint2Plane,
		SPVFuncImplChromaReconstructLinear422Midpoint3Plane,
		SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane,
		SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane,
		SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane,
		SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane,
		SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane,
		SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane,
		SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane,
		SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane,
		SPVFuncImplExpandITUFullRange,
		SPVFuncImplExpandITUNarrowRange,
		SPVFuncImplConvertYCbCrBT709,
		SPVFuncImplConvertYCbCrBT601,
		SPVFuncImplConvertYCbCrBT2020,
		SPVFuncImplDynamicImageSampler,
		SPVFuncImplRayQueryIntersectionParams,
		SPVFuncImplVariableDescriptor,
		SPVFuncImplVariableSizedDescriptor,
		SPVFuncImplVariableDescriptorArray,
		SPVFuncImplPaddedStd140,
		SPVFuncImplReduceAdd,
		SPVFuncImplImageFence,
		SPVFuncImplTextureCast,
		SPVFuncImplMulExtended,
		SPVFuncImplSetMeshOutputsEXT,
	};

	// If the underlying resource has been used for comparison then duplicate loads of that resource must be too.
	void emit_texture_op(const Instruction &i, bool sparse) override;
	void emit_binary_ptr_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
	std::string to_ptr_expression(uint32_t id, bool register_expression_read = true);
	void emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
	void emit_instruction(const Instruction &instr) override;
	void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
	                  uint32_t count) override;
	void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op,
	                                           const uint32_t *args, uint32_t count) override;
	void emit_header() override;
	void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override;
	void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override;
	void emit_subgroup_op(const Instruction &i) override;
	std::string to_texture_op(const Instruction &i, bool sparse, bool *forward,
	                          SmallVector<uint32_t> &inherited_expressions) override;
	void emit_fixup() override;
	std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
	                             const std::string &qualifier = "");
	void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
	                        const std::string &qualifier = "", uint32_t base_offset = 0) override;
	void emit_struct_padding_target(const SPIRType &type) override;
	std::string type_to_glsl(const SPIRType &type, uint32_t id, bool member);
	std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override;
	void emit_block_hints(const SPIRBlock &block) override;
	void emit_mesh_entry_point();
	void emit_mesh_outputs();
	void emit_mesh_tasks(SPIRBlock &block) override;

	// Allow Metal to use the array<T> template to make arrays a value type
	std::string type_to_array_glsl(const SPIRType &type, uint32_t variable_id) override;
	std::string constant_op_expression(const SPIRConstantOp &cop) override;

	bool variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const override;

	// GCC workaround of lambdas calling protected functions (for older GCC versions)
	std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override;

	std::string image_type_glsl(const SPIRType &type, uint32_t id, bool member) override;
	std::string sampler_type(const SPIRType &type, uint32_t id, bool member);
	std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override;
	std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override;
	std::string to_name(uint32_t id, bool allow_alias = true) const override;
	std::string to_function_name(const TextureFunctionNameArguments &args) override;
	std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward) override;
	std::string to_initializer_expression(const SPIRVariable &var) override;
	std::string to_zero_initialized_expression(uint32_t type_id) override;

	std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id,
	                                   bool is_packed, bool row_major) override;

	// Returns true for BuiltInSampleMask because gl_SampleMask[] is an array in SPIR-V, but [[sample_mask]] is a scalar in Metal.
	bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override;

	std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override;
	bool emit_complex_bitcast(uint32_t result_id, uint32_t id, uint32_t op0) override;
	bool skip_argument(uint32_t id) const override;
	std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain_is_resolved) override;
	std::string to_qualifiers_glsl(uint32_t id) override;
	void replace_illegal_names() override;
	void declare_constant_arrays();

	void replace_illegal_entry_point_names();
	void sync_entry_point_aliases_and_names();

	static const std::unordered_set<std::string> &get_reserved_keyword_set();
	static const std::unordered_set<std::string> &get_illegal_func_names();

	// Constant arrays of non-primitive types (e.g. matrices) won't link properly into Metal libraries
	void declare_complex_constant_arrays();

	bool is_patch_block(const SPIRType &type);
	bool is_non_native_row_major_matrix(uint32_t id) override;
	bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override;
	std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, uint32_t physical_type_id,
	                                     bool is_packed, bool relaxed) override;

	bool is_tesc_shader() const;
	bool is_tese_shader() const;
	bool is_mesh_shader() const;

	void preprocess_op_codes();
	void localize_global_variables();
	void extract_global_variables_from_functions();
	void mark_packable_structs();
	void mark_as_packable(SPIRType &type);
	void mark_as_workgroup_struct(SPIRType &type);

	std::unordered_map<uint32_t, std::set<uint32_t>> function_global_vars;
	void extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids,
	                                            std::unordered_set<uint32_t> &global_var_ids,
	                                            std::unordered_set<uint32_t> &processed_func_ids);
	uint32_t add_interface_block(spv::StorageClass storage, bool patch = false);
	uint32_t add_interface_block_pointer(uint32_t ib_var_id, spv::StorageClass storage);
	uint32_t add_meshlet_block(bool per_primitive);

	struct InterfaceBlockMeta
	{
		struct LocationMeta
		{
			uint32_t base_type_id = 0;
			uint32_t num_components = 0;
			bool flat = false;
			bool noperspective = false;
			bool centroid = false;
			bool sample = false;
		};
		std::unordered_map<uint32_t, LocationMeta> location_meta;
		bool strip_array = false;
		bool allow_local_declaration = false;
	};

	std::string to_tesc_invocation_id();
	void emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array);
	void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type,
	                                     SPIRVariable &var, InterfaceBlockMeta &meta);
	void add_composite_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref,
	                                               SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta);
	void add_plain_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref,
	                                           SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta);
	bool add_component_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref,
	                                               SPIRVariable &var, const SPIRType &type,
	                                               InterfaceBlockMeta &meta);
	void add_plain_member_variable_to_interface_block(spv::StorageClass storage,
	                                                  const std::string &ib_var_ref, SPIRType &ib_type,
	                                                  SPIRVariable &var, SPIRType &var_type,
	                                                  uint32_t mbr_idx, InterfaceBlockMeta &meta,
	                                                  const std::string &mbr_name_qual,
	                                                  const std::string &var_chain_qual,
	                                                  uint32_t &location, uint32_t &var_mbr_idx);
	void add_composite_member_variable_to_interface_block(spv::StorageClass storage,
	                                                      const std::string &ib_var_ref, SPIRType &ib_type,
	                                                      SPIRVariable &var, SPIRType &var_type,
	                                                      uint32_t mbr_idx, InterfaceBlockMeta &meta,
	                                                      const std::string &mbr_name_qual,
	                                                      const std::string &var_chain_qual,
	                                                      uint32_t &location, uint32_t &var_mbr_idx,
	                                                      const Bitset &interpolation_qual);
	void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var);
	void add_tess_level_input(const std::string &base_ref, const std::string &mbr_name, SPIRVariable &var);

	void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id);

	void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type,
	                                     spv::StorageClass storage, bool fallback = false);
	uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin);
	uint32_t ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t component,
	                                   uint32_t num_components, bool strip_array);

	void emit_custom_templates();
	void emit_custom_functions();
	void emit_resources();
	void emit_specialization_constants_and_structs();
	void emit_interface_block(uint32_t ib_var_id);
	bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs);
	bool is_var_runtime_size_array(const SPIRVariable &var) const;
	uint32_t get_resource_array_size(const SPIRType &type, uint32_t id) const;

	void fix_up_shader_inputs_outputs();

	std::string func_type_decl(SPIRType &type);
	std::string entry_point_args_classic(bool append_comma);
	std::string entry_point_args_argument_buffer(bool append_comma);
	std::string entry_point_arg_stage_in();
	void entry_point_args_builtin(std::string &args);
	void entry_point_args_discrete_descriptors(std::string &args);
	std::string append_member_name(const std::string &qualifier, const SPIRType &type, uint32_t index);
	std::string ensure_valid_name(std::string name, std::string pfx);
	std::string to_sampler_expression(uint32_t id);
	std::string to_swizzle_expression(uint32_t id);
	std::string to_buffer_size_expression(uint32_t id);
	bool is_sample_rate() const;
	bool is_intersection_query() const;
	bool is_direct_input_builtin(spv::BuiltIn builtin);
	std::string builtin_qualifier(spv::BuiltIn builtin);
	std::string builtin_type_decl(spv::BuiltIn builtin, uint32_t id = 0);
	std::string built_in_func_arg(spv::BuiltIn builtin, bool prefix_comma);
	std::string member_attribute_qualifier(const SPIRType &type, uint32_t index);
	std::string member_location_attribute_qualifier(const SPIRType &type, uint32_t index);
	std::string argument_decl(const SPIRFunction::Parameter &arg);
	const char *descriptor_address_space(uint32_t id, spv::StorageClass storage, const char *plain_address_space) const;
	std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp);
	uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane = 0);
	uint32_t get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr) const;
	uint32_t get_or_allocate_builtin_input_member_location(spv::BuiltIn builtin,
	                                                       uint32_t type_id, uint32_t index, uint32_t *comp = nullptr);
	uint32_t get_or_allocate_builtin_output_member_location(spv::BuiltIn builtin,
	                                                        uint32_t type_id, uint32_t index, uint32_t *comp = nullptr);

	uint32_t get_physical_tess_level_array_size(spv::BuiltIn builtin) const;

	uint32_t get_physical_type_stride(const SPIRType &type) const override;

	// MSL packing rules. These compute the effective packing rules as observed by the MSL compiler in the MSL output.
	// These values can change depending on various extended decorations which control packing rules.
	// We need to make these rules match up with SPIR-V declared rules.
	uint32_t get_declared_type_size_msl(const SPIRType &type, bool packed, bool row_major) const;
	uint32_t get_declared_type_array_stride_msl(const SPIRType &type, bool packed, bool row_major) const;
	uint32_t get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const;
	uint32_t get_declared_type_alignment_msl(const SPIRType &type, bool packed, bool row_major) const;

	uint32_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const;
	uint32_t get_declared_struct_member_array_stride_msl(const SPIRType &struct_type, uint32_t index) const;
	uint32_t get_declared_struct_member_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const;
	uint32_t get_declared_struct_member_alignment_msl(const SPIRType &struct_type, uint32_t index) const;

	uint32_t get_declared_input_size_msl(const SPIRType &struct_type, uint32_t index) const;
	uint32_t get_declared_input_array_stride_msl(const SPIRType &struct_type, uint32_t index) const;
	uint32_t get_declared_input_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const;
	uint32_t get_declared_input_alignment_msl(const SPIRType &struct_type, uint32_t index) const;

	const SPIRType &get_physical_member_type(const SPIRType &struct_type, uint32_t index) const;
	SPIRType get_presumed_input_type(const SPIRType &struct_type, uint32_t index) const;

	uint32_t get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment = false,
	                                      bool ignore_padding = false) const;

	std::string to_component_argument(uint32_t id);
	void align_struct(SPIRType &ib_type, std::unordered_set<uint32_t> &aligned_structs);
	void mark_scalar_layout_structs(const SPIRType &ib_type);
	void mark_struct_members_packed(const SPIRType &type);
	void ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index);
	bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const;
	std::string get_argument_address_space(const SPIRVariable &argument);
	std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false);
	static bool decoration_flags_signal_volatile(const Bitset &flags);
	const char *to_restrict(uint32_t id, bool space);
	SPIRType &get_stage_in_struct_type();
	SPIRType &get_stage_out_struct_type();
	SPIRType &get_patch_stage_in_struct_type();
	SPIRType &get_patch_stage_out_struct_type();
	std::string get_tess_factor_struct_name();
	SPIRType &get_uint_type();
	uint32_t get_uint_type_id();
	void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, spv::Op opcode,
	                         uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0,
	                         bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0);
	const char *get_memory_order(uint32_t spv_mem_sem);
	void add_pragma_line(const std::string &line);
	void add_typedef_line(const std::string &line);
	void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
	bool emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id,
	                     spv::StorageClass lhs_storage, spv::StorageClass rhs_storage) override;
	void build_implicit_builtins();
	uint32_t build_constant_uint_array_pointer();
	void emit_entry_point_declarations() override;
	bool uses_explicit_early_fragment_test();

1095 uint32_t builtin_frag_coord_id = 0;
1096 uint32_t builtin_sample_id_id = 0;
1097 uint32_t builtin_sample_mask_id = 0;
1098 uint32_t builtin_helper_invocation_id = 0;
1099 uint32_t builtin_vertex_idx_id = 0;
1100 uint32_t builtin_base_vertex_id = 0;
1101 uint32_t builtin_instance_idx_id = 0;
1102 uint32_t builtin_base_instance_id = 0;
1103 uint32_t builtin_view_idx_id = 0;
1104 uint32_t builtin_layer_id = 0;
1105 uint32_t builtin_invocation_id_id = 0;
1106 uint32_t builtin_primitive_id_id = 0;
1107 uint32_t builtin_subgroup_invocation_id_id = 0;
1108 uint32_t builtin_subgroup_size_id = 0;
1109 uint32_t builtin_dispatch_base_id = 0;
1110 uint32_t builtin_stage_input_size_id = 0;
1111 uint32_t builtin_local_invocation_index_id = 0;
1112 uint32_t builtin_workgroup_size_id = 0;
1113 uint32_t builtin_mesh_primitive_indices_id = 0;
1114 uint32_t builtin_mesh_sizes_id = 0;
1115 uint32_t builtin_task_grid_id = 0;
1116 uint32_t builtin_frag_depth_id = 0;
1117 uint32_t swizzle_buffer_id = 0;
1118 uint32_t buffer_size_buffer_id = 0;
1119 uint32_t view_mask_buffer_id = 0;
1120 uint32_t dynamic_offsets_buffer_id = 0;
1121 uint32_t uint_type_id = 0;
1122 uint32_t shared_uint_type_id = 0;
1123 uint32_t meshlet_type_id = 0;
1124 uint32_t argument_buffer_padding_buffer_type_id = 0;
1125 uint32_t argument_buffer_padding_image_type_id = 0;
1126 uint32_t argument_buffer_padding_sampler_type_id = 0;
1127
1128 bool does_shader_write_sample_mask = false;
1129 bool frag_shader_needs_discard_checks = false;
1130
	void cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
	void cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override;
	void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override;

	void analyze_sampled_image_usage();

	bool access_chain_needs_stage_io_builtin_translation(uint32_t base) override;
	bool prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage,
	                                            bool &is_packed) override;
	void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length);
	void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) override;

	bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length);
	bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr);
	bool is_out_of_bounds_tessellation_level(uint32_t id_lhs);

	void ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin);

	void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id);

	std::string convert_to_f32(const std::string &expr, uint32_t components);
	Options msl_options;
	std::set<SPVFuncImpl> spv_function_implementations;
	// Must be ordered to ensure declarations are in a specific order.
	std::map<LocationComponentPair, MSLShaderInterfaceVariable> inputs_by_location;
	std::unordered_map<uint32_t, MSLShaderInterfaceVariable> inputs_by_builtin;
	std::map<LocationComponentPair, MSLShaderInterfaceVariable> outputs_by_location;
	std::unordered_map<uint32_t, MSLShaderInterfaceVariable> outputs_by_builtin;
	std::unordered_set<uint32_t> location_inputs_in_use;
	std::unordered_set<uint32_t> location_inputs_in_use_fallback;
	std::unordered_set<uint32_t> location_outputs_in_use;
	std::unordered_set<uint32_t> location_outputs_in_use_fallback;
	std::unordered_map<uint32_t, uint32_t> fragment_output_components;
	std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_input_location;
	std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_output_location;
	std::set<std::string> pragma_lines;
	std::set<std::string> typedef_lines;
	SmallVector<uint32_t> vars_needing_early_declaration;

	std::unordered_map<StageSetBinding, std::pair<MSLResourceBinding, bool>, InternalHasher> resource_bindings;
	std::unordered_map<StageSetBinding, uint32_t, InternalHasher> resource_arg_buff_idx_to_binding_number;

	uint32_t next_metal_resource_index_buffer = 0;
	uint32_t next_metal_resource_index_texture = 0;
	uint32_t next_metal_resource_index_sampler = 0;
	// Intentionally uninitialized, works around MSVC 2013 bug.
	uint32_t next_metal_resource_ids[kMaxArgumentBuffers];
	VariableID stage_in_var_id = 0;
	VariableID stage_out_var_id = 0;
	VariableID patch_stage_in_var_id = 0;
	VariableID patch_stage_out_var_id = 0;
	VariableID stage_in_ptr_var_id = 0;
	VariableID stage_out_ptr_var_id = 0;
	VariableID tess_level_inner_var_id = 0;
	VariableID tess_level_outer_var_id = 0;
	VariableID mesh_out_per_vertex = 0;
	VariableID mesh_out_per_primitive = 0;
	VariableID stage_out_masked_builtin_type_id = 0;

	// Handle HLSL-style 0-based vertex/instance index.
	enum class TriState
	{
		Neutral,
		No,
		Yes
	};
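	// Neutral until the compiler has decided whether a base-vertex/base-instance
	// argument has to be added to the entry point.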
	TriState needs_base_vertex_arg = TriState::Neutral;
	TriState needs_base_instance_arg = TriState::Neutral;

	bool has_sampled_images = false;
	bool builtin_declaration = false; // Handle HLSL-style 0-based vertex/instance index.

	bool is_using_builtin_array = false; // Force the use of C style array declaration.
	bool using_builtin_array() const;

	bool is_rasterization_disabled = false;
	bool capture_output_to_buffer = false;
	bool needs_swizzle_buffer_def = false;
	bool used_swizzle_buffer = false;
	bool added_builtin_tess_level = false;
	bool needs_subgroup_invocation_id = false;
	bool needs_subgroup_size = false;
	bool needs_sample_id = false;
	bool needs_helper_invocation = false;
	bool writes_to_depth = false;
	std::string qual_pos_var_name;
	std::string stage_in_var_name = "in";
	std::string stage_out_var_name = "out";
	std::string patch_stage_in_var_name = "patchIn";
	std::string patch_stage_out_var_name = "patchOut";
	std::string sampler_name_suffix = "Smplr";
	std::string swizzle_name_suffix = "Swzl";
	std::string buffer_size_name_suffix = "BufferSize";
	std::string plane_name_suffix = "Plane";
	std::string input_wg_var_name = "gl_in";
	std::string input_buffer_var_name = "spvIn";
	std::string output_buffer_var_name = "spvOut";
	std::string patch_input_buffer_var_name = "spvPatchIn";
	std::string patch_output_buffer_var_name = "spvPatchOut";
	std::string tess_factor_buffer_var_name = "spvTessLevel";
	std::string index_buffer_var_name = "spvIndices";
	spv::Op previous_instruction_opcode = spv::OpNop;

	// Must be ordered since declaration is in a specific order.
	std::map<uint32_t, MSLConstexprSampler> constexpr_samplers_by_id;
	std::unordered_map<SetBindingPair, MSLConstexprSampler, InternalHasher> constexpr_samplers_by_binding;
	const MSLConstexprSampler *find_constexpr_sampler(uint32_t id) const;

	std::unordered_set<uint32_t> buffers_requiring_array_length;
	SmallVector<std::pair<uint32_t, uint32_t>> buffer_aliases_argument;
	SmallVector<uint32_t> buffer_aliases_discrete;
	std::unordered_set<uint32_t> atomic_image_vars_emulated; // Emulate texture2D atomic operations
	std::unordered_set<uint32_t> pull_model_inputs;
	std::unordered_set<uint32_t> recursive_inputs;

	SmallVector<SPIRVariable *> entry_point_bindings;

	// Must be ordered since array is in a specific order.
	std::map<SetBindingPair, std::pair<uint32_t, uint32_t>> buffers_requiring_dynamic_offset;

	SmallVector<uint32_t> disabled_frag_outputs;

	std::unordered_set<SetBindingPair, InternalHasher> inline_uniform_blocks;

	uint32_t argument_buffer_ids[kMaxArgumentBuffers];
	uint32_t argument_buffer_discrete_mask = 0;
	uint32_t argument_buffer_device_storage_mask = 0;
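	// Bitmasks indexed by descriptor set: a set bit marks the set as using discrete
	// (non-argument-buffer) bindings, or as requiring the device address space, respectively.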
	void emit_argument_buffer_aliased_descriptor(const SPIRVariable &aliased_var,
	                                             const SPIRVariable &base_var);

	void analyze_argument_buffers();
	bool descriptor_set_is_argument_buffer(uint32_t desc_set) const;
	const MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx) const;
	void add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);
	void add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);
	void add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind);
	void add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, uint32_t count);

	uint32_t get_target_components_for_fragment_location(uint32_t location) const;
	uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components,
	                                    SPIRType::BaseType basetype = SPIRType::Unknown);
	uint32_t build_msl_interpolant_type(uint32_t type_id, bool is_noperspective);

	bool suppress_missing_prototypes = false;
	bool suppress_incompatible_pointer_types_discard_qualifiers = false;

	void add_spv_func_and_recompile(SPVFuncImpl spv_func);

	void activate_argument_buffer_resources();

	bool type_is_msl_framebuffer_fetch(const SPIRType &type) const;
	bool is_supported_argument_buffer_type(const SPIRType &type) const;

	bool variable_storage_requires_stage_io(spv::StorageClass storage) const;
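	// Whether the generated shader must update its helper-invocation state manually
	// (e.g. after a demote/discard); gated on the option and on MSL 2.3, mirroring the check below.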
	bool needs_manual_helper_invocation_updates() const
	{
		return msl_options.manual_helper_invocation_updates && msl_options.supports_msl_version(2, 3);
	}
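	// True when fragment-shader resource writes have to be guarded against earlier discards;
	// only applies when the check_discarded_frag_stores option is enabled and MSL 2.3 is available.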
	bool needs_frag_discard_checks() const
	{
		return get_execution_model() == spv::ExecutionModelFragment && msl_options.supports_msl_version(2, 3) &&
		       msl_options.check_discarded_frag_stores && frag_shader_needs_discard_checks;
	}
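	// additional_fixed_sample_mask defaults to 0xffffffff, meaning "no extra mask";
	// any other value is folded into the shader's sample-mask output.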
	bool has_additional_fixed_sample_mask() const { return msl_options.additional_fixed_sample_mask != 0xffffffff; }
	std::string additional_fixed_sample_mask_str() const;

	// OpcodeHandler that handles several MSL preprocessing operations.
	struct OpCodePreprocessor : OpcodeHandler
	{
		OpCodePreprocessor(CompilerMSL &compiler_)
		    : compiler(compiler_)
		{
		}

		bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override;
		CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args);
		void check_resource_write(uint32_t var_id);

		CompilerMSL &compiler;
		std::unordered_map<uint32_t, uint32_t> result_types;
		std::unordered_map<uint32_t, uint32_t> image_pointers_emulated; // Emulate texture2D atomic operations
		bool suppress_missing_prototypes = false;
		bool uses_atomics = false;
		bool uses_image_write = false;
		bool uses_buffer_write = false;
		bool uses_discard = false;
		bool needs_subgroup_invocation_id = false;
		bool needs_subgroup_size = false;
		bool needs_sample_id = false;
		bool needs_helper_invocation = false;
	};

	// OpcodeHandler that scans for uses of sampled images
	struct SampledImageScanner : OpcodeHandler
	{
		SampledImageScanner(CompilerMSL &compiler_)
		    : compiler(compiler_)
		{
		}

		bool handle(spv::Op opcode, const uint32_t *args, uint32_t) override;

		CompilerMSL &compiler;
	};

	// Sorts the members of a SPIRType and associated Meta info based on a settable sorting
	// aspect, which defines which aspect of the struct members will be used to sort them.
	// Regardless of the sorting aspect, built-in members always appear at the end of the struct.
	struct MemberSorter
	{
		enum SortAspect
		{
			LocationThenBuiltInType,
			Offset
		};

		void sort();
		bool operator()(uint32_t mbr_idx1, uint32_t mbr_idx2);
		MemberSorter(SPIRType &t, Meta &m, SortAspect sa);

		SPIRType &type;
		Meta &meta;
		SortAspect sort_aspect;
	};
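	// A usage sketch (hypothetical variable names; the concrete call sites live in spirv_msl.cpp):
	//   MemberSorter sorter(struct_type, meta_for_struct, MemberSorter::Offset);
	//   sorter.sort(); // reorders members in place, keeping built-ins last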
1360};
1361} // namespace SPIRV_CROSS_NAMESPACE
1362
1363#endif