1 | /* |
2 | * Copyright 2016-2021 The Brenwill Workshop Ltd. |
3 | * SPDX-License-Identifier: Apache-2.0 OR MIT |
4 | * |
5 | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | * you may not use this file except in compliance with the License. |
7 | * You may obtain a copy of the License at |
8 | * |
9 | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | * |
11 | * Unless required by applicable law or agreed to in writing, software |
12 | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | * See the License for the specific language governing permissions and |
15 | * limitations under the License. |
16 | */ |
17 | |
18 | /* |
19 | * At your option, you may choose to accept this material under either: |
20 | * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or |
21 | * 2. The MIT License, found at <http://opensource.org/licenses/MIT>. |
22 | */ |
23 | |
24 | #ifndef SPIRV_CROSS_MSL_HPP |
25 | #define SPIRV_CROSS_MSL_HPP |
26 | |
27 | #include "spirv_glsl.hpp" |
28 | #include <map> |
29 | #include <set> |
30 | #include <stddef.h> |
31 | #include <unordered_map> |
32 | #include <unordered_set> |
33 | |
34 | namespace SPIRV_CROSS_NAMESPACE |
35 | { |
36 | |
37 | // Indicates the format of a shader interface variable. Currently limited to specifying |
38 | // if the input is an 8-bit unsigned integer, 16-bit unsigned integer, or |
39 | // some other format. |
40 | enum MSLShaderVariableFormat |
41 | { |
42 | MSL_SHADER_VARIABLE_FORMAT_OTHER = 0, |
43 | MSL_SHADER_VARIABLE_FORMAT_UINT8 = 1, |
44 | MSL_SHADER_VARIABLE_FORMAT_UINT16 = 2, |
45 | MSL_SHADER_VARIABLE_FORMAT_ANY16 = 3, |
46 | MSL_SHADER_VARIABLE_FORMAT_ANY32 = 4, |
47 | |
48 | // Deprecated aliases. |
49 | MSL_VERTEX_FORMAT_OTHER = MSL_SHADER_VARIABLE_FORMAT_OTHER, |
50 | MSL_VERTEX_FORMAT_UINT8 = MSL_SHADER_VARIABLE_FORMAT_UINT8, |
51 | MSL_VERTEX_FORMAT_UINT16 = MSL_SHADER_VARIABLE_FORMAT_UINT16, |
52 | MSL_SHADER_INPUT_FORMAT_OTHER = MSL_SHADER_VARIABLE_FORMAT_OTHER, |
53 | MSL_SHADER_INPUT_FORMAT_UINT8 = MSL_SHADER_VARIABLE_FORMAT_UINT8, |
54 | MSL_SHADER_INPUT_FORMAT_UINT16 = MSL_SHADER_VARIABLE_FORMAT_UINT16, |
55 | MSL_SHADER_INPUT_FORMAT_ANY16 = MSL_SHADER_VARIABLE_FORMAT_ANY16, |
56 | MSL_SHADER_INPUT_FORMAT_ANY32 = MSL_SHADER_VARIABLE_FORMAT_ANY32, |
57 | |
58 | MSL_SHADER_VARIABLE_FORMAT_INT_MAX = 0x7fffffff |
59 | }; |
60 | |
61 | // Indicates the rate at which a variable changes value, one of: per-vertex, |
62 | // per-primitive, or per-patch. |
63 | enum MSLShaderVariableRate |
64 | { |
65 | MSL_SHADER_VARIABLE_RATE_PER_VERTEX = 0, |
66 | MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE = 1, |
67 | MSL_SHADER_VARIABLE_RATE_PER_PATCH = 2, |
68 | |
69 | MSL_SHADER_VARIABLE_RATE_INT_MAX = 0x7fffffff, |
70 | }; |
71 | |
72 | // Defines MSL characteristics of a shader interface variable at a particular location. |
73 | // After compilation, it is possible to query whether or not this location was used. |
74 | // If vecsize is nonzero, it must be greater than or equal to the vecsize declared in the shader, |
75 | // or behavior is undefined. |
76 | struct MSLShaderInterfaceVariable |
77 | { |
78 | uint32_t location = 0; |
79 | uint32_t component = 0; |
80 | MSLShaderVariableFormat format = MSL_SHADER_VARIABLE_FORMAT_OTHER; |
81 | spv::BuiltIn builtin = spv::BuiltInMax; |
82 | uint32_t vecsize = 0; |
83 | MSLShaderVariableRate rate = MSL_SHADER_VARIABLE_RATE_PER_VERTEX; |
84 | }; |
85 | |
86 | // Matches the binding index of a MSL resource for a binding within a descriptor set. |
87 | // Taken together, the stage, desc_set and binding combine to form a reference to a resource |
88 | // descriptor used in a particular shading stage. The count field indicates the number of |
89 | // resources consumed by this binding, if the binding represents an array of resources. |
90 | // If the resource array is a run-time-sized array, which are legal in GLSL or SPIR-V, this value |
91 | // will be used to declare the array size in MSL, which does not support run-time-sized arrays. |
92 | // If pad_argument_buffer_resources is enabled, the base_type and count values are used to |
93 | // specify the base type and array size of the resource in the argument buffer, if that resource |
94 | // is not defined and used by the shader. With pad_argument_buffer_resources enabled, this |
95 | // information will be used to pad the argument buffer structure, in order to align that |
96 | // structure consistently for all uses, across all shaders, of the descriptor set represented |
97 | // by the arugment buffer. If pad_argument_buffer_resources is disabled, base_type does not |
98 | // need to be populated, and if the resource is also not a run-time sized array, the count |
99 | // field does not need to be populated. |
100 | // If using MSL 2.0 argument buffers, the descriptor set is not marked as a discrete descriptor set, |
101 | // and (for iOS only) the resource is not a storage image (sampled != 2), the binding reference we |
102 | // remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure. |
103 | // For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will |
104 | // become a [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used. |
105 | struct MSLResourceBinding |
106 | { |
107 | spv::ExecutionModel stage = spv::ExecutionModelMax; |
108 | SPIRType::BaseType basetype = SPIRType::Unknown; |
109 | uint32_t desc_set = 0; |
110 | uint32_t binding = 0; |
111 | uint32_t count = 0; |
112 | uint32_t msl_buffer = 0; |
113 | uint32_t msl_texture = 0; |
114 | uint32_t msl_sampler = 0; |
115 | }; |
116 | |
117 | enum MSLSamplerCoord |
118 | { |
119 | MSL_SAMPLER_COORD_NORMALIZED = 0, |
120 | MSL_SAMPLER_COORD_PIXEL = 1, |
121 | MSL_SAMPLER_INT_MAX = 0x7fffffff |
122 | }; |
123 | |
124 | enum MSLSamplerFilter |
125 | { |
126 | MSL_SAMPLER_FILTER_NEAREST = 0, |
127 | MSL_SAMPLER_FILTER_LINEAR = 1, |
128 | MSL_SAMPLER_FILTER_INT_MAX = 0x7fffffff |
129 | }; |
130 | |
131 | enum MSLSamplerMipFilter |
132 | { |
133 | MSL_SAMPLER_MIP_FILTER_NONE = 0, |
134 | MSL_SAMPLER_MIP_FILTER_NEAREST = 1, |
135 | MSL_SAMPLER_MIP_FILTER_LINEAR = 2, |
136 | MSL_SAMPLER_MIP_FILTER_INT_MAX = 0x7fffffff |
137 | }; |
138 | |
139 | enum MSLSamplerAddress |
140 | { |
141 | MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO = 0, |
142 | MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE = 1, |
143 | MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER = 2, |
144 | MSL_SAMPLER_ADDRESS_REPEAT = 3, |
145 | MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT = 4, |
146 | MSL_SAMPLER_ADDRESS_INT_MAX = 0x7fffffff |
147 | }; |
148 | |
149 | enum MSLSamplerCompareFunc |
150 | { |
151 | MSL_SAMPLER_COMPARE_FUNC_NEVER = 0, |
152 | MSL_SAMPLER_COMPARE_FUNC_LESS = 1, |
153 | MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL = 2, |
154 | MSL_SAMPLER_COMPARE_FUNC_GREATER = 3, |
155 | MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL = 4, |
156 | MSL_SAMPLER_COMPARE_FUNC_EQUAL = 5, |
157 | MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL = 6, |
158 | MSL_SAMPLER_COMPARE_FUNC_ALWAYS = 7, |
159 | MSL_SAMPLER_COMPARE_FUNC_INT_MAX = 0x7fffffff |
160 | }; |
161 | |
162 | enum MSLSamplerBorderColor |
163 | { |
164 | MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK = 0, |
165 | MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK = 1, |
166 | MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE = 2, |
167 | MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff |
168 | }; |
169 | |
170 | enum MSLFormatResolution |
171 | { |
172 | MSL_FORMAT_RESOLUTION_444 = 0, |
173 | MSL_FORMAT_RESOLUTION_422, |
174 | MSL_FORMAT_RESOLUTION_420, |
175 | MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff |
176 | }; |
177 | |
178 | enum MSLChromaLocation |
179 | { |
180 | MSL_CHROMA_LOCATION_COSITED_EVEN = 0, |
181 | MSL_CHROMA_LOCATION_MIDPOINT, |
182 | MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff |
183 | }; |
184 | |
185 | enum MSLComponentSwizzle |
186 | { |
187 | MSL_COMPONENT_SWIZZLE_IDENTITY = 0, |
188 | MSL_COMPONENT_SWIZZLE_ZERO, |
189 | MSL_COMPONENT_SWIZZLE_ONE, |
190 | MSL_COMPONENT_SWIZZLE_R, |
191 | MSL_COMPONENT_SWIZZLE_G, |
192 | MSL_COMPONENT_SWIZZLE_B, |
193 | MSL_COMPONENT_SWIZZLE_A, |
194 | MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff |
195 | }; |
196 | |
197 | enum MSLSamplerYCbCrModelConversion |
198 | { |
199 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, |
200 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, |
201 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, |
202 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, |
203 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, |
204 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff |
205 | }; |
206 | |
207 | enum MSLSamplerYCbCrRange |
208 | { |
209 | MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, |
210 | MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, |
211 | MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff |
212 | }; |
213 | |
214 | struct MSLConstexprSampler |
215 | { |
216 | MSLSamplerCoord coord = MSL_SAMPLER_COORD_NORMALIZED; |
217 | MSLSamplerFilter min_filter = MSL_SAMPLER_FILTER_NEAREST; |
218 | MSLSamplerFilter mag_filter = MSL_SAMPLER_FILTER_NEAREST; |
219 | MSLSamplerMipFilter mip_filter = MSL_SAMPLER_MIP_FILTER_NONE; |
220 | MSLSamplerAddress s_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; |
221 | MSLSamplerAddress t_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; |
222 | MSLSamplerAddress r_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; |
223 | MSLSamplerCompareFunc compare_func = MSL_SAMPLER_COMPARE_FUNC_NEVER; |
224 | MSLSamplerBorderColor border_color = MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK; |
225 | float lod_clamp_min = 0.0f; |
226 | float lod_clamp_max = 1000.0f; |
227 | int max_anisotropy = 1; |
228 | |
229 | // Sampler Y'CbCr conversion parameters |
230 | uint32_t planes = 0; |
231 | MSLFormatResolution resolution = MSL_FORMAT_RESOLUTION_444; |
232 | MSLSamplerFilter chroma_filter = MSL_SAMPLER_FILTER_NEAREST; |
233 | MSLChromaLocation x_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; |
234 | MSLChromaLocation y_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; |
235 | MSLComponentSwizzle swizzle[4]; // IDENTITY, IDENTITY, IDENTITY, IDENTITY |
236 | MSLSamplerYCbCrModelConversion ycbcr_model = MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY; |
237 | MSLSamplerYCbCrRange ycbcr_range = MSL_SAMPLER_YCBCR_RANGE_ITU_FULL; |
238 | uint32_t bpc = 8; |
239 | |
240 | bool compare_enable = false; |
241 | bool lod_clamp_enable = false; |
242 | bool anisotropy_enable = false; |
243 | bool ycbcr_conversion_enable = false; |
244 | |
245 | MSLConstexprSampler() |
246 | { |
247 | for (uint32_t i = 0; i < 4; i++) |
248 | swizzle[i] = MSL_COMPONENT_SWIZZLE_IDENTITY; |
249 | } |
250 | bool swizzle_is_identity() const |
251 | { |
252 | return (swizzle[0] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[1] == MSL_COMPONENT_SWIZZLE_IDENTITY && |
253 | swizzle[2] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[3] == MSL_COMPONENT_SWIZZLE_IDENTITY); |
254 | } |
255 | bool swizzle_has_one_or_zero() const |
256 | { |
257 | return (swizzle[0] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[0] == MSL_COMPONENT_SWIZZLE_ONE || |
258 | swizzle[1] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[1] == MSL_COMPONENT_SWIZZLE_ONE || |
259 | swizzle[2] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[2] == MSL_COMPONENT_SWIZZLE_ONE || |
260 | swizzle[3] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[3] == MSL_COMPONENT_SWIZZLE_ONE); |
261 | } |
262 | }; |
263 | |
264 | // Special constant used in a MSLResourceBinding desc_set |
265 | // element to indicate the bindings for the push constants. |
266 | // Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly. |
267 | static const uint32_t kPushConstDescSet = ResourceBindingPushConstantDescriptorSet; |
268 | |
269 | // Special constant used in a MSLResourceBinding binding |
270 | // element to indicate the bindings for the push constants. |
271 | // Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly. |
272 | static const uint32_t kPushConstBinding = ResourceBindingPushConstantBinding; |
273 | |
274 | // Special constant used in a MSLResourceBinding binding |
275 | // element to indicate the buffer binding for swizzle buffers. |
276 | static const uint32_t kSwizzleBufferBinding = ~(1u); |
277 | |
278 | // Special constant used in a MSLResourceBinding binding |
279 | // element to indicate the buffer binding for buffer size buffers to support OpArrayLength. |
280 | static const uint32_t kBufferSizeBufferBinding = ~(2u); |
281 | |
282 | // Special constant used in a MSLResourceBinding binding |
283 | // element to indicate the buffer binding used for the argument buffer itself. |
284 | // This buffer binding should be kept as small as possible as all automatic bindings for buffers |
285 | // will start at max(kArgumentBufferBinding) + 1. |
286 | static const uint32_t kArgumentBufferBinding = ~(3u); |
287 | |
288 | static const uint32_t kMaxArgumentBuffers = 8; |
289 | |
290 | // Decompiles SPIR-V to Metal Shading Language |
291 | class CompilerMSL : public CompilerGLSL |
292 | { |
293 | public: |
294 | // Options for compiling to Metal Shading Language |
295 | struct Options |
296 | { |
297 | typedef enum |
298 | { |
299 | iOS = 0, |
300 | macOS = 1 |
301 | } Platform; |
302 | |
303 | Platform platform = macOS; |
304 | uint32_t msl_version = make_msl_version(major: 1, minor: 2); |
305 | uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers |
306 | uint32_t r32ui_linear_texture_alignment = 4; |
307 | uint32_t r32ui_alignment_constant_id = 65535; |
308 | uint32_t swizzle_buffer_index = 30; |
309 | uint32_t indirect_params_buffer_index = 29; |
310 | uint32_t shader_output_buffer_index = 28; |
311 | uint32_t shader_patch_output_buffer_index = 27; |
312 | uint32_t shader_tess_factor_buffer_index = 26; |
313 | uint32_t buffer_size_buffer_index = 25; |
314 | uint32_t view_mask_buffer_index = 24; |
315 | uint32_t dynamic_offsets_buffer_index = 23; |
316 | uint32_t shader_input_buffer_index = 22; |
317 | uint32_t shader_index_buffer_index = 21; |
318 | uint32_t shader_patch_input_buffer_index = 20; |
319 | uint32_t shader_input_wg_index = 0; |
320 | uint32_t device_index = 0; |
321 | uint32_t enable_frag_output_mask = 0xffffffff; |
322 | // Metal doesn't allow setting a fixed sample mask directly in the pipeline. |
323 | // We can evade this restriction by ANDing the internal sample_mask output |
324 | // of the shader with the additional fixed sample mask. |
325 | uint32_t additional_fixed_sample_mask = 0xffffffff; |
326 | bool enable_point_size_builtin = true; |
327 | bool enable_frag_depth_builtin = true; |
328 | bool enable_frag_stencil_ref_builtin = true; |
329 | bool disable_rasterization = false; |
330 | bool capture_output_to_buffer = false; |
331 | bool swizzle_texture_samples = false; |
332 | bool tess_domain_origin_lower_left = false; |
333 | bool multiview = false; |
334 | bool multiview_layered_rendering = true; |
335 | bool view_index_from_device_index = false; |
336 | bool dispatch_base = false; |
337 | bool texture_1D_as_2D = false; |
338 | |
339 | // Enable use of Metal argument buffers. |
340 | // MSL 2.0 must also be enabled. |
341 | bool argument_buffers = false; |
342 | |
343 | // Defines Metal argument buffer tier levels. |
344 | // Uses same values as Metal MTLArgumentBuffersTier enumeration. |
345 | enum class ArgumentBuffersTier |
346 | { |
347 | Tier1 = 0, |
348 | Tier2 = 1, |
349 | }; |
350 | |
351 | // When using Metal argument buffers, indicates the Metal argument buffer tier level supported by the Metal platform. |
352 | // Ignored when Options::argument_buffers is disabled. |
353 | // - Tier1 supports writable images on macOS, but not on iOS. |
354 | // - Tier2 supports writable images on macOS and iOS, and higher resource count limits. |
355 | // Tier capabilities based on recommendations from Apple engineering. |
356 | ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1; |
357 | |
358 | // Enables specifick argument buffer format with extra information to track SSBO-length |
359 | bool runtime_array_rich_descriptor = false; |
360 | |
361 | // Ensures vertex and instance indices start at zero. This reflects the behavior of HLSL with SV_VertexID and SV_InstanceID. |
362 | bool enable_base_index_zero = false; |
363 | |
364 | // Fragment output in MSL must have at least as many components as the render pass. |
365 | // Add support to explicit pad out components. |
366 | bool pad_fragment_output_components = false; |
367 | |
368 | // Specifies whether the iOS target version supports the [[base_vertex]] and [[base_instance]] attributes. |
369 | bool ios_support_base_vertex_instance = false; |
370 | |
371 | // Use Metal's native frame-buffer fetch API for subpass inputs. |
372 | bool use_framebuffer_fetch_subpasses = false; |
373 | |
374 | // Enables use of "fma" intrinsic for invariant float math |
375 | bool invariant_float_math = false; |
376 | |
377 | // Emulate texturecube_array with texture2d_array for iOS where this type is not available |
378 | bool emulate_cube_array = false; |
379 | |
380 | // Allow user to enable decoration binding |
381 | bool enable_decoration_binding = false; |
382 | |
383 | // Requires MSL 2.1, use the native support for texel buffers. |
384 | bool texture_buffer_native = false; |
385 | |
386 | // Forces all resources which are part of an argument buffer to be considered active. |
387 | // This ensures ABI compatibility between shaders where some resources might be unused, |
388 | // and would otherwise declare a different IAB. |
389 | bool force_active_argument_buffer_resources = false; |
390 | |
391 | // Aligns each resource in an argument buffer to its assigned index value, id(N), |
392 | // by adding synthetic padding members in the argument buffer struct for any resources |
393 | // in the argument buffer that are not defined and used by the shader. This allows |
394 | // the shader to index into the correct argument in a descriptor set argument buffer |
395 | // that is shared across shaders, where not all resources in the argument buffer are |
396 | // defined in each shader. For this to work, an MSLResourceBinding must be provided for |
397 | // all descriptors in any descriptor set held in an argument buffer in the shader, and |
398 | // that MSLResourceBinding must have the basetype and count members populated correctly. |
399 | // The implementation here assumes any inline blocks in the argument buffer is provided |
400 | // in a Metal buffer, and doesn't take into consideration inline blocks that are |
401 | // optionally embedded directly into the argument buffer via add_inline_uniform_block(). |
402 | bool pad_argument_buffer_resources = false; |
403 | |
404 | // Forces the use of plain arrays, which works around certain driver bugs on certain versions |
405 | // of Intel Macbooks. See https://github.com/KhronosGroup/SPIRV-Cross/issues/1210. |
406 | // May reduce performance in scenarios where arrays are copied around as value-types. |
407 | bool force_native_arrays = false; |
408 | |
409 | // If a shader writes clip distance, also emit user varyings which |
410 | // can be read in subsequent stages. |
411 | bool enable_clip_distance_user_varying = true; |
412 | |
413 | // In a tessellation control shader, assume that more than one patch can be processed in a |
414 | // single workgroup. This requires changes to the way the InvocationId and PrimitiveId |
415 | // builtins are processed, but should result in more efficient usage of the GPU. |
416 | bool multi_patch_workgroup = false; |
417 | |
418 | // Use storage buffers instead of vertex-style attributes for tessellation evaluation |
419 | // input. This may require conversion of inputs in the generated post-tessellation |
420 | // vertex shader, but allows the use of nested arrays. |
421 | bool raw_buffer_tese_input = false; |
422 | |
423 | // If set, a vertex shader will be compiled as part of a tessellation pipeline. |
424 | // It will be translated as a compute kernel, so it can use the global invocation ID |
425 | // to index the output buffer. |
426 | bool vertex_for_tessellation = false; |
427 | |
428 | // Assume that SubpassData images have multiple layers. Layered input attachments |
429 | // are addressed relative to the Layer output from the vertex pipeline. This option |
430 | // has no effect with multiview, since all input attachments are assumed to be layered |
431 | // and will be addressed using the current ViewIndex. |
432 | bool arrayed_subpass_input = false; |
433 | |
434 | // Whether to use SIMD-group or quadgroup functions to implement group non-uniform |
435 | // operations. Some GPUs on iOS do not support the SIMD-group functions, only the |
436 | // quadgroup functions. |
437 | bool ios_use_simdgroup_functions = false; |
438 | |
439 | // If set, the subgroup size will be assumed to be one, and subgroup-related |
440 | // builtins and operations will be emitted accordingly. This mode is intended to |
441 | // be used by MoltenVK on hardware/software configurations which do not provide |
442 | // sufficient support for subgroups. |
443 | bool emulate_subgroups = false; |
444 | |
445 | // If nonzero, a fixed subgroup size to assume. Metal, similarly to VK_EXT_subgroup_size_control, |
446 | // allows the SIMD-group size (aka thread execution width) to vary depending on |
447 | // register usage and requirements. In certain circumstances--for example, a pipeline |
448 | // in MoltenVK without VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT-- |
449 | // this is undesirable. This fixes the value of the SubgroupSize builtin, instead of |
450 | // mapping it to the Metal builtin [[thread_execution_width]]. If the thread |
451 | // execution width is reduced, the extra invocations will appear to be inactive. |
452 | // If zero, the SubgroupSize will be allowed to vary, and the builtin will be mapped |
453 | // to the Metal [[thread_execution_width]] builtin. |
454 | uint32_t fixed_subgroup_size = 0; |
455 | |
456 | enum class IndexType |
457 | { |
458 | None = 0, |
459 | UInt16 = 1, |
460 | UInt32 = 2 |
461 | }; |
462 | |
463 | // The type of index in the index buffer, if present. For a compute shader, Metal |
464 | // requires specifying the indexing at pipeline creation, rather than at draw time |
465 | // as with graphics pipelines. This means we must create three different pipelines, |
466 | // for no indexing, 16-bit indices, and 32-bit indices. Each requires different |
467 | // handling for the gl_VertexIndex builtin. We may as well, then, create three |
468 | // different shaders for these three scenarios. |
469 | IndexType vertex_index_type = IndexType::None; |
470 | |
471 | // If set, a dummy [[sample_id]] input is added to a fragment shader if none is present. |
472 | // This will force the shader to run at sample rate, assuming Metal does not optimize |
473 | // the extra threads away. |
474 | bool force_sample_rate_shading = false; |
475 | |
476 | // If set, gl_HelperInvocation will be set manually whenever a fragment is discarded. |
477 | // Some Metal devices have a bug where simd_is_helper_thread() does not return true |
478 | // after a fragment has been discarded. This is a workaround that is only expected to be needed |
479 | // until the bug is fixed in Metal; it is provided as an option to allow disabling it when that occurs. |
480 | bool manual_helper_invocation_updates = true; |
481 | |
482 | // If set, extra checks will be emitted in fragment shaders to prevent writes |
483 | // from discarded fragments. Some Metal devices have a bug where writes to storage resources |
484 | // from discarded fragment threads continue to occur, despite the fragment being |
485 | // discarded. This is a workaround that is only expected to be needed until the |
486 | // bug is fixed in Metal; it is provided as an option so it can be enabled |
487 | // only when the bug is present. |
488 | bool check_discarded_frag_stores = false; |
489 | |
490 | // If set, Lod operands to OpImageSample*DrefExplicitLod for 1D and 2D array images |
491 | // will be implemented using a gradient instead of passing the level operand directly. |
492 | // Some Metal devices have a bug where the level() argument to depth2d_array<T>::sample_compare() |
493 | // in a fragment shader is biased by some unknown amount, possibly dependent on the |
494 | // partial derivatives of the texture coordinates. This is a workaround that is only |
495 | // expected to be needed until the bug is fixed in Metal; it is provided as an option |
496 | // so it can be enabled only when the bug is present. |
497 | bool sample_dref_lod_array_as_grad = false; |
498 | |
499 | // MSL doesn't guarantee coherence between writes and subsequent reads of read_write textures. |
500 | // This inserts fences before each read of a read_write texture to ensure coherency. |
501 | // If you're sure you never rely on this, you can set this to false for a possible performance improvement. |
502 | // Note: Only Apple's GPU compiler takes advantage of the lack of coherency, so make sure to test on Apple GPUs if you disable this. |
503 | bool readwrite_texture_fences = true; |
504 | |
505 | // Metal 3.1 introduced a Metal regression bug which causes infinite recursion during |
506 | // Metal's analysis of an entry point input structure that is itself recursive. Enabling |
507 | // this option will replace the recursive input declaration with a alternate variable of |
508 | // type void*, and then cast to the correct type at the top of the entry point function. |
509 | // The bug has been reported to Apple, and will hopefully be fixed in future releases. |
510 | bool replace_recursive_inputs = false; |
511 | |
512 | // If set, manual fixups of gradient vectors for cube texture lookups will be performed. |
513 | // All released Apple Silicon GPUs to date behave incorrectly when sampling a cube texture |
514 | // with explicit gradients. They will ignore one of the three partial derivatives based |
515 | // on the selected major axis, and expect the remaining derivatives to be partially |
516 | // transformed. |
517 | bool agx_manual_cube_grad_fixup = false; |
518 | |
519 | // Metal will discard fragments with side effects under certain circumstances prematurely. |
520 | // Example: CTS test dEQP-VK.fragment_operations.early_fragment.discard_no_early_fragment_tests_depth |
521 | // Test will render a full screen quad with varying depth [0,1] for each fragment. |
522 | // Each fragment will do an operation with side effects, modify the depth value and |
523 | // discard the fragment. The test expects the fragment to be run due to: |
524 | // https://registry.khronos.org/vulkan/specs/1.0-extensions/html/vkspec.html#fragops-shader-depthreplacement |
525 | // which states that the fragment shader must be run due to replacing the depth in shader. |
526 | // However, Metal may prematurely discards fragments without executing them |
527 | // (I believe this to be due to a greedy optimization on their end) making the test fail. |
528 | // This option enforces fragment execution for such cases where the fragment has operations |
529 | // with side effects. Provided as an option hoping Metal will fix this issue in the future. |
530 | bool force_fragment_with_side_effects_execution = false; |
531 | |
532 | // If set, adds a depth pass through statement to circumvent the following issue: |
533 | // When the same depth/stencil is used as input and depth/stencil attachment, we need to |
534 | // force Metal to perform the depth/stencil write after fragment execution. Otherwise, |
535 | // Metal will write to the depth attachment before fragment execution. This happens |
536 | // if the fragment does not modify the depth value. |
537 | bool input_attachment_is_ds_attachment = false; |
538 | |
539 | bool is_ios() const |
540 | { |
541 | return platform == iOS; |
542 | } |
543 | |
544 | bool is_macos() const |
545 | { |
546 | return platform == macOS; |
547 | } |
548 | |
549 | bool use_quadgroup_operation() const |
550 | { |
551 | return is_ios() && !ios_use_simdgroup_functions; |
552 | } |
553 | |
554 | void set_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) |
555 | { |
556 | msl_version = make_msl_version(major, minor, patch); |
557 | } |
558 | |
559 | bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) const |
560 | { |
561 | return msl_version >= make_msl_version(major, minor, patch); |
562 | } |
563 | |
564 | static uint32_t make_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) |
565 | { |
566 | return (major * 10000) + (minor * 100) + patch; |
567 | } |
568 | }; |
569 | |
570 | const Options &get_msl_options() const |
571 | { |
572 | return msl_options; |
573 | } |
574 | |
575 | void set_msl_options(const Options &opts) |
576 | { |
577 | msl_options = opts; |
578 | } |
579 | |
580 | // Provide feedback to calling API to allow runtime to disable pipeline |
581 | // rasterization if vertex shader requires rasterization to be disabled. |
582 | bool get_is_rasterization_disabled() const |
583 | { |
584 | return is_rasterization_disabled && (get_entry_point().model == spv::ExecutionModelVertex || |
585 | get_entry_point().model == spv::ExecutionModelTessellationControl || |
586 | get_entry_point().model == spv::ExecutionModelTessellationEvaluation); |
587 | } |
588 | |
589 | // Provide feedback to calling API to allow it to pass an auxiliary |
590 | // swizzle buffer if the shader needs it. |
591 | bool needs_swizzle_buffer() const |
592 | { |
593 | return used_swizzle_buffer; |
594 | } |
595 | |
596 | // Provide feedback to calling API to allow it to pass a buffer |
597 | // containing STORAGE_BUFFER buffer sizes to support OpArrayLength. |
598 | bool needs_buffer_size_buffer() const |
599 | { |
600 | return !buffers_requiring_array_length.empty(); |
601 | } |
602 | |
603 | bool buffer_requires_array_length(VariableID id) const |
604 | { |
605 | return buffers_requiring_array_length.count(x: id) != 0; |
606 | } |
607 | |
608 | // Provide feedback to calling API to allow it to pass a buffer |
609 | // containing the view mask for the current multiview subpass. |
610 | bool needs_view_mask_buffer() const |
611 | { |
612 | return msl_options.multiview && !msl_options.view_index_from_device_index; |
613 | } |
614 | |
615 | // Provide feedback to calling API to allow it to pass a buffer |
616 | // containing the dispatch base workgroup ID. |
617 | bool needs_dispatch_base_buffer() const |
618 | { |
619 | return msl_options.dispatch_base && !msl_options.supports_msl_version(major: 1, minor: 2); |
620 | } |
621 | |
622 | // Provide feedback to calling API to allow it to pass an output |
623 | // buffer if the shader needs it. |
624 | bool needs_output_buffer() const |
625 | { |
626 | return capture_output_to_buffer && stage_out_var_id != ID(0); |
627 | } |
628 | |
629 | // Provide feedback to calling API to allow it to pass a patch output |
630 | // buffer if the shader needs it. |
631 | bool needs_patch_output_buffer() const |
632 | { |
633 | return capture_output_to_buffer && patch_stage_out_var_id != ID(0); |
634 | } |
635 | |
636 | // Provide feedback to calling API to allow it to pass an input threadgroup |
637 | // buffer if the shader needs it. |
638 | bool needs_input_threadgroup_mem() const |
639 | { |
640 | return capture_output_to_buffer && stage_in_var_id != ID(0); |
641 | } |
642 | |
643 | explicit CompilerMSL(std::vector<uint32_t> spirv); |
644 | CompilerMSL(const uint32_t *ir, size_t word_count); |
645 | explicit CompilerMSL(const ParsedIR &ir); |
646 | explicit CompilerMSL(ParsedIR &&ir); |
647 | |
648 | // input is a shader interface variable description used to fix up shader input variables. |
649 | // If shader inputs are provided, is_msl_shader_input_used() will return true after |
650 | // calling ::compile() if the location were used by the MSL code. |
651 | void add_msl_shader_input(const MSLShaderInterfaceVariable &input); |
652 | |
653 | // output is a shader interface variable description used to fix up shader output variables. |
654 | // If shader outputs are provided, is_msl_shader_output_used() will return true after |
655 | // calling ::compile() if the location were used by the MSL code. |
656 | void add_msl_shader_output(const MSLShaderInterfaceVariable &output); |
657 | |
658 | // resource is a resource binding to indicate the MSL buffer, |
659 | // texture or sampler index to use for a particular SPIR-V description set |
660 | // and binding. If resource bindings are provided, |
661 | // is_msl_resource_binding_used() will return true after calling ::compile() if |
662 | // the set/binding combination was used by the MSL code. |
663 | void add_msl_resource_binding(const MSLResourceBinding &resource); |
664 | |
665 | // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource |
666 | // in this shader. index is the index within the dynamic offset buffer to use. This |
667 | // function marks that resource as using a dynamic offset (VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC |
668 | // or VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC). This function only has any effect if argument buffers |
669 | // are enabled. If so, the buffer will have its address adjusted at the beginning of the shader with |
670 | // an offset taken from the dynamic offset buffer. |
671 | void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index); |
672 | |
673 | // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource |
674 | // in this shader. This function marks that resource as an inline uniform block |
675 | // (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT). This function only has any effect if argument buffers |
676 | // are enabled. If so, the buffer block will be directly embedded into the argument |
677 | // buffer, instead of being referenced indirectly via pointer. |
678 | void add_inline_uniform_block(uint32_t desc_set, uint32_t binding); |
679 | |
680 | // When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets. |
681 | // This corresponds to VK_KHR_push_descriptor in Vulkan. |
682 | void add_discrete_descriptor_set(uint32_t desc_set); |
683 | |
684 | // If an argument buffer is large enough, it may need to be in the device storage space rather than |
685 | // constant. Opt-in to this behavior here on a per set basis. |
686 | void set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage); |
687 | |
688 | // Query after compilation is done. This allows you to check if an input location was used by the shader. |
689 | bool is_msl_shader_input_used(uint32_t location); |
690 | |
691 | // Query after compilation is done. This allows you to check if an output location were used by the shader. |
692 | bool is_msl_shader_output_used(uint32_t location); |
693 | |
694 | // If not using add_msl_shader_input, it's possible |
695 | // that certain builtin attributes need to be automatically assigned locations. |
696 | // This is typical for tessellation builtin inputs such as tess levels, gl_Position, etc. |
697 | // This returns k_unknown_location if the location was explicitly assigned with |
698 | // add_msl_shader_input or the builtin is not used, otherwise returns N in [[attribute(N)]]. |
699 | uint32_t get_automatic_builtin_input_location(spv::BuiltIn builtin) const; |
700 | |
701 | // If not using add_msl_shader_output, it's possible |
702 | // that certain builtin attributes need to be automatically assigned locations. |
703 | // This is typical for tessellation builtin outputs such as tess levels, gl_Position, etc. |
704 | // This returns k_unknown_location if the location were explicitly assigned with |
705 | // add_msl_shader_output or the builtin were not used, otherwise returns N in [[attribute(N)]]. |
706 | uint32_t get_automatic_builtin_output_location(spv::BuiltIn builtin) const; |
707 | |
708 | // NOTE: Only resources which are remapped using add_msl_resource_binding will be reported here. |
709 | // Constexpr samplers are always assumed to be emitted. |
710 | // No specific MSLResourceBinding remapping is required for constexpr samplers as long as they are remapped |
711 | // by remap_constexpr_sampler(_by_binding). |
712 | bool is_msl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding) const; |
713 | |
714 | // This must only be called after a successful call to CompilerMSL::compile(). |
715 | // For a variable resource ID obtained through reflection API, report the automatically assigned resource index. |
716 | // If the descriptor set was part of an argument buffer, report the [[id(N)]], |
717 | // or [[buffer/texture/sampler]] binding for other resources. |
718 | // If the resource was a combined image sampler, report the image binding here, |
719 | // use the _secondary version of this call to query the sampler half of the resource. |
720 | // If no binding exists, uint32_t(-1) is returned. |
721 | uint32_t get_automatic_msl_resource_binding(uint32_t id) const; |
722 | |
723 | // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers, in which case the |
724 | // sampler's binding is returned instead. For any other resource type, -1 is returned. |
725 | // Secondary bindings are also used for the auxillary image atomic buffer. |
726 | uint32_t get_automatic_msl_resource_binding_secondary(uint32_t id) const; |
727 | |
728 | // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for multiplanar images, |
729 | // in which case the second plane's binding is returned instead. For any other resource type, -1 is returned. |
730 | uint32_t get_automatic_msl_resource_binding_tertiary(uint32_t id) const; |
731 | |
732 | // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for triplanar images, |
733 | // in which case the third plane's binding is returned instead. For any other resource type, -1 is returned. |
734 | uint32_t get_automatic_msl_resource_binding_quaternary(uint32_t id) const; |
735 | |
736 | // Compiles the SPIR-V code into Metal Shading Language. |
737 | std::string compile() override; |
738 | |
739 | // Remap a sampler with ID to a constexpr sampler. |
740 | // Older iOS targets must use constexpr samplers in certain cases (PCF), |
741 | // so a static sampler must be used. |
742 | // The sampler will not consume a binding, but be declared in the entry point as a constexpr sampler. |
743 | // This can be used on both combined image/samplers (sampler2D) or standalone samplers. |
744 | // The remapped sampler must not be an array of samplers. |
745 | // Prefer remap_constexpr_sampler_by_binding unless you're also doing reflection anyways. |
746 | void remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler); |
747 | |
748 | // Same as remap_constexpr_sampler, except you provide set/binding, rather than variable ID. |
749 | // Remaps based on ID take priority over set/binding remaps. |
750 | void remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding, const MSLConstexprSampler &sampler); |
751 | |
752 | // If using CompilerMSL::Options::pad_fragment_output_components, override the number of components we expect |
753 | // to use for a particular location. The default is 4 if number of components is not overridden. |
754 | void set_fragment_output_components(uint32_t location, uint32_t components); |
755 | |
756 | void set_combined_sampler_suffix(const char *suffix); |
757 | const char *get_combined_sampler_suffix() const; |
758 | |
759 | protected: |
760 | // An enum of SPIR-V functions that are implemented in additional |
761 | // source code that is added to the shader if necessary. |
762 | enum SPVFuncImpl : uint8_t |
763 | { |
764 | SPVFuncImplNone, |
765 | SPVFuncImplMod, |
766 | SPVFuncImplRadians, |
767 | SPVFuncImplDegrees, |
768 | SPVFuncImplFindILsb, |
769 | SPVFuncImplFindSMsb, |
770 | SPVFuncImplFindUMsb, |
771 | SPVFuncImplSSign, |
772 | SPVFuncImplArrayCopy, |
773 | SPVFuncImplArrayCopyMultidim, |
774 | SPVFuncImplTexelBufferCoords, |
775 | SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations |
776 | SPVFuncImplGradientCube, |
777 | SPVFuncImplFMul, |
778 | SPVFuncImplFAdd, |
779 | SPVFuncImplFSub, |
780 | SPVFuncImplQuantizeToF16, |
781 | SPVFuncImplCubemapTo2DArrayFace, |
782 | SPVFuncImplUnsafeArray, // Allow Metal to use the array<T> template to make arrays a value type |
783 | SPVFuncImplStorageMatrix, // Allow threadgroup construction of matrices |
784 | SPVFuncImplInverse4x4, |
785 | SPVFuncImplInverse3x3, |
786 | SPVFuncImplInverse2x2, |
787 | // It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's |
788 | // emitted before them. |
789 | SPVFuncImplForwardArgs, |
790 | // Likewise, this must come before *Swizzle. |
791 | SPVFuncImplGetSwizzle, |
792 | SPVFuncImplTextureSwizzle, |
793 | SPVFuncImplGatherSwizzle, |
794 | SPVFuncImplGatherCompareSwizzle, |
795 | SPVFuncImplGatherConstOffsets, |
796 | SPVFuncImplGatherCompareConstOffsets, |
797 | SPVFuncImplSubgroupBroadcast, |
798 | SPVFuncImplSubgroupBroadcastFirst, |
799 | SPVFuncImplSubgroupBallot, |
800 | , |
801 | SPVFuncImplSubgroupBallotFindLSB, |
802 | SPVFuncImplSubgroupBallotFindMSB, |
803 | SPVFuncImplSubgroupBallotBitCount, |
804 | SPVFuncImplSubgroupAllEqual, |
805 | SPVFuncImplSubgroupShuffle, |
806 | SPVFuncImplSubgroupShuffleXor, |
807 | SPVFuncImplSubgroupShuffleUp, |
808 | SPVFuncImplSubgroupShuffleDown, |
809 | SPVFuncImplQuadBroadcast, |
810 | SPVFuncImplQuadSwap, |
811 | SPVFuncImplReflectScalar, |
812 | SPVFuncImplRefractScalar, |
813 | SPVFuncImplFaceForwardScalar, |
814 | SPVFuncImplChromaReconstructNearest2Plane, |
815 | SPVFuncImplChromaReconstructNearest3Plane, |
816 | SPVFuncImplChromaReconstructLinear422CositedEven2Plane, |
817 | SPVFuncImplChromaReconstructLinear422CositedEven3Plane, |
818 | SPVFuncImplChromaReconstructLinear422Midpoint2Plane, |
819 | SPVFuncImplChromaReconstructLinear422Midpoint3Plane, |
820 | SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane, |
821 | SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane, |
822 | SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane, |
823 | SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane, |
824 | SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane, |
825 | SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane, |
826 | SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane, |
827 | SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane, |
828 | SPVFuncImplExpandITUFullRange, |
829 | SPVFuncImplExpandITUNarrowRange, |
830 | SPVFuncImplConvertYCbCrBT709, |
831 | SPVFuncImplConvertYCbCrBT601, |
832 | SPVFuncImplConvertYCbCrBT2020, |
833 | SPVFuncImplDynamicImageSampler, |
834 | SPVFuncImplRayQueryIntersectionParams, |
835 | SPVFuncImplVariableDescriptor, |
836 | SPVFuncImplVariableSizedDescriptor, |
837 | SPVFuncImplVariableDescriptorArray, |
838 | SPVFuncImplPaddedStd140, |
839 | SPVFuncImplReduceAdd, |
840 | SPVFuncImplImageFence, |
841 | SPVFuncImplTextureCast |
842 | }; |
843 | |
844 | // If the underlying resource has been used for comparison then duplicate loads of that resource must be too |
845 | // Use Metal's native frame-buffer fetch API for subpass inputs. |
846 | void emit_texture_op(const Instruction &i, bool sparse) override; |
847 | void emit_binary_ptr_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); |
848 | std::string to_ptr_expression(uint32_t id, bool register_expression_read = true); |
849 | void emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); |
850 | void emit_instruction(const Instruction &instr) override; |
851 | void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, |
852 | uint32_t count) override; |
853 | void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op, |
854 | const uint32_t *args, uint32_t count) override; |
855 | void () override; |
856 | void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; |
857 | void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; |
858 | void emit_subgroup_op(const Instruction &i) override; |
859 | std::string to_texture_op(const Instruction &i, bool sparse, bool *forward, |
860 | SmallVector<uint32_t> &inherited_expressions) override; |
861 | void emit_fixup() override; |
862 | std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, |
863 | const std::string &qualifier = "" ); |
864 | void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, |
865 | const std::string &qualifier = "" , uint32_t base_offset = 0) override; |
866 | void emit_struct_padding_target(const SPIRType &type) override; |
867 | std::string type_to_glsl(const SPIRType &type, uint32_t id, bool member); |
868 | std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; |
869 | void emit_block_hints(const SPIRBlock &block) override; |
870 | |
871 | // Allow Metal to use the array<T> template to make arrays a value type |
872 | std::string type_to_array_glsl(const SPIRType &type, uint32_t variable_id) override; |
873 | std::string constant_op_expression(const SPIRConstantOp &cop) override; |
874 | |
875 | bool variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const override; |
876 | |
877 | // GCC workaround of lambdas calling protected functions (for older GCC versions) |
878 | std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override; |
879 | |
880 | std::string image_type_glsl(const SPIRType &type, uint32_t id, bool member) override; |
881 | std::string sampler_type(const SPIRType &type, uint32_t id, bool member); |
882 | std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override; |
883 | std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; |
884 | std::string to_name(uint32_t id, bool allow_alias = true) const override; |
885 | std::string to_function_name(const TextureFunctionNameArguments &args) override; |
886 | std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward) override; |
887 | std::string to_initializer_expression(const SPIRVariable &var) override; |
888 | std::string to_zero_initialized_expression(uint32_t type_id) override; |
889 | |
890 | std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id, |
891 | bool is_packed, bool row_major) override; |
892 | |
893 | // Returns true for BuiltInSampleMask because gl_SampleMask[] is an array in SPIR-V, but [[sample_mask]] is a scalar in Metal. |
894 | bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override; |
895 | |
896 | std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; |
897 | bool emit_complex_bitcast(uint32_t result_id, uint32_t id, uint32_t op0) override; |
898 | bool skip_argument(uint32_t id) const override; |
899 | std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain_is_resolved) override; |
900 | std::string to_qualifiers_glsl(uint32_t id) override; |
901 | void replace_illegal_names() override; |
902 | void declare_constant_arrays(); |
903 | |
904 | void replace_illegal_entry_point_names(); |
905 | void sync_entry_point_aliases_and_names(); |
906 | |
907 | static const std::unordered_set<std::string> &get_reserved_keyword_set(); |
908 | static const std::unordered_set<std::string> &get_illegal_func_names(); |
909 | |
910 | // Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries |
911 | void declare_complex_constant_arrays(); |
912 | |
913 | bool is_patch_block(const SPIRType &type); |
914 | bool is_non_native_row_major_matrix(uint32_t id) override; |
915 | bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override; |
916 | std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, uint32_t physical_type_id, |
917 | bool is_packed, bool relaxed) override; |
918 | |
919 | bool is_tesc_shader() const; |
920 | bool is_tese_shader() const; |
921 | |
922 | void preprocess_op_codes(); |
923 | void localize_global_variables(); |
924 | void (); |
925 | void mark_packable_structs(); |
926 | void mark_as_packable(SPIRType &type); |
927 | void mark_as_workgroup_struct(SPIRType &type); |
928 | |
929 | std::unordered_map<uint32_t, std::set<uint32_t>> function_global_vars; |
930 | void (uint32_t func_id, std::set<uint32_t> &added_arg_ids, |
931 | std::unordered_set<uint32_t> &global_var_ids, |
932 | std::unordered_set<uint32_t> &processed_func_ids); |
933 | uint32_t add_interface_block(spv::StorageClass storage, bool patch = false); |
934 | uint32_t add_interface_block_pointer(uint32_t ib_var_id, spv::StorageClass storage); |
935 | |
936 | struct InterfaceBlockMeta |
937 | { |
938 | struct LocationMeta |
939 | { |
940 | uint32_t base_type_id = 0; |
941 | uint32_t num_components = 0; |
942 | bool flat = false; |
943 | bool noperspective = false; |
944 | bool centroid = false; |
945 | bool sample = false; |
946 | }; |
947 | std::unordered_map<uint32_t, LocationMeta> location_meta; |
948 | bool strip_array = false; |
949 | bool allow_local_declaration = false; |
950 | }; |
951 | |
952 | std::string to_tesc_invocation_id(); |
953 | void emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array); |
954 | void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type, |
955 | SPIRVariable &var, InterfaceBlockMeta &meta); |
956 | void add_composite_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, |
957 | SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta); |
958 | void add_plain_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, |
959 | SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta); |
960 | bool add_component_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, |
961 | SPIRVariable &var, const SPIRType &type, |
962 | InterfaceBlockMeta &meta); |
963 | void add_plain_member_variable_to_interface_block(spv::StorageClass storage, |
964 | const std::string &ib_var_ref, SPIRType &ib_type, |
965 | SPIRVariable &var, SPIRType &var_type, |
966 | uint32_t mbr_idx, InterfaceBlockMeta &meta, |
967 | const std::string &mbr_name_qual, |
968 | const std::string &var_chain_qual, |
969 | uint32_t &location, uint32_t &var_mbr_idx); |
970 | void add_composite_member_variable_to_interface_block(spv::StorageClass storage, |
971 | const std::string &ib_var_ref, SPIRType &ib_type, |
972 | SPIRVariable &var, SPIRType &var_type, |
973 | uint32_t mbr_idx, InterfaceBlockMeta &meta, |
974 | const std::string &mbr_name_qual, |
975 | const std::string &var_chain_qual, |
976 | uint32_t &location, uint32_t &var_mbr_idx, |
977 | const Bitset &interpolation_qual); |
978 | void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var); |
979 | void add_tess_level_input(const std::string &base_ref, const std::string &mbr_name, SPIRVariable &var); |
980 | |
981 | void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id); |
982 | |
983 | void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, |
984 | spv::StorageClass storage, bool fallback = false); |
985 | uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin); |
986 | uint32_t ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t component, |
987 | uint32_t num_components, bool strip_array); |
988 | |
989 | void emit_custom_templates(); |
990 | void emit_custom_functions(); |
991 | void emit_resources(); |
992 | void emit_specialization_constants_and_structs(); |
993 | void emit_interface_block(uint32_t ib_var_id); |
994 | bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs); |
995 | bool is_var_runtime_size_array(const SPIRVariable &var) const; |
996 | uint32_t get_resource_array_size(const SPIRType &type, uint32_t id) const; |
997 | |
998 | void fix_up_shader_inputs_outputs(); |
999 | |
1000 | std::string func_type_decl(SPIRType &type); |
1001 | std::string entry_point_args_classic(bool append_comma); |
1002 | std::string entry_point_args_argument_buffer(bool append_comma); |
1003 | std::string entry_point_arg_stage_in(); |
1004 | void entry_point_args_builtin(std::string &args); |
1005 | void entry_point_args_discrete_descriptors(std::string &args); |
1006 | std::string append_member_name(const std::string &qualifier, const SPIRType &type, uint32_t index); |
1007 | std::string ensure_valid_name(std::string name, std::string pfx); |
1008 | std::string to_sampler_expression(uint32_t id); |
1009 | std::string to_swizzle_expression(uint32_t id); |
1010 | std::string to_buffer_size_expression(uint32_t id); |
1011 | bool is_sample_rate() const; |
1012 | bool is_intersection_query() const; |
1013 | bool is_direct_input_builtin(spv::BuiltIn builtin); |
1014 | std::string builtin_qualifier(spv::BuiltIn builtin); |
1015 | std::string builtin_type_decl(spv::BuiltIn builtin, uint32_t id = 0); |
1016 | std::string built_in_func_arg(spv::BuiltIn builtin, bool prefix_comma); |
1017 | std::string member_attribute_qualifier(const SPIRType &type, uint32_t index); |
1018 | std::string member_location_attribute_qualifier(const SPIRType &type, uint32_t index); |
1019 | std::string argument_decl(const SPIRFunction::Parameter &arg); |
1020 | const char *descriptor_address_space(uint32_t id, spv::StorageClass storage, const char *plain_address_space) const; |
1021 | std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp); |
1022 | uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane = 0); |
1023 | uint32_t get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr) const; |
1024 | uint32_t get_or_allocate_builtin_input_member_location(spv::BuiltIn builtin, |
1025 | uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); |
1026 | uint32_t get_or_allocate_builtin_output_member_location(spv::BuiltIn builtin, |
1027 | uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); |
1028 | |
1029 | uint32_t get_physical_tess_level_array_size(spv::BuiltIn builtin) const; |
1030 | |
1031 | // MSL packing rules. These compute the effective packing rules as observed by the MSL compiler in the MSL output. |
1032 | // These values can change depending on various extended decorations which control packing rules. |
1033 | // We need to make these rules match up with SPIR-V declared rules. |
1034 | uint32_t get_declared_type_size_msl(const SPIRType &type, bool packed, bool row_major) const; |
1035 | uint32_t get_declared_type_array_stride_msl(const SPIRType &type, bool packed, bool row_major) const; |
1036 | uint32_t get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const; |
1037 | uint32_t get_declared_type_alignment_msl(const SPIRType &type, bool packed, bool row_major) const; |
1038 | |
1039 | uint32_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const; |
1040 | uint32_t get_declared_struct_member_array_stride_msl(const SPIRType &struct_type, uint32_t index) const; |
1041 | uint32_t get_declared_struct_member_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; |
1042 | uint32_t get_declared_struct_member_alignment_msl(const SPIRType &struct_type, uint32_t index) const; |
1043 | |
1044 | uint32_t get_declared_input_size_msl(const SPIRType &struct_type, uint32_t index) const; |
1045 | uint32_t get_declared_input_array_stride_msl(const SPIRType &struct_type, uint32_t index) const; |
1046 | uint32_t get_declared_input_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; |
1047 | uint32_t get_declared_input_alignment_msl(const SPIRType &struct_type, uint32_t index) const; |
1048 | |
1049 | const SPIRType &get_physical_member_type(const SPIRType &struct_type, uint32_t index) const; |
1050 | SPIRType get_presumed_input_type(const SPIRType &struct_type, uint32_t index) const; |
1051 | |
1052 | uint32_t get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment = false, |
1053 | bool ignore_padding = false) const; |
1054 | |
1055 | std::string to_component_argument(uint32_t id); |
1056 | void align_struct(SPIRType &ib_type, std::unordered_set<uint32_t> &aligned_structs); |
1057 | void mark_scalar_layout_structs(const SPIRType &ib_type); |
1058 | void mark_struct_members_packed(const SPIRType &type); |
1059 | void ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index); |
1060 | bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const; |
1061 | std::string get_argument_address_space(const SPIRVariable &argument); |
1062 | std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false); |
1063 | static bool decoration_flags_signal_volatile(const Bitset &flags); |
1064 | const char *to_restrict(uint32_t id, bool space); |
1065 | SPIRType &get_stage_in_struct_type(); |
1066 | SPIRType &get_stage_out_struct_type(); |
1067 | SPIRType &get_patch_stage_in_struct_type(); |
1068 | SPIRType &get_patch_stage_out_struct_type(); |
1069 | std::string get_tess_factor_struct_name(); |
1070 | SPIRType &get_uint_type(); |
1071 | uint32_t get_uint_type_id(); |
1072 | void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, spv::Op opcode, |
1073 | uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0, |
1074 | bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0); |
1075 | const char *get_memory_order(uint32_t spv_mem_sem); |
1076 | void add_pragma_line(const std::string &line); |
1077 | void add_typedef_line(const std::string &line); |
1078 | void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem); |
1079 | bool emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, |
1080 | spv::StorageClass lhs_storage, spv::StorageClass rhs_storage) override; |
1081 | void build_implicit_builtins(); |
1082 | uint32_t build_constant_uint_array_pointer(); |
1083 | void emit_entry_point_declarations() override; |
1084 | bool uses_explicit_early_fragment_test(); |
1085 | |
1086 | uint32_t builtin_frag_coord_id = 0; |
1087 | uint32_t builtin_sample_id_id = 0; |
1088 | uint32_t builtin_sample_mask_id = 0; |
1089 | uint32_t builtin_helper_invocation_id = 0; |
1090 | uint32_t builtin_vertex_idx_id = 0; |
1091 | uint32_t builtin_base_vertex_id = 0; |
1092 | uint32_t builtin_instance_idx_id = 0; |
1093 | uint32_t builtin_base_instance_id = 0; |
1094 | uint32_t builtin_view_idx_id = 0; |
1095 | uint32_t builtin_layer_id = 0; |
1096 | uint32_t builtin_invocation_id_id = 0; |
1097 | uint32_t builtin_primitive_id_id = 0; |
1098 | uint32_t builtin_subgroup_invocation_id_id = 0; |
1099 | uint32_t builtin_subgroup_size_id = 0; |
1100 | uint32_t builtin_dispatch_base_id = 0; |
1101 | uint32_t builtin_stage_input_size_id = 0; |
1102 | uint32_t builtin_local_invocation_index_id = 0; |
1103 | uint32_t builtin_workgroup_size_id = 0; |
1104 | uint32_t builtin_frag_depth_id = 0; |
1105 | uint32_t swizzle_buffer_id = 0; |
1106 | uint32_t buffer_size_buffer_id = 0; |
1107 | uint32_t view_mask_buffer_id = 0; |
1108 | uint32_t dynamic_offsets_buffer_id = 0; |
1109 | uint32_t uint_type_id = 0; |
1110 | uint32_t argument_buffer_padding_buffer_type_id = 0; |
1111 | uint32_t argument_buffer_padding_image_type_id = 0; |
1112 | uint32_t argument_buffer_padding_sampler_type_id = 0; |
1113 | |
1114 | bool does_shader_write_sample_mask = false; |
1115 | bool frag_shader_needs_discard_checks = false; |
1116 | |
1117 | void cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; |
1118 | void cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override; |
1119 | void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override; |
1120 | |
1121 | void analyze_sampled_image_usage(); |
1122 | |
1123 | bool access_chain_needs_stage_io_builtin_translation(uint32_t base) override; |
1124 | bool prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage, |
1125 | bool &is_packed) override; |
1126 | void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length); |
1127 | void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) override; |
1128 | |
1129 | bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length); |
1130 | bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr); |
1131 | bool is_out_of_bounds_tessellation_level(uint32_t id_lhs); |
1132 | |
1133 | void ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin); |
1134 | |
1135 | void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id); |
1136 | |
1137 | std::string convert_to_f32(const std::string &expr, uint32_t components); |
1138 | |
1139 | Options msl_options; |
1140 | std::set<SPVFuncImpl> spv_function_implementations; |
1141 | // Must be ordered to ensure declarations are in a specific order. |
1142 | std::map<LocationComponentPair, MSLShaderInterfaceVariable> inputs_by_location; |
1143 | std::unordered_map<uint32_t, MSLShaderInterfaceVariable> inputs_by_builtin; |
1144 | std::map<LocationComponentPair, MSLShaderInterfaceVariable> outputs_by_location; |
1145 | std::unordered_map<uint32_t, MSLShaderInterfaceVariable> outputs_by_builtin; |
1146 | std::unordered_set<uint32_t> location_inputs_in_use; |
1147 | std::unordered_set<uint32_t> location_inputs_in_use_fallback; |
1148 | std::unordered_set<uint32_t> location_outputs_in_use; |
1149 | std::unordered_set<uint32_t> location_outputs_in_use_fallback; |
1150 | std::unordered_map<uint32_t, uint32_t> fragment_output_components; |
1151 | std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_input_location; |
1152 | std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_output_location; |
1153 | std::set<std::string> pragma_lines; |
1154 | std::set<std::string> typedef_lines; |
1155 | SmallVector<uint32_t> vars_needing_early_declaration; |
1156 | |
1157 | std::unordered_map<StageSetBinding, std::pair<MSLResourceBinding, bool>, InternalHasher> resource_bindings; |
1158 | std::unordered_map<StageSetBinding, uint32_t, InternalHasher> resource_arg_buff_idx_to_binding_number; |
1159 | |
1160 | uint32_t next_metal_resource_index_buffer = 0; |
1161 | uint32_t next_metal_resource_index_texture = 0; |
1162 | uint32_t next_metal_resource_index_sampler = 0; |
1163 | // Intentionally uninitialized, works around MSVC 2013 bug. |
1164 | uint32_t next_metal_resource_ids[kMaxArgumentBuffers]; |
1165 | |
1166 | VariableID stage_in_var_id = 0; |
1167 | VariableID stage_out_var_id = 0; |
1168 | VariableID patch_stage_in_var_id = 0; |
1169 | VariableID patch_stage_out_var_id = 0; |
1170 | VariableID stage_in_ptr_var_id = 0; |
1171 | VariableID stage_out_ptr_var_id = 0; |
1172 | VariableID tess_level_inner_var_id = 0; |
1173 | VariableID tess_level_outer_var_id = 0; |
1174 | VariableID stage_out_masked_builtin_type_id = 0; |
1175 | |
1176 | // Handle HLSL-style 0-based vertex/instance index. |
1177 | enum class TriState |
1178 | { |
1179 | Neutral, |
1180 | No, |
1181 | Yes |
1182 | }; |
1183 | TriState needs_base_vertex_arg = TriState::Neutral; |
1184 | TriState needs_base_instance_arg = TriState::Neutral; |
1185 | |
1186 | bool has_sampled_images = false; |
1187 | bool builtin_declaration = false; // Handle HLSL-style 0-based vertex/instance index. |
1188 | |
1189 | bool is_using_builtin_array = false; // Force the use of C style array declaration. |
1190 | bool using_builtin_array() const; |
1191 | |
1192 | bool is_rasterization_disabled = false; |
1193 | bool capture_output_to_buffer = false; |
1194 | bool needs_swizzle_buffer_def = false; |
1195 | bool used_swizzle_buffer = false; |
1196 | bool added_builtin_tess_level = false; |
1197 | bool needs_subgroup_invocation_id = false; |
1198 | bool needs_subgroup_size = false; |
1199 | bool needs_sample_id = false; |
1200 | bool needs_helper_invocation = false; |
1201 | bool writes_to_depth = false; |
1202 | std::string qual_pos_var_name; |
1203 | std::string stage_in_var_name = "in" ; |
1204 | std::string stage_out_var_name = "out" ; |
1205 | std::string patch_stage_in_var_name = "patchIn" ; |
1206 | std::string patch_stage_out_var_name = "patchOut" ; |
1207 | std::string sampler_name_suffix = "Smplr" ; |
1208 | std::string swizzle_name_suffix = "Swzl" ; |
1209 | std::string buffer_size_name_suffix = "BufferSize" ; |
1210 | std::string plane_name_suffix = "Plane" ; |
1211 | std::string input_wg_var_name = "gl_in" ; |
1212 | std::string input_buffer_var_name = "spvIn" ; |
1213 | std::string output_buffer_var_name = "spvOut" ; |
1214 | std::string patch_input_buffer_var_name = "spvPatchIn" ; |
1215 | std::string patch_output_buffer_var_name = "spvPatchOut" ; |
1216 | std::string tess_factor_buffer_var_name = "spvTessLevel" ; |
1217 | std::string index_buffer_var_name = "spvIndices" ; |
1218 | spv::Op previous_instruction_opcode = spv::OpNop; |
1219 | |
1220 | // Must be ordered since declaration is in a specific order. |
1221 | std::map<uint32_t, MSLConstexprSampler> constexpr_samplers_by_id; |
1222 | std::unordered_map<SetBindingPair, MSLConstexprSampler, InternalHasher> constexpr_samplers_by_binding; |
1223 | const MSLConstexprSampler *find_constexpr_sampler(uint32_t id) const; |
1224 | |
1225 | std::unordered_set<uint32_t> buffers_requiring_array_length; |
1226 | SmallVector<std::pair<uint32_t, uint32_t>> buffer_aliases_argument; |
1227 | SmallVector<uint32_t> buffer_aliases_discrete; |
1228 | std::unordered_set<uint32_t> atomic_image_vars_emulated; // Emulate texture2D atomic operations |
1229 | std::unordered_set<uint32_t> pull_model_inputs; |
1230 | std::unordered_set<uint32_t> recursive_inputs; |
1231 | |
1232 | SmallVector<SPIRVariable *> entry_point_bindings; |
1233 | |
1234 | // Must be ordered since array is in a specific order. |
1235 | std::map<SetBindingPair, std::pair<uint32_t, uint32_t>> buffers_requiring_dynamic_offset; |
1236 | |
1237 | SmallVector<uint32_t> disabled_frag_outputs; |
1238 | |
1239 | std::unordered_set<SetBindingPair, InternalHasher> inline_uniform_blocks; |
1240 | |
1241 | uint32_t argument_buffer_ids[kMaxArgumentBuffers]; |
1242 | uint32_t argument_buffer_discrete_mask = 0; |
1243 | uint32_t argument_buffer_device_storage_mask = 0; |
1244 | |
1245 | void emit_argument_buffer_aliased_descriptor(const SPIRVariable &aliased_var, |
1246 | const SPIRVariable &base_var); |
1247 | |
1248 | void analyze_argument_buffers(); |
1249 | bool descriptor_set_is_argument_buffer(uint32_t desc_set) const; |
1250 | const MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx) const; |
1251 | void add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); |
1252 | void add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); |
1253 | void add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); |
1254 | void add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, uint32_t count); |
1255 | |
1256 | uint32_t get_target_components_for_fragment_location(uint32_t location) const; |
1257 | uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components, |
1258 | SPIRType::BaseType basetype = SPIRType::Unknown); |
1259 | uint32_t build_msl_interpolant_type(uint32_t type_id, bool is_noperspective); |
1260 | |
1261 | bool suppress_missing_prototypes = false; |
1262 | bool suppress_incompatible_pointer_types_discard_qualifiers = false; |
1263 | |
1264 | void add_spv_func_and_recompile(SPVFuncImpl spv_func); |
1265 | |
1266 | void activate_argument_buffer_resources(); |
1267 | |
1268 | bool type_is_msl_framebuffer_fetch(const SPIRType &type) const; |
1269 | bool is_supported_argument_buffer_type(const SPIRType &type) const; |
1270 | |
1271 | bool variable_storage_requires_stage_io(spv::StorageClass storage) const; |
1272 | |
1273 | bool needs_manual_helper_invocation_updates() const |
1274 | { |
1275 | return msl_options.manual_helper_invocation_updates && msl_options.supports_msl_version(major: 2, minor: 3); |
1276 | } |
1277 | bool needs_frag_discard_checks() const |
1278 | { |
1279 | return get_execution_model() == spv::ExecutionModelFragment && msl_options.supports_msl_version(major: 2, minor: 3) && |
1280 | msl_options.check_discarded_frag_stores && frag_shader_needs_discard_checks; |
1281 | } |
1282 | |
1283 | bool has_additional_fixed_sample_mask() const { return msl_options.additional_fixed_sample_mask != 0xffffffff; } |
1284 | std::string additional_fixed_sample_mask_str() const; |
1285 | |
1286 | // OpcodeHandler that handles several MSL preprocessing operations. |
1287 | struct OpCodePreprocessor : OpcodeHandler |
1288 | { |
1289 | OpCodePreprocessor(CompilerMSL &compiler_) |
1290 | : compiler(compiler_) |
1291 | { |
1292 | } |
1293 | |
1294 | bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; |
1295 | CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args); |
1296 | void check_resource_write(uint32_t var_id); |
1297 | |
1298 | CompilerMSL &compiler; |
1299 | std::unordered_map<uint32_t, uint32_t> result_types; |
1300 | std::unordered_map<uint32_t, uint32_t> image_pointers_emulated; // Emulate texture2D atomic operations |
1301 | bool suppress_missing_prototypes = false; |
1302 | bool uses_atomics = false; |
1303 | bool uses_image_write = false; |
1304 | bool uses_buffer_write = false; |
1305 | bool uses_discard = false; |
1306 | bool needs_subgroup_invocation_id = false; |
1307 | bool needs_subgroup_size = false; |
1308 | bool needs_sample_id = false; |
1309 | bool needs_helper_invocation = false; |
1310 | }; |
1311 | |
1312 | // OpcodeHandler that scans for uses of sampled images |
1313 | struct SampledImageScanner : OpcodeHandler |
1314 | { |
1315 | SampledImageScanner(CompilerMSL &compiler_) |
1316 | : compiler(compiler_) |
1317 | { |
1318 | } |
1319 | |
1320 | bool handle(spv::Op opcode, const uint32_t *args, uint32_t) override; |
1321 | |
1322 | CompilerMSL &compiler; |
1323 | }; |
1324 | |
1325 | // Sorts the members of a SPIRType and associated Meta info based on a settable sorting |
1326 | // aspect, which defines which aspect of the struct members will be used to sort them. |
1327 | // Regardless of the sorting aspect, built-in members always appear at the end of the struct. |
1328 | struct MemberSorter |
1329 | { |
1330 | enum SortAspect |
1331 | { |
1332 | LocationThenBuiltInType, |
1333 | Offset |
1334 | }; |
1335 | |
1336 | void sort(); |
1337 | bool operator()(uint32_t mbr_idx1, uint32_t mbr_idx2); |
1338 | MemberSorter(SPIRType &t, Meta &m, SortAspect sa); |
1339 | |
1340 | SPIRType &type; |
1341 | Meta &meta; |
1342 | SortAspect sort_aspect; |
1343 | }; |
1344 | }; |
1345 | } // namespace SPIRV_CROSS_NAMESPACE |
1346 | |
1347 | #endif |
1348 | |