1 | /* |
2 | * Copyright 2016-2021 The Brenwill Workshop Ltd. |
3 | * SPDX-License-Identifier: Apache-2.0 OR MIT |
4 | * |
5 | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | * you may not use this file except in compliance with the License. |
7 | * You may obtain a copy of the License at |
8 | * |
9 | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | * |
11 | * Unless required by applicable law or agreed to in writing, software |
12 | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | * See the License for the specific language governing permissions and |
15 | * limitations under the License. |
16 | */ |
17 | |
18 | /* |
19 | * At your option, you may choose to accept this material under either: |
20 | * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or |
21 | * 2. The MIT License, found at <http://opensource.org/licenses/MIT>. |
22 | */ |
23 | |
24 | #ifndef SPIRV_CROSS_MSL_HPP |
25 | #define SPIRV_CROSS_MSL_HPP |
26 | |
27 | #include "spirv_glsl.hpp" |
28 | #include <map> |
29 | #include <set> |
30 | #include <stddef.h> |
31 | #include <unordered_map> |
32 | #include <unordered_set> |
33 | |
34 | namespace SPIRV_CROSS_NAMESPACE |
35 | { |
36 | |
37 | // Indicates the format of a shader input. Currently limited to specifying |
38 | // if the input is an 8-bit unsigned integer, 16-bit unsigned integer, or |
39 | // some other format. |
40 | enum MSLShaderInputFormat |
41 | { |
42 | MSL_SHADER_INPUT_FORMAT_OTHER = 0, |
43 | MSL_SHADER_INPUT_FORMAT_UINT8 = 1, |
44 | MSL_SHADER_INPUT_FORMAT_UINT16 = 2, |
45 | MSL_SHADER_INPUT_FORMAT_ANY16 = 3, |
46 | MSL_SHADER_INPUT_FORMAT_ANY32 = 4, |
47 | |
48 | // Deprecated aliases. |
49 | MSL_VERTEX_FORMAT_OTHER = MSL_SHADER_INPUT_FORMAT_OTHER, |
50 | MSL_VERTEX_FORMAT_UINT8 = MSL_SHADER_INPUT_FORMAT_UINT8, |
51 | MSL_VERTEX_FORMAT_UINT16 = MSL_SHADER_INPUT_FORMAT_UINT16, |
52 | |
53 | MSL_SHADER_INPUT_FORMAT_INT_MAX = 0x7fffffff |
54 | }; |
55 | |
56 | // Defines MSL characteristics of an input variable at a particular location. |
57 | // After compilation, it is possible to query whether or not this location was used. |
58 | // If vecsize is nonzero, it must be greater than or equal to the vecsize declared in the shader, |
59 | // or behavior is undefined. |
60 | struct MSLShaderInput |
61 | { |
62 | uint32_t location = 0; |
63 | uint32_t component = 0; |
64 | MSLShaderInputFormat format = MSL_SHADER_INPUT_FORMAT_OTHER; |
65 | spv::BuiltIn builtin = spv::BuiltInMax; |
66 | uint32_t vecsize = 0; |
67 | }; |
68 | |
69 | // Matches the binding index of a MSL resource for a binding within a descriptor set. |
70 | // Taken together, the stage, desc_set and binding combine to form a reference to a resource |
71 | // descriptor used in a particular shading stage. The count field indicates the number of |
72 | // resources consumed by this binding, if the binding represents an array of resources. |
73 | // If the resource array is a run-time-sized array, which are legal in GLSL or SPIR-V, this value |
74 | // will be used to declare the array size in MSL, which does not support run-time-sized arrays. |
75 | // If pad_argument_buffer_resources is enabled, the base_type and count values are used to |
76 | // specify the base type and array size of the resource in the argument buffer, if that resource |
77 | // is not defined and used by the shader. With pad_argument_buffer_resources enabled, this |
78 | // information will be used to pad the argument buffer structure, in order to align that |
79 | // structure consistently for all uses, across all shaders, of the descriptor set represented |
80 | // by the arugment buffer. If pad_argument_buffer_resources is disabled, base_type does not |
81 | // need to be populated, and if the resource is also not a run-time sized array, the count |
82 | // field does not need to be populated. |
83 | // If using MSL 2.0 argument buffers, the descriptor set is not marked as a discrete descriptor set, |
84 | // and (for iOS only) the resource is not a storage image (sampled != 2), the binding reference we |
85 | // remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure. |
86 | // For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will |
87 | // become a [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used. |
88 | struct MSLResourceBinding |
89 | { |
90 | spv::ExecutionModel stage = spv::ExecutionModelMax; |
91 | SPIRType::BaseType basetype = SPIRType::Unknown; |
92 | uint32_t desc_set = 0; |
93 | uint32_t binding = 0; |
94 | uint32_t count = 0; |
95 | uint32_t msl_buffer = 0; |
96 | uint32_t msl_texture = 0; |
97 | uint32_t msl_sampler = 0; |
98 | }; |
99 | |
100 | enum MSLSamplerCoord |
101 | { |
102 | MSL_SAMPLER_COORD_NORMALIZED = 0, |
103 | MSL_SAMPLER_COORD_PIXEL = 1, |
104 | MSL_SAMPLER_INT_MAX = 0x7fffffff |
105 | }; |
106 | |
107 | enum MSLSamplerFilter |
108 | { |
109 | MSL_SAMPLER_FILTER_NEAREST = 0, |
110 | MSL_SAMPLER_FILTER_LINEAR = 1, |
111 | MSL_SAMPLER_FILTER_INT_MAX = 0x7fffffff |
112 | }; |
113 | |
114 | enum MSLSamplerMipFilter |
115 | { |
116 | MSL_SAMPLER_MIP_FILTER_NONE = 0, |
117 | MSL_SAMPLER_MIP_FILTER_NEAREST = 1, |
118 | MSL_SAMPLER_MIP_FILTER_LINEAR = 2, |
119 | MSL_SAMPLER_MIP_FILTER_INT_MAX = 0x7fffffff |
120 | }; |
121 | |
122 | enum MSLSamplerAddress |
123 | { |
124 | MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO = 0, |
125 | MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE = 1, |
126 | MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER = 2, |
127 | MSL_SAMPLER_ADDRESS_REPEAT = 3, |
128 | MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT = 4, |
129 | MSL_SAMPLER_ADDRESS_INT_MAX = 0x7fffffff |
130 | }; |
131 | |
132 | enum MSLSamplerCompareFunc |
133 | { |
134 | MSL_SAMPLER_COMPARE_FUNC_NEVER = 0, |
135 | MSL_SAMPLER_COMPARE_FUNC_LESS = 1, |
136 | MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL = 2, |
137 | MSL_SAMPLER_COMPARE_FUNC_GREATER = 3, |
138 | MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL = 4, |
139 | MSL_SAMPLER_COMPARE_FUNC_EQUAL = 5, |
140 | MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL = 6, |
141 | MSL_SAMPLER_COMPARE_FUNC_ALWAYS = 7, |
142 | MSL_SAMPLER_COMPARE_FUNC_INT_MAX = 0x7fffffff |
143 | }; |
144 | |
145 | enum MSLSamplerBorderColor |
146 | { |
147 | MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK = 0, |
148 | MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK = 1, |
149 | MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE = 2, |
150 | MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff |
151 | }; |
152 | |
153 | enum MSLFormatResolution |
154 | { |
155 | MSL_FORMAT_RESOLUTION_444 = 0, |
156 | MSL_FORMAT_RESOLUTION_422, |
157 | MSL_FORMAT_RESOLUTION_420, |
158 | MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff |
159 | }; |
160 | |
161 | enum MSLChromaLocation |
162 | { |
163 | MSL_CHROMA_LOCATION_COSITED_EVEN = 0, |
164 | MSL_CHROMA_LOCATION_MIDPOINT, |
165 | MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff |
166 | }; |
167 | |
168 | enum MSLComponentSwizzle |
169 | { |
170 | MSL_COMPONENT_SWIZZLE_IDENTITY = 0, |
171 | MSL_COMPONENT_SWIZZLE_ZERO, |
172 | MSL_COMPONENT_SWIZZLE_ONE, |
173 | MSL_COMPONENT_SWIZZLE_R, |
174 | MSL_COMPONENT_SWIZZLE_G, |
175 | MSL_COMPONENT_SWIZZLE_B, |
176 | MSL_COMPONENT_SWIZZLE_A, |
177 | MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff |
178 | }; |
179 | |
180 | enum MSLSamplerYCbCrModelConversion |
181 | { |
182 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, |
183 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, |
184 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, |
185 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, |
186 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, |
187 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff |
188 | }; |
189 | |
190 | enum MSLSamplerYCbCrRange |
191 | { |
192 | MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, |
193 | MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, |
194 | MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff |
195 | }; |
196 | |
197 | struct MSLConstexprSampler |
198 | { |
199 | MSLSamplerCoord coord = MSL_SAMPLER_COORD_NORMALIZED; |
200 | MSLSamplerFilter min_filter = MSL_SAMPLER_FILTER_NEAREST; |
201 | MSLSamplerFilter mag_filter = MSL_SAMPLER_FILTER_NEAREST; |
202 | MSLSamplerMipFilter mip_filter = MSL_SAMPLER_MIP_FILTER_NONE; |
203 | MSLSamplerAddress s_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; |
204 | MSLSamplerAddress t_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; |
205 | MSLSamplerAddress r_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; |
206 | MSLSamplerCompareFunc compare_func = MSL_SAMPLER_COMPARE_FUNC_NEVER; |
207 | MSLSamplerBorderColor border_color = MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK; |
208 | float lod_clamp_min = 0.0f; |
209 | float lod_clamp_max = 1000.0f; |
210 | int max_anisotropy = 1; |
211 | |
212 | // Sampler Y'CbCr conversion parameters |
213 | uint32_t planes = 0; |
214 | MSLFormatResolution resolution = MSL_FORMAT_RESOLUTION_444; |
215 | MSLSamplerFilter chroma_filter = MSL_SAMPLER_FILTER_NEAREST; |
216 | MSLChromaLocation x_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; |
217 | MSLChromaLocation y_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; |
218 | MSLComponentSwizzle swizzle[4]; // IDENTITY, IDENTITY, IDENTITY, IDENTITY |
219 | MSLSamplerYCbCrModelConversion ycbcr_model = MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY; |
220 | MSLSamplerYCbCrRange ycbcr_range = MSL_SAMPLER_YCBCR_RANGE_ITU_FULL; |
221 | uint32_t bpc = 8; |
222 | |
223 | bool compare_enable = false; |
224 | bool lod_clamp_enable = false; |
225 | bool anisotropy_enable = false; |
226 | bool ycbcr_conversion_enable = false; |
227 | |
228 | MSLConstexprSampler() |
229 | { |
230 | for (uint32_t i = 0; i < 4; i++) |
231 | swizzle[i] = MSL_COMPONENT_SWIZZLE_IDENTITY; |
232 | } |
233 | bool swizzle_is_identity() const |
234 | { |
235 | return (swizzle[0] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[1] == MSL_COMPONENT_SWIZZLE_IDENTITY && |
236 | swizzle[2] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[3] == MSL_COMPONENT_SWIZZLE_IDENTITY); |
237 | } |
238 | bool swizzle_has_one_or_zero() const |
239 | { |
240 | return (swizzle[0] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[0] == MSL_COMPONENT_SWIZZLE_ONE || |
241 | swizzle[1] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[1] == MSL_COMPONENT_SWIZZLE_ONE || |
242 | swizzle[2] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[2] == MSL_COMPONENT_SWIZZLE_ONE || |
243 | swizzle[3] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[3] == MSL_COMPONENT_SWIZZLE_ONE); |
244 | } |
245 | }; |
246 | |
247 | // Special constant used in a MSLResourceBinding desc_set |
248 | // element to indicate the bindings for the push constants. |
249 | // Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly. |
250 | static const uint32_t kPushConstDescSet = ResourceBindingPushConstantDescriptorSet; |
251 | |
252 | // Special constant used in a MSLResourceBinding binding |
253 | // element to indicate the bindings for the push constants. |
254 | // Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly. |
255 | static const uint32_t kPushConstBinding = ResourceBindingPushConstantBinding; |
256 | |
257 | // Special constant used in a MSLResourceBinding binding |
258 | // element to indicate the buffer binding for swizzle buffers. |
259 | static const uint32_t kSwizzleBufferBinding = ~(1u); |
260 | |
261 | // Special constant used in a MSLResourceBinding binding |
262 | // element to indicate the buffer binding for buffer size buffers to support OpArrayLength. |
263 | static const uint32_t kBufferSizeBufferBinding = ~(2u); |
264 | |
265 | // Special constant used in a MSLResourceBinding binding |
266 | // element to indicate the buffer binding used for the argument buffer itself. |
267 | // This buffer binding should be kept as small as possible as all automatic bindings for buffers |
268 | // will start at max(kArgumentBufferBinding) + 1. |
269 | static const uint32_t kArgumentBufferBinding = ~(3u); |
270 | |
271 | static const uint32_t kMaxArgumentBuffers = 8; |
272 | |
273 | // The arbitrary maximum for the nesting of array of array copies. |
274 | static const uint32_t kArrayCopyMultidimMax = 6; |
275 | |
276 | // Decompiles SPIR-V to Metal Shading Language |
277 | class CompilerMSL : public CompilerGLSL |
278 | { |
279 | public: |
280 | // Options for compiling to Metal Shading Language |
281 | struct Options |
282 | { |
283 | typedef enum |
284 | { |
285 | iOS = 0, |
286 | macOS = 1 |
287 | } Platform; |
288 | |
289 | Platform platform = macOS; |
290 | uint32_t msl_version = make_msl_version(major: 1, minor: 2); |
291 | uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers |
292 | uint32_t r32ui_linear_texture_alignment = 4; |
293 | uint32_t r32ui_alignment_constant_id = 65535; |
294 | uint32_t swizzle_buffer_index = 30; |
295 | uint32_t indirect_params_buffer_index = 29; |
296 | uint32_t shader_output_buffer_index = 28; |
297 | uint32_t shader_patch_output_buffer_index = 27; |
298 | uint32_t shader_tess_factor_buffer_index = 26; |
299 | uint32_t buffer_size_buffer_index = 25; |
300 | uint32_t view_mask_buffer_index = 24; |
301 | uint32_t dynamic_offsets_buffer_index = 23; |
302 | uint32_t shader_input_buffer_index = 22; |
303 | uint32_t shader_index_buffer_index = 21; |
304 | uint32_t shader_input_wg_index = 0; |
305 | uint32_t device_index = 0; |
306 | uint32_t enable_frag_output_mask = 0xffffffff; |
307 | // Metal doesn't allow setting a fixed sample mask directly in the pipeline. |
308 | // We can evade this restriction by ANDing the internal sample_mask output |
309 | // of the shader with the additional fixed sample mask. |
310 | uint32_t additional_fixed_sample_mask = 0xffffffff; |
311 | bool enable_point_size_builtin = true; |
312 | bool enable_frag_depth_builtin = true; |
313 | bool enable_frag_stencil_ref_builtin = true; |
314 | bool disable_rasterization = false; |
315 | bool capture_output_to_buffer = false; |
316 | bool swizzle_texture_samples = false; |
317 | bool tess_domain_origin_lower_left = false; |
318 | bool multiview = false; |
319 | bool multiview_layered_rendering = true; |
320 | bool view_index_from_device_index = false; |
321 | bool dispatch_base = false; |
322 | bool texture_1D_as_2D = false; |
323 | |
324 | // Enable use of MSL 2.0 indirect argument buffers. |
325 | // MSL 2.0 must also be enabled. |
326 | bool argument_buffers = false; |
327 | |
328 | // Ensures vertex and instance indices start at zero. This reflects the behavior of HLSL with SV_VertexID and SV_InstanceID. |
329 | bool enable_base_index_zero = false; |
330 | |
331 | // Fragment output in MSL must have at least as many components as the render pass. |
332 | // Add support to explicit pad out components. |
333 | bool pad_fragment_output_components = false; |
334 | |
335 | // Specifies whether the iOS target version supports the [[base_vertex]] and [[base_instance]] attributes. |
336 | bool ios_support_base_vertex_instance = false; |
337 | |
338 | // Use Metal's native frame-buffer fetch API for subpass inputs. |
339 | bool use_framebuffer_fetch_subpasses = false; |
340 | |
341 | // Enables use of "fma" intrinsic for invariant float math |
342 | bool invariant_float_math = false; |
343 | |
344 | // Emulate texturecube_array with texture2d_array for iOS where this type is not available |
345 | bool emulate_cube_array = false; |
346 | |
347 | // Allow user to enable decoration binding |
348 | bool enable_decoration_binding = false; |
349 | |
350 | // Requires MSL 2.1, use the native support for texel buffers. |
351 | bool texture_buffer_native = false; |
352 | |
353 | // Forces all resources which are part of an argument buffer to be considered active. |
354 | // This ensures ABI compatibility between shaders where some resources might be unused, |
355 | // and would otherwise declare a different IAB. |
356 | bool force_active_argument_buffer_resources = false; |
357 | |
358 | // Aligns each resource in an argument buffer to its assigned index value, id(N), |
359 | // by adding synthetic padding members in the argument buffer struct for any resources |
360 | // in the argument buffer that are not defined and used by the shader. This allows |
361 | // the shader to index into the correct argument in a descriptor set argument buffer |
362 | // that is shared across shaders, where not all resources in the argument buffer are |
363 | // defined in each shader. For this to work, an MSLResourceBinding must be provided for |
364 | // all descriptors in any descriptor set held in an argument buffer in the shader, and |
365 | // that MSLResourceBinding must have the basetype and count members populated correctly. |
366 | // The implementation here assumes any inline blocks in the argument buffer is provided |
367 | // in a Metal buffer, and doesn't take into consideration inline blocks that are |
368 | // optionally embedded directly into the argument buffer via add_inline_uniform_block(). |
369 | bool pad_argument_buffer_resources = false; |
370 | |
371 | // Forces the use of plain arrays, which works around certain driver bugs on certain versions |
372 | // of Intel Macbooks. See https://github.com/KhronosGroup/SPIRV-Cross/issues/1210. |
373 | // May reduce performance in scenarios where arrays are copied around as value-types. |
374 | bool force_native_arrays = false; |
375 | |
376 | // If a shader writes clip distance, also emit user varyings which |
377 | // can be read in subsequent stages. |
378 | bool enable_clip_distance_user_varying = true; |
379 | |
380 | // In a tessellation control shader, assume that more than one patch can be processed in a |
381 | // single workgroup. This requires changes to the way the InvocationId and PrimitiveId |
382 | // builtins are processed, but should result in more efficient usage of the GPU. |
383 | bool multi_patch_workgroup = false; |
384 | |
385 | // If set, a vertex shader will be compiled as part of a tessellation pipeline. |
386 | // It will be translated as a compute kernel, so it can use the global invocation ID |
387 | // to index the output buffer. |
388 | bool vertex_for_tessellation = false; |
389 | |
390 | // Assume that SubpassData images have multiple layers. Layered input attachments |
391 | // are addressed relative to the Layer output from the vertex pipeline. This option |
392 | // has no effect with multiview, since all input attachments are assumed to be layered |
393 | // and will be addressed using the current ViewIndex. |
394 | bool arrayed_subpass_input = false; |
395 | |
396 | // Whether to use SIMD-group or quadgroup functions to implement group non-uniform |
397 | // operations. Some GPUs on iOS do not support the SIMD-group functions, only the |
398 | // quadgroup functions. |
399 | bool ios_use_simdgroup_functions = false; |
400 | |
401 | // If set, the subgroup size will be assumed to be one, and subgroup-related |
402 | // builtins and operations will be emitted accordingly. This mode is intended to |
403 | // be used by MoltenVK on hardware/software configurations which do not provide |
404 | // sufficient support for subgroups. |
405 | bool emulate_subgroups = false; |
406 | |
407 | // If nonzero, a fixed subgroup size to assume. Metal, similarly to VK_EXT_subgroup_size_control, |
408 | // allows the SIMD-group size (aka thread execution width) to vary depending on |
409 | // register usage and requirements. In certain circumstances--for example, a pipeline |
410 | // in MoltenVK without VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT-- |
411 | // this is undesirable. This fixes the value of the SubgroupSize builtin, instead of |
412 | // mapping it to the Metal builtin [[thread_execution_width]]. If the thread |
413 | // execution width is reduced, the extra invocations will appear to be inactive. |
414 | // If zero, the SubgroupSize will be allowed to vary, and the builtin will be mapped |
415 | // to the Metal [[thread_execution_width]] builtin. |
416 | uint32_t fixed_subgroup_size = 0; |
417 | |
418 | enum class IndexType |
419 | { |
420 | None = 0, |
421 | UInt16 = 1, |
422 | UInt32 = 2 |
423 | }; |
424 | |
425 | // The type of index in the index buffer, if present. For a compute shader, Metal |
426 | // requires specifying the indexing at pipeline creation, rather than at draw time |
427 | // as with graphics pipelines. This means we must create three different pipelines, |
428 | // for no indexing, 16-bit indices, and 32-bit indices. Each requires different |
429 | // handling for the gl_VertexIndex builtin. We may as well, then, create three |
430 | // different shaders for these three scenarios. |
431 | IndexType vertex_index_type = IndexType::None; |
432 | |
433 | // If set, a dummy [[sample_id]] input is added to a fragment shader if none is present. |
434 | // This will force the shader to run at sample rate, assuming Metal does not optimize |
435 | // the extra threads away. |
436 | bool force_sample_rate_shading = false; |
437 | |
438 | bool is_ios() const |
439 | { |
440 | return platform == iOS; |
441 | } |
442 | |
443 | bool is_macos() const |
444 | { |
445 | return platform == macOS; |
446 | } |
447 | |
448 | bool use_quadgroup_operation() const |
449 | { |
450 | return is_ios() && !ios_use_simdgroup_functions; |
451 | } |
452 | |
453 | void set_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) |
454 | { |
455 | msl_version = make_msl_version(major, minor, patch); |
456 | } |
457 | |
458 | bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) const |
459 | { |
460 | return msl_version >= make_msl_version(major, minor, patch); |
461 | } |
462 | |
463 | static uint32_t make_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) |
464 | { |
465 | return (major * 10000) + (minor * 100) + patch; |
466 | } |
467 | }; |
468 | |
469 | const Options &get_msl_options() const |
470 | { |
471 | return msl_options; |
472 | } |
473 | |
474 | void set_msl_options(const Options &opts) |
475 | { |
476 | msl_options = opts; |
477 | } |
478 | |
479 | // Provide feedback to calling API to allow runtime to disable pipeline |
480 | // rasterization if vertex shader requires rasterization to be disabled. |
481 | bool get_is_rasterization_disabled() const |
482 | { |
483 | return is_rasterization_disabled && (get_entry_point().model == spv::ExecutionModelVertex || |
484 | get_entry_point().model == spv::ExecutionModelTessellationControl || |
485 | get_entry_point().model == spv::ExecutionModelTessellationEvaluation); |
486 | } |
487 | |
488 | // Provide feedback to calling API to allow it to pass an auxiliary |
489 | // swizzle buffer if the shader needs it. |
490 | bool needs_swizzle_buffer() const |
491 | { |
492 | return used_swizzle_buffer; |
493 | } |
494 | |
495 | // Provide feedback to calling API to allow it to pass a buffer |
496 | // containing STORAGE_BUFFER buffer sizes to support OpArrayLength. |
497 | bool needs_buffer_size_buffer() const |
498 | { |
499 | return !buffers_requiring_array_length.empty(); |
500 | } |
501 | |
502 | // Provide feedback to calling API to allow it to pass a buffer |
503 | // containing the view mask for the current multiview subpass. |
504 | bool needs_view_mask_buffer() const |
505 | { |
506 | return msl_options.multiview && !msl_options.view_index_from_device_index; |
507 | } |
508 | |
509 | // Provide feedback to calling API to allow it to pass a buffer |
510 | // containing the dispatch base workgroup ID. |
511 | bool needs_dispatch_base_buffer() const |
512 | { |
513 | return msl_options.dispatch_base && !msl_options.supports_msl_version(major: 1, minor: 2); |
514 | } |
515 | |
516 | // Provide feedback to calling API to allow it to pass an output |
517 | // buffer if the shader needs it. |
518 | bool needs_output_buffer() const |
519 | { |
520 | return capture_output_to_buffer && stage_out_var_id != ID(0); |
521 | } |
522 | |
523 | // Provide feedback to calling API to allow it to pass a patch output |
524 | // buffer if the shader needs it. |
525 | bool needs_patch_output_buffer() const |
526 | { |
527 | return capture_output_to_buffer && patch_stage_out_var_id != ID(0); |
528 | } |
529 | |
530 | // Provide feedback to calling API to allow it to pass an input threadgroup |
531 | // buffer if the shader needs it. |
532 | bool needs_input_threadgroup_mem() const |
533 | { |
534 | return capture_output_to_buffer && stage_in_var_id != ID(0); |
535 | } |
536 | |
537 | explicit CompilerMSL(std::vector<uint32_t> spirv); |
538 | CompilerMSL(const uint32_t *ir, size_t word_count); |
539 | explicit CompilerMSL(const ParsedIR &ir); |
540 | explicit CompilerMSL(ParsedIR &&ir); |
541 | |
542 | // input is a shader input description used to fix up shader input variables. |
543 | // If shader inputs are provided, is_msl_shader_input_used() will return true after |
544 | // calling ::compile() if the location was used by the MSL code. |
545 | void add_msl_shader_input(const MSLShaderInput &input); |
546 | |
547 | // resource is a resource binding to indicate the MSL buffer, |
548 | // texture or sampler index to use for a particular SPIR-V description set |
549 | // and binding. If resource bindings are provided, |
550 | // is_msl_resource_binding_used() will return true after calling ::compile() if |
551 | // the set/binding combination was used by the MSL code. |
552 | void add_msl_resource_binding(const MSLResourceBinding &resource); |
553 | |
554 | // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource |
555 | // in this shader. index is the index within the dynamic offset buffer to use. This |
556 | // function marks that resource as using a dynamic offset (VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC |
557 | // or VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC). This function only has any effect if argument buffers |
558 | // are enabled. If so, the buffer will have its address adjusted at the beginning of the shader with |
559 | // an offset taken from the dynamic offset buffer. |
560 | void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index); |
561 | |
562 | // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource |
563 | // in this shader. This function marks that resource as an inline uniform block |
564 | // (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT). This function only has any effect if argument buffers |
565 | // are enabled. If so, the buffer block will be directly embedded into the argument |
566 | // buffer, instead of being referenced indirectly via pointer. |
567 | void add_inline_uniform_block(uint32_t desc_set, uint32_t binding); |
568 | |
569 | // When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets. |
570 | // This corresponds to VK_KHR_push_descriptor in Vulkan. |
571 | void add_discrete_descriptor_set(uint32_t desc_set); |
572 | |
573 | // If an argument buffer is large enough, it may need to be in the device storage space rather than |
574 | // constant. Opt-in to this behavior here on a per set basis. |
575 | void set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage); |
576 | |
577 | // Query after compilation is done. This allows you to check if an input location was used by the shader. |
578 | bool is_msl_shader_input_used(uint32_t location); |
579 | |
580 | // If not using add_msl_shader_input, it's possible |
581 | // that certain builtin attributes need to be automatically assigned locations. |
582 | // This is typical for tessellation builtin inputs such as tess levels, gl_Position, etc. |
583 | // This returns k_unknown_location if the location was explicitly assigned with |
584 | // add_msl_shader_input or the builtin is not used, otherwise returns N in [[attribute(N)]]. |
585 | uint32_t get_automatic_builtin_input_location(spv::BuiltIn builtin) const; |
586 | |
587 | // NOTE: Only resources which are remapped using add_msl_resource_binding will be reported here. |
588 | // Constexpr samplers are always assumed to be emitted. |
589 | // No specific MSLResourceBinding remapping is required for constexpr samplers as long as they are remapped |
590 | // by remap_constexpr_sampler(_by_binding). |
591 | bool is_msl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding) const; |
592 | |
593 | // This must only be called after a successful call to CompilerMSL::compile(). |
594 | // For a variable resource ID obtained through reflection API, report the automatically assigned resource index. |
595 | // If the descriptor set was part of an argument buffer, report the [[id(N)]], |
596 | // or [[buffer/texture/sampler]] binding for other resources. |
597 | // If the resource was a combined image sampler, report the image binding here, |
598 | // use the _secondary version of this call to query the sampler half of the resource. |
599 | // If no binding exists, uint32_t(-1) is returned. |
600 | uint32_t get_automatic_msl_resource_binding(uint32_t id) const; |
601 | |
602 | // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers, in which case the |
603 | // sampler's binding is returned instead. For any other resource type, -1 is returned. |
604 | // Secondary bindings are also used for the auxillary image atomic buffer. |
605 | uint32_t get_automatic_msl_resource_binding_secondary(uint32_t id) const; |
606 | |
607 | // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for multiplanar images, |
608 | // in which case the second plane's binding is returned instead. For any other resource type, -1 is returned. |
609 | uint32_t get_automatic_msl_resource_binding_tertiary(uint32_t id) const; |
610 | |
611 | // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for triplanar images, |
612 | // in which case the third plane's binding is returned instead. For any other resource type, -1 is returned. |
613 | uint32_t get_automatic_msl_resource_binding_quaternary(uint32_t id) const; |
614 | |
615 | // Compiles the SPIR-V code into Metal Shading Language. |
616 | std::string compile() override; |
617 | |
618 | // Remap a sampler with ID to a constexpr sampler. |
619 | // Older iOS targets must use constexpr samplers in certain cases (PCF), |
620 | // so a static sampler must be used. |
621 | // The sampler will not consume a binding, but be declared in the entry point as a constexpr sampler. |
622 | // This can be used on both combined image/samplers (sampler2D) or standalone samplers. |
623 | // The remapped sampler must not be an array of samplers. |
624 | // Prefer remap_constexpr_sampler_by_binding unless you're also doing reflection anyways. |
625 | void remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler); |
626 | |
627 | // Same as remap_constexpr_sampler, except you provide set/binding, rather than variable ID. |
628 | // Remaps based on ID take priority over set/binding remaps. |
629 | void remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding, const MSLConstexprSampler &sampler); |
630 | |
631 | // If using CompilerMSL::Options::pad_fragment_output_components, override the number of components we expect |
632 | // to use for a particular location. The default is 4 if number of components is not overridden. |
633 | void set_fragment_output_components(uint32_t location, uint32_t components); |
634 | |
635 | void set_combined_sampler_suffix(const char *suffix); |
636 | const char *get_combined_sampler_suffix() const; |
637 | |
638 | protected: |
639 | // An enum of SPIR-V functions that are implemented in additional |
640 | // source code that is added to the shader if necessary. |
641 | enum SPVFuncImpl : uint8_t |
642 | { |
643 | SPVFuncImplNone, |
644 | SPVFuncImplMod, |
645 | SPVFuncImplRadians, |
646 | SPVFuncImplDegrees, |
647 | SPVFuncImplFindILsb, |
648 | SPVFuncImplFindSMsb, |
649 | SPVFuncImplFindUMsb, |
650 | SPVFuncImplSSign, |
651 | SPVFuncImplArrayCopyMultidimBase, |
652 | // Unfortunately, we cannot use recursive templates in the MSL compiler properly, |
653 | // so stamp out variants up to some arbitrary maximum. |
654 | SPVFuncImplArrayCopy = SPVFuncImplArrayCopyMultidimBase + 1, |
655 | SPVFuncImplArrayOfArrayCopy2Dim = SPVFuncImplArrayCopyMultidimBase + 2, |
656 | SPVFuncImplArrayOfArrayCopy3Dim = SPVFuncImplArrayCopyMultidimBase + 3, |
657 | SPVFuncImplArrayOfArrayCopy4Dim = SPVFuncImplArrayCopyMultidimBase + 4, |
658 | SPVFuncImplArrayOfArrayCopy5Dim = SPVFuncImplArrayCopyMultidimBase + 5, |
659 | SPVFuncImplArrayOfArrayCopy6Dim = SPVFuncImplArrayCopyMultidimBase + 6, |
660 | SPVFuncImplTexelBufferCoords, |
661 | SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations |
662 | SPVFuncImplFMul, |
663 | SPVFuncImplFAdd, |
664 | SPVFuncImplFSub, |
665 | SPVFuncImplQuantizeToF16, |
666 | SPVFuncImplCubemapTo2DArrayFace, |
667 | SPVFuncImplUnsafeArray, // Allow Metal to use the array<T> template to make arrays a value type |
668 | SPVFuncImplInverse4x4, |
669 | SPVFuncImplInverse3x3, |
670 | SPVFuncImplInverse2x2, |
671 | // It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's |
672 | // emitted before them. |
673 | SPVFuncImplForwardArgs, |
674 | // Likewise, this must come before *Swizzle. |
675 | SPVFuncImplGetSwizzle, |
676 | SPVFuncImplTextureSwizzle, |
677 | SPVFuncImplGatherSwizzle, |
678 | SPVFuncImplGatherCompareSwizzle, |
679 | SPVFuncImplSubgroupBroadcast, |
680 | SPVFuncImplSubgroupBroadcastFirst, |
681 | SPVFuncImplSubgroupBallot, |
682 | , |
683 | SPVFuncImplSubgroupBallotFindLSB, |
684 | SPVFuncImplSubgroupBallotFindMSB, |
685 | SPVFuncImplSubgroupBallotBitCount, |
686 | SPVFuncImplSubgroupAllEqual, |
687 | SPVFuncImplSubgroupShuffle, |
688 | SPVFuncImplSubgroupShuffleXor, |
689 | SPVFuncImplSubgroupShuffleUp, |
690 | SPVFuncImplSubgroupShuffleDown, |
691 | SPVFuncImplQuadBroadcast, |
692 | SPVFuncImplQuadSwap, |
693 | SPVFuncImplReflectScalar, |
694 | SPVFuncImplRefractScalar, |
695 | SPVFuncImplFaceForwardScalar, |
696 | SPVFuncImplChromaReconstructNearest2Plane, |
697 | SPVFuncImplChromaReconstructNearest3Plane, |
698 | SPVFuncImplChromaReconstructLinear422CositedEven2Plane, |
699 | SPVFuncImplChromaReconstructLinear422CositedEven3Plane, |
700 | SPVFuncImplChromaReconstructLinear422Midpoint2Plane, |
701 | SPVFuncImplChromaReconstructLinear422Midpoint3Plane, |
702 | SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane, |
703 | SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane, |
704 | SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane, |
705 | SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane, |
706 | SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane, |
707 | SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane, |
708 | SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane, |
709 | SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane, |
710 | SPVFuncImplExpandITUFullRange, |
711 | SPVFuncImplExpandITUNarrowRange, |
712 | SPVFuncImplConvertYCbCrBT709, |
713 | SPVFuncImplConvertYCbCrBT601, |
714 | SPVFuncImplConvertYCbCrBT2020, |
715 | SPVFuncImplDynamicImageSampler, |
716 | }; |
717 | |
718 | // If the underlying resource has been used for comparison then duplicate loads of that resource must be too |
719 | // Use Metal's native frame-buffer fetch API for subpass inputs. |
720 | void emit_texture_op(const Instruction &i, bool sparse) override; |
721 | void emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); |
722 | void emit_instruction(const Instruction &instr) override; |
723 | void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, |
724 | uint32_t count) override; |
725 | void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op, |
726 | const uint32_t *args, uint32_t count) override; |
727 | void () override; |
728 | void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; |
729 | void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; |
730 | void emit_subgroup_op(const Instruction &i) override; |
731 | std::string to_texture_op(const Instruction &i, bool sparse, bool *forward, |
732 | SmallVector<uint32_t> &inherited_expressions) override; |
733 | void emit_fixup() override; |
734 | std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, |
735 | const std::string &qualifier = "" ); |
736 | void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, |
737 | const std::string &qualifier = "" , uint32_t base_offset = 0) override; |
738 | void emit_struct_padding_target(const SPIRType &type) override; |
739 | std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; |
740 | void emit_block_hints(const SPIRBlock &block) override; |
741 | |
742 | // Allow Metal to use the array<T> template to make arrays a value type |
743 | std::string type_to_array_glsl(const SPIRType &type) override; |
744 | std::string constant_op_expression(const SPIRConstantOp &cop) override; |
745 | |
746 | // Threadgroup arrays can't have a wrapper type |
747 | std::string variable_decl(const SPIRVariable &variable) override; |
748 | |
749 | bool variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const override; |
750 | |
751 | // GCC workaround of lambdas calling protected functions (for older GCC versions) |
752 | std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override; |
753 | |
754 | std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override; |
755 | std::string sampler_type(const SPIRType &type, uint32_t id); |
756 | std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override; |
757 | std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; |
758 | std::string to_name(uint32_t id, bool allow_alias = true) const override; |
759 | std::string to_function_name(const TextureFunctionNameArguments &args) override; |
760 | std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward) override; |
761 | std::string to_initializer_expression(const SPIRVariable &var) override; |
762 | std::string to_zero_initialized_expression(uint32_t type_id) override; |
763 | |
764 | std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id, |
765 | bool is_packed, bool row_major) override; |
766 | |
767 | // Returns true for BuiltInSampleMask because gl_SampleMask[] is an array in SPIR-V, but [[sample_mask]] is a scalar in Metal. |
768 | bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override; |
769 | |
770 | std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; |
771 | bool emit_complex_bitcast(uint32_t result_id, uint32_t id, uint32_t op0) override; |
772 | bool skip_argument(uint32_t id) const override; |
773 | std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) override; |
774 | std::string to_qualifiers_glsl(uint32_t id) override; |
775 | void replace_illegal_names() override; |
776 | void declare_undefined_values() override; |
777 | void declare_constant_arrays(); |
778 | |
779 | void replace_illegal_entry_point_names(); |
780 | void sync_entry_point_aliases_and_names(); |
781 | |
782 | static const std::unordered_set<std::string> &get_reserved_keyword_set(); |
783 | static const std::unordered_set<std::string> &get_illegal_func_names(); |
784 | |
785 | // Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries |
786 | void declare_complex_constant_arrays(); |
787 | |
788 | bool is_patch_block(const SPIRType &type); |
789 | bool is_non_native_row_major_matrix(uint32_t id) override; |
790 | bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override; |
791 | std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, uint32_t physical_type_id, |
792 | bool is_packed) override; |
793 | |
794 | void preprocess_op_codes(); |
795 | void localize_global_variables(); |
796 | void (); |
797 | void mark_packable_structs(); |
798 | void mark_as_packable(SPIRType &type); |
799 | |
800 | std::unordered_map<uint32_t, std::set<uint32_t>> function_global_vars; |
801 | void (uint32_t func_id, std::set<uint32_t> &added_arg_ids, |
802 | std::unordered_set<uint32_t> &global_var_ids, |
803 | std::unordered_set<uint32_t> &processed_func_ids); |
804 | uint32_t add_interface_block(spv::StorageClass storage, bool patch = false); |
805 | uint32_t add_interface_block_pointer(uint32_t ib_var_id, spv::StorageClass storage); |
806 | |
807 | struct InterfaceBlockMeta |
808 | { |
809 | struct LocationMeta |
810 | { |
811 | uint32_t base_type_id = 0; |
812 | uint32_t num_components = 0; |
813 | bool flat = false; |
814 | bool noperspective = false; |
815 | bool centroid = false; |
816 | bool sample = false; |
817 | }; |
818 | std::unordered_map<uint32_t, LocationMeta> location_meta; |
819 | bool strip_array = false; |
820 | bool allow_local_declaration = false; |
821 | }; |
822 | |
823 | std::string to_tesc_invocation_id(); |
824 | void emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array); |
825 | void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type, |
826 | SPIRVariable &var, InterfaceBlockMeta &meta); |
827 | void add_composite_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, |
828 | SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta); |
829 | void add_plain_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, |
830 | SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta); |
831 | bool add_component_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, |
832 | SPIRVariable &var, const SPIRType &type, |
833 | InterfaceBlockMeta &meta); |
834 | void add_plain_member_variable_to_interface_block(spv::StorageClass storage, |
835 | const std::string &ib_var_ref, SPIRType &ib_type, |
836 | SPIRVariable &var, SPIRType &var_type, |
837 | uint32_t mbr_idx, InterfaceBlockMeta &meta, |
838 | const std::string &mbr_name_qual, |
839 | const std::string &var_chain_qual, |
840 | uint32_t &location, uint32_t &var_mbr_idx); |
841 | void add_composite_member_variable_to_interface_block(spv::StorageClass storage, |
842 | const std::string &ib_var_ref, SPIRType &ib_type, |
843 | SPIRVariable &var, SPIRType &var_type, |
844 | uint32_t mbr_idx, InterfaceBlockMeta &meta, |
845 | const std::string &mbr_name_qual, |
846 | const std::string &var_chain_qual, |
847 | uint32_t &location, uint32_t &var_mbr_idx); |
848 | void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var); |
849 | |
850 | void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id); |
851 | |
852 | void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, |
853 | spv::StorageClass storage, bool fallback = false); |
854 | uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin); |
855 | uint32_t ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t component, |
856 | uint32_t num_components, bool strip_array); |
857 | |
858 | void emit_custom_templates(); |
859 | void emit_custom_functions(); |
860 | void emit_resources(); |
861 | void emit_specialization_constants_and_structs(); |
862 | void emit_interface_block(uint32_t ib_var_id); |
863 | bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs); |
864 | uint32_t get_resource_array_size(uint32_t id) const; |
865 | |
866 | void fix_up_shader_inputs_outputs(); |
867 | |
868 | std::string func_type_decl(SPIRType &type); |
869 | std::string entry_point_args_classic(bool append_comma); |
870 | std::string entry_point_args_argument_buffer(bool append_comma); |
871 | std::string entry_point_arg_stage_in(); |
872 | void entry_point_args_builtin(std::string &args); |
873 | void entry_point_args_discrete_descriptors(std::string &args); |
874 | std::string append_member_name(const std::string &qualifier, const SPIRType &type, uint32_t index); |
875 | std::string ensure_valid_name(std::string name, std::string pfx); |
876 | std::string to_sampler_expression(uint32_t id); |
877 | std::string to_swizzle_expression(uint32_t id); |
878 | std::string to_buffer_size_expression(uint32_t id); |
879 | bool is_sample_rate() const; |
880 | bool is_intersection_query() const; |
881 | bool is_direct_input_builtin(spv::BuiltIn builtin); |
882 | std::string builtin_qualifier(spv::BuiltIn builtin); |
883 | std::string builtin_type_decl(spv::BuiltIn builtin, uint32_t id = 0); |
884 | std::string built_in_func_arg(spv::BuiltIn builtin, bool prefix_comma); |
885 | std::string member_attribute_qualifier(const SPIRType &type, uint32_t index); |
886 | std::string member_location_attribute_qualifier(const SPIRType &type, uint32_t index); |
887 | std::string argument_decl(const SPIRFunction::Parameter &arg); |
888 | const char *descriptor_address_space(uint32_t id, spv::StorageClass storage, const char *plain_address_space) const; |
889 | std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp); |
890 | uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane = 0); |
891 | uint32_t get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr) const; |
892 | uint32_t get_or_allocate_builtin_input_member_location(spv::BuiltIn builtin, |
893 | uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); |
894 | |
895 | uint32_t get_physical_tess_level_array_size(spv::BuiltIn builtin) const; |
896 | |
897 | // MSL packing rules. These compute the effective packing rules as observed by the MSL compiler in the MSL output. |
898 | // These values can change depending on various extended decorations which control packing rules. |
899 | // We need to make these rules match up with SPIR-V declared rules. |
900 | uint32_t get_declared_type_size_msl(const SPIRType &type, bool packed, bool row_major) const; |
901 | uint32_t get_declared_type_array_stride_msl(const SPIRType &type, bool packed, bool row_major) const; |
902 | uint32_t get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const; |
903 | uint32_t get_declared_type_alignment_msl(const SPIRType &type, bool packed, bool row_major) const; |
904 | |
905 | uint32_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const; |
906 | uint32_t get_declared_struct_member_array_stride_msl(const SPIRType &struct_type, uint32_t index) const; |
907 | uint32_t get_declared_struct_member_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; |
908 | uint32_t get_declared_struct_member_alignment_msl(const SPIRType &struct_type, uint32_t index) const; |
909 | |
910 | uint32_t get_declared_input_size_msl(const SPIRType &struct_type, uint32_t index) const; |
911 | uint32_t get_declared_input_array_stride_msl(const SPIRType &struct_type, uint32_t index) const; |
912 | uint32_t get_declared_input_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; |
913 | uint32_t get_declared_input_alignment_msl(const SPIRType &struct_type, uint32_t index) const; |
914 | |
915 | const SPIRType &get_physical_member_type(const SPIRType &struct_type, uint32_t index) const; |
916 | SPIRType get_presumed_input_type(const SPIRType &struct_type, uint32_t index) const; |
917 | |
918 | uint32_t get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment = false, |
919 | bool ignore_padding = false) const; |
920 | |
921 | std::string to_component_argument(uint32_t id); |
922 | void align_struct(SPIRType &ib_type, std::unordered_set<uint32_t> &aligned_structs); |
923 | void mark_scalar_layout_structs(const SPIRType &ib_type); |
924 | void mark_struct_members_packed(const SPIRType &type); |
925 | void ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index); |
926 | bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const; |
927 | std::string get_argument_address_space(const SPIRVariable &argument); |
928 | std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false); |
929 | const char *to_restrict(uint32_t id, bool space = true); |
930 | SPIRType &get_stage_in_struct_type(); |
931 | SPIRType &get_stage_out_struct_type(); |
932 | SPIRType &get_patch_stage_in_struct_type(); |
933 | SPIRType &get_patch_stage_out_struct_type(); |
934 | std::string get_tess_factor_struct_name(); |
935 | SPIRType &get_uint_type(); |
936 | uint32_t get_uint_type_id(); |
937 | void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, spv::Op opcode, |
938 | uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0, |
939 | bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0); |
940 | const char *get_memory_order(uint32_t spv_mem_sem); |
941 | void add_pragma_line(const std::string &line); |
942 | void add_typedef_line(const std::string &line); |
943 | void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem); |
944 | void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id, |
945 | spv::StorageClass lhs_storage, spv::StorageClass rhs_storage) override; |
946 | void build_implicit_builtins(); |
947 | uint32_t build_constant_uint_array_pointer(); |
948 | void emit_entry_point_declarations() override; |
949 | bool uses_explicit_early_fragment_test(); |
950 | |
951 | uint32_t builtin_frag_coord_id = 0; |
952 | uint32_t builtin_sample_id_id = 0; |
953 | uint32_t builtin_sample_mask_id = 0; |
954 | uint32_t builtin_vertex_idx_id = 0; |
955 | uint32_t builtin_base_vertex_id = 0; |
956 | uint32_t builtin_instance_idx_id = 0; |
957 | uint32_t builtin_base_instance_id = 0; |
958 | uint32_t builtin_view_idx_id = 0; |
959 | uint32_t builtin_layer_id = 0; |
960 | uint32_t builtin_invocation_id_id = 0; |
961 | uint32_t builtin_primitive_id_id = 0; |
962 | uint32_t builtin_subgroup_invocation_id_id = 0; |
963 | uint32_t builtin_subgroup_size_id = 0; |
964 | uint32_t builtin_dispatch_base_id = 0; |
965 | uint32_t builtin_stage_input_size_id = 0; |
966 | uint32_t builtin_local_invocation_index_id = 0; |
967 | uint32_t builtin_workgroup_size_id = 0; |
968 | uint32_t swizzle_buffer_id = 0; |
969 | uint32_t buffer_size_buffer_id = 0; |
970 | uint32_t view_mask_buffer_id = 0; |
971 | uint32_t dynamic_offsets_buffer_id = 0; |
972 | uint32_t uint_type_id = 0; |
973 | uint32_t argument_buffer_padding_buffer_type_id = 0; |
974 | uint32_t argument_buffer_padding_image_type_id = 0; |
975 | uint32_t argument_buffer_padding_sampler_type_id = 0; |
976 | |
977 | bool does_shader_write_sample_mask = false; |
978 | |
979 | void cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; |
980 | void cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override; |
981 | void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override; |
982 | |
983 | void analyze_sampled_image_usage(); |
984 | |
985 | bool access_chain_needs_stage_io_builtin_translation(uint32_t base) override; |
986 | void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage, |
987 | bool &is_packed) override; |
988 | void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length); |
989 | bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length); |
990 | bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr); |
991 | bool is_out_of_bounds_tessellation_level(uint32_t id_lhs); |
992 | |
993 | void ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin); |
994 | |
995 | void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id); |
996 | |
997 | std::string convert_to_f32(const std::string &expr, uint32_t components); |
998 | |
999 | Options msl_options; |
1000 | std::set<SPVFuncImpl> spv_function_implementations; |
1001 | // Must be ordered to ensure declarations are in a specific order. |
1002 | std::map<LocationComponentPair, MSLShaderInput> inputs_by_location; |
1003 | std::unordered_map<uint32_t, MSLShaderInput> inputs_by_builtin; |
1004 | std::unordered_set<uint32_t> location_inputs_in_use; |
1005 | std::unordered_set<uint32_t> location_inputs_in_use_fallback; |
1006 | std::unordered_map<uint32_t, uint32_t> fragment_output_components; |
1007 | std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_input_location; |
1008 | std::set<std::string> pragma_lines; |
1009 | std::set<std::string> typedef_lines; |
1010 | SmallVector<uint32_t> vars_needing_early_declaration; |
1011 | |
1012 | std::unordered_map<StageSetBinding, std::pair<MSLResourceBinding, bool>, InternalHasher> resource_bindings; |
1013 | std::unordered_map<StageSetBinding, uint32_t, InternalHasher> resource_arg_buff_idx_to_binding_number; |
1014 | |
1015 | uint32_t next_metal_resource_index_buffer = 0; |
1016 | uint32_t next_metal_resource_index_texture = 0; |
1017 | uint32_t next_metal_resource_index_sampler = 0; |
1018 | // Intentionally uninitialized, works around MSVC 2013 bug. |
1019 | uint32_t next_metal_resource_ids[kMaxArgumentBuffers]; |
1020 | |
1021 | VariableID stage_in_var_id = 0; |
1022 | VariableID stage_out_var_id = 0; |
1023 | VariableID patch_stage_in_var_id = 0; |
1024 | VariableID patch_stage_out_var_id = 0; |
1025 | VariableID stage_in_ptr_var_id = 0; |
1026 | VariableID stage_out_ptr_var_id = 0; |
1027 | VariableID stage_out_masked_builtin_type_id = 0; |
1028 | |
1029 | // Handle HLSL-style 0-based vertex/instance index. |
1030 | enum class TriState |
1031 | { |
1032 | Neutral, |
1033 | No, |
1034 | Yes |
1035 | }; |
1036 | TriState needs_base_vertex_arg = TriState::Neutral; |
1037 | TriState needs_base_instance_arg = TriState::Neutral; |
1038 | |
1039 | bool has_sampled_images = false; |
1040 | bool builtin_declaration = false; // Handle HLSL-style 0-based vertex/instance index. |
1041 | |
1042 | bool is_using_builtin_array = false; // Force the use of C style array declaration. |
1043 | bool using_builtin_array() const; |
1044 | |
1045 | bool is_rasterization_disabled = false; |
1046 | bool capture_output_to_buffer = false; |
1047 | bool needs_swizzle_buffer_def = false; |
1048 | bool used_swizzle_buffer = false; |
1049 | bool added_builtin_tess_level = false; |
1050 | bool needs_subgroup_invocation_id = false; |
1051 | bool needs_subgroup_size = false; |
1052 | bool needs_sample_id = false; |
1053 | std::string qual_pos_var_name; |
1054 | std::string stage_in_var_name = "in" ; |
1055 | std::string stage_out_var_name = "out" ; |
1056 | std::string patch_stage_in_var_name = "patchIn" ; |
1057 | std::string patch_stage_out_var_name = "patchOut" ; |
1058 | std::string sampler_name_suffix = "Smplr" ; |
1059 | std::string swizzle_name_suffix = "Swzl" ; |
1060 | std::string buffer_size_name_suffix = "BufferSize" ; |
1061 | std::string plane_name_suffix = "Plane" ; |
1062 | std::string input_wg_var_name = "gl_in" ; |
1063 | std::string input_buffer_var_name = "spvIn" ; |
1064 | std::string output_buffer_var_name = "spvOut" ; |
1065 | std::string patch_output_buffer_var_name = "spvPatchOut" ; |
1066 | std::string tess_factor_buffer_var_name = "spvTessLevel" ; |
1067 | std::string index_buffer_var_name = "spvIndices" ; |
1068 | spv::Op previous_instruction_opcode = spv::OpNop; |
1069 | |
1070 | // Must be ordered since declaration is in a specific order. |
1071 | std::map<uint32_t, MSLConstexprSampler> constexpr_samplers_by_id; |
1072 | std::unordered_map<SetBindingPair, MSLConstexprSampler, InternalHasher> constexpr_samplers_by_binding; |
1073 | const MSLConstexprSampler *find_constexpr_sampler(uint32_t id) const; |
1074 | |
1075 | std::unordered_set<uint32_t> buffers_requiring_array_length; |
1076 | SmallVector<uint32_t> buffer_arrays; |
1077 | std::unordered_set<uint32_t> atomic_image_vars; // Emulate texture2D atomic operations |
1078 | std::unordered_set<uint32_t> pull_model_inputs; |
1079 | |
1080 | // Must be ordered since array is in a specific order. |
1081 | std::map<SetBindingPair, std::pair<uint32_t, uint32_t>> buffers_requiring_dynamic_offset; |
1082 | |
1083 | SmallVector<uint32_t> disabled_frag_outputs; |
1084 | |
1085 | std::unordered_set<SetBindingPair, InternalHasher> inline_uniform_blocks; |
1086 | |
1087 | uint32_t argument_buffer_ids[kMaxArgumentBuffers]; |
1088 | uint32_t argument_buffer_discrete_mask = 0; |
1089 | uint32_t argument_buffer_device_storage_mask = 0; |
1090 | |
1091 | void analyze_argument_buffers(); |
1092 | bool descriptor_set_is_argument_buffer(uint32_t desc_set) const; |
1093 | MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx); |
1094 | void add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); |
1095 | void add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); |
1096 | void add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); |
1097 | void add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, uint32_t count); |
1098 | |
1099 | uint32_t get_target_components_for_fragment_location(uint32_t location) const; |
1100 | uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components, |
1101 | SPIRType::BaseType basetype = SPIRType::Unknown); |
1102 | uint32_t build_msl_interpolant_type(uint32_t type_id, bool is_noperspective); |
1103 | |
1104 | bool suppress_missing_prototypes = false; |
1105 | |
1106 | void add_spv_func_and_recompile(SPVFuncImpl spv_func); |
1107 | |
1108 | void activate_argument_buffer_resources(); |
1109 | |
1110 | bool type_is_msl_framebuffer_fetch(const SPIRType &type) const; |
1111 | bool type_is_pointer(const SPIRType &type) const; |
1112 | bool type_is_pointer_to_pointer(const SPIRType &type) const; |
1113 | bool is_supported_argument_buffer_type(const SPIRType &type) const; |
1114 | |
1115 | bool variable_storage_requires_stage_io(spv::StorageClass storage) const; |
1116 | |
1117 | bool has_additional_fixed_sample_mask() const { return msl_options.additional_fixed_sample_mask != 0xffffffff; } |
1118 | std::string additional_fixed_sample_mask_str() const; |
1119 | |
1120 | // OpcodeHandler that handles several MSL preprocessing operations. |
1121 | struct OpCodePreprocessor : OpcodeHandler |
1122 | { |
1123 | OpCodePreprocessor(CompilerMSL &compiler_) |
1124 | : compiler(compiler_) |
1125 | { |
1126 | } |
1127 | |
1128 | bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; |
1129 | CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args); |
1130 | void check_resource_write(uint32_t var_id); |
1131 | |
1132 | CompilerMSL &compiler; |
1133 | std::unordered_map<uint32_t, uint32_t> result_types; |
1134 | std::unordered_map<uint32_t, uint32_t> image_pointers; // Emulate texture2D atomic operations |
1135 | bool suppress_missing_prototypes = false; |
1136 | bool uses_atomics = false; |
1137 | bool uses_resource_write = false; |
1138 | bool needs_subgroup_invocation_id = false; |
1139 | bool needs_subgroup_size = false; |
1140 | bool needs_sample_id = false; |
1141 | }; |
1142 | |
1143 | // OpcodeHandler that scans for uses of sampled images |
1144 | struct SampledImageScanner : OpcodeHandler |
1145 | { |
1146 | SampledImageScanner(CompilerMSL &compiler_) |
1147 | : compiler(compiler_) |
1148 | { |
1149 | } |
1150 | |
1151 | bool handle(spv::Op opcode, const uint32_t *args, uint32_t) override; |
1152 | |
1153 | CompilerMSL &compiler; |
1154 | }; |
1155 | |
1156 | // Sorts the members of a SPIRType and associated Meta info based on a settable sorting |
1157 | // aspect, which defines which aspect of the struct members will be used to sort them. |
1158 | // Regardless of the sorting aspect, built-in members always appear at the end of the struct. |
1159 | struct MemberSorter |
1160 | { |
1161 | enum SortAspect |
1162 | { |
1163 | LocationThenBuiltInType, |
1164 | Offset |
1165 | }; |
1166 | |
1167 | void sort(); |
1168 | bool operator()(uint32_t mbr_idx1, uint32_t mbr_idx2); |
1169 | MemberSorter(SPIRType &t, Meta &m, SortAspect sa); |
1170 | |
1171 | SPIRType &type; |
1172 | Meta &meta; |
1173 | SortAspect sort_aspect; |
1174 | }; |
1175 | }; |
1176 | } // namespace SPIRV_CROSS_NAMESPACE |
1177 | |
1178 | #endif |
1179 | |