1 | //===- Passes.h - Sparse tensor pipeline entry points -----------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This header file defines prototypes of all sparse tensor pipelines. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef MLIR_DIALECT_SPARSETENSOR_PIPELINES_PASSES_H_ |
14 | #define MLIR_DIALECT_SPARSETENSOR_PIPELINES_PASSES_H_ |
15 | |
16 | #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h" |
17 | #include "mlir/Dialect/SparseTensor/Transforms/Passes.h" |
18 | #include "mlir/Pass/PassOptions.h" |
19 | |
// NOTE(review): using-directives at namespace scope in a header leak
// `mlir::detail` and `llvm::cl` names into every translation unit that
// includes this file. They exist so the unqualified `desc(...)`/`init(...)`
// uses below resolve; removing them requires qualifying those uses first.
using namespace mlir::detail;
using namespace llvm::cl;
22 | |
23 | namespace mlir { |
24 | namespace sparse_tensor { |
25 | |
26 | /// Options for the "sparsifier" pipeline. So far this only contains |
27 | /// a subset of the options that can be set for the underlying passes, |
28 | /// because it must be manually kept in sync with the tablegen files |
29 | /// for those passes. |
30 | struct SparsifierOptions : public PassPipelineOptions<SparsifierOptions> { |
31 | // These options must be kept in sync with `SparsificationBase`. |
32 | // TODO(57514): These options are duplicated in Passes.td. |
33 | PassOptions::Option<mlir::SparseParallelizationStrategy> parallelization{ |
34 | *this, "parallelization-strategy" , |
35 | ::llvm::cl::desc("Set the parallelization strategy" ), |
36 | ::llvm::cl::init(Val: mlir::SparseParallelizationStrategy::kNone), |
37 | llvm::cl::values( |
38 | clEnumValN(mlir::SparseParallelizationStrategy::kNone, "none" , |
39 | "Turn off sparse parallelization." ), |
40 | clEnumValN(mlir::SparseParallelizationStrategy::kDenseOuterLoop, |
41 | "dense-outer-loop" , |
42 | "Enable dense outer loop sparse parallelization." ), |
43 | clEnumValN(mlir::SparseParallelizationStrategy::kAnyStorageOuterLoop, |
44 | "any-storage-outer-loop" , |
45 | "Enable sparse parallelization regardless of storage for " |
46 | "the outer loop." ), |
47 | clEnumValN(mlir::SparseParallelizationStrategy::kDenseAnyLoop, |
48 | "dense-any-loop" , |
49 | "Enable dense parallelization for any loop." ), |
50 | clEnumValN( |
51 | mlir::SparseParallelizationStrategy::kAnyStorageAnyLoop, |
52 | "any-storage-any-loop" , |
53 | "Enable sparse parallelization for any storage and loop." ))}; |
54 | |
55 | PassOptions::Option<bool> enableRuntimeLibrary{ |
56 | *this, "enable-runtime-library" , |
57 | desc("Enable runtime library for manipulating sparse tensors" ), |
58 | init(Val: true)}; |
59 | |
60 | PassOptions::Option<bool> testBufferizationAnalysisOnly{ |
61 | *this, "test-bufferization-analysis-only" , |
62 | desc("Run only the inplacability analysis" ), init(Val: false)}; |
63 | |
64 | PassOptions::Option<bool> enableBufferInitialization{ |
65 | *this, "enable-buffer-initialization" , |
66 | desc("Enable zero-initialization of memory buffers" ), init(Val: false)}; |
67 | |
68 | // TODO: Delete the option, it should also be false after switching to |
69 | // buffer-deallocation-pass |
70 | PassOptions::Option<bool> createSparseDeallocs{ |
71 | *this, "create-sparse-deallocs" , |
72 | desc("Specify if the temporary buffers created by the sparse " |
73 | "compiler should be deallocated. For compatibility with core " |
74 | "bufferization passes. " |
75 | "This option is only used when enable-runtime-library=false." ), |
76 | init(Val: true)}; |
77 | |
78 | PassOptions::Option<int32_t> vectorLength{ |
79 | *this, "vl" , desc("Set the vector length (0 disables vectorization)" ), |
80 | init(Val: 0)}; |
81 | |
82 | // These options must be kept in sync with the `ConvertVectorToLLVM` |
83 | // (defined in include/mlir/Dialect/SparseTensor/Pipelines/Passes.h). |
84 | PassOptions::Option<bool> reassociateFPReductions{ |
85 | *this, "reassociate-fp-reductions" , |
86 | desc("Allows llvm to reassociate floating-point reductions for speed" ), |
87 | init(Val: false)}; |
88 | PassOptions::Option<bool> force32BitVectorIndices{ |
89 | *this, "enable-index-optimizations" , |
90 | desc("Allows compiler to assume indices fit in 32-bit if that yields " |
91 | "faster code" ), |
92 | init(Val: true)}; |
93 | PassOptions::Option<bool> amx{ |
94 | *this, "enable-amx" , |
95 | desc("Enables the use of AMX dialect while lowering the vector dialect" ), |
96 | init(Val: false)}; |
97 | PassOptions::Option<bool> armNeon{ |
98 | *this, "enable-arm-neon" , |
99 | desc("Enables the use of ArmNeon dialect while lowering the vector " |
100 | "dialect" ), |
101 | init(Val: false)}; |
102 | PassOptions::Option<bool> armSVE{ |
103 | *this, "enable-arm-sve" , |
104 | desc("Enables the use of ArmSVE dialect while lowering the vector " |
105 | "dialect" ), |
106 | init(Val: false)}; |
107 | PassOptions::Option<bool> x86Vector{ |
108 | *this, "enable-x86vector" , |
109 | desc("Enables the use of X86Vector dialect while lowering the vector " |
110 | "dialect" ), |
111 | init(Val: false)}; |
112 | |
113 | /// These options are used to enable GPU code generation. |
114 | PassOptions::Option<std::string> gpuTriple{*this, "gpu-triple" , |
115 | desc("GPU target triple" )}; |
116 | PassOptions::Option<std::string> gpuChip{*this, "gpu-chip" , |
117 | desc("GPU target architecture" )}; |
118 | PassOptions::Option<std::string> gpuFeatures{*this, "gpu-features" , |
119 | desc("GPU target features" )}; |
120 | /// For NVIDIA GPUs there are 3 compilation format options: |
121 | /// 1. `isa`: the compiler generates PTX and the driver JITs the PTX. |
122 | /// 2. `bin`: generates a CUBIN object for `chip=gpuChip`. |
123 | /// 3. `fatbin`: generates a fat binary with a CUBIN object for `gpuChip` and |
124 | /// also embeds the PTX in the fat binary. |
125 | /// Notes: |
126 | /// Option 1 adds a significant runtime performance hit, however, tests are |
127 | /// more likely to pass with this option. |
128 | /// Option 2 is better for execution time as there is no JIT; however, the |
129 | /// program will fail if there's an architecture mismatch between `gpuChip` |
130 | /// and the GPU running the program. |
131 | /// Option 3 is the best compromise between options 1 and 2 as it can JIT in |
132 | /// case of an architecture mismatch between `gpuChip` and the running |
133 | /// architecture. However, it's only possible to JIT to a higher CC than |
134 | /// `gpuChip`. |
135 | PassOptions::Option<std::string> gpuFormat{ |
136 | *this, "gpu-format" , desc("GPU compilation format" ), init(Val: "fatbin" )}; |
137 | |
138 | /// This option is used to enable GPU library generation. |
139 | PassOptions::Option<bool> enableGPULibgen{ |
140 | *this, "enable-gpu-libgen" , |
141 | desc("Enables GPU acceleration by means of direct library calls (like " |
142 | "cuSPARSE)" )}; |
143 | |
144 | /// Projects out the options for `createSparsificationPass`. |
145 | SparsificationOptions sparsificationOptions() const { |
146 | return SparsificationOptions(parallelization, enableRuntimeLibrary); |
147 | } |
148 | |
149 | /// Projects out the options for `createConvertVectorToLLVMPass`. |
150 | ConvertVectorToLLVMPassOptions lowerVectorToLLVMOptions() const { |
151 | ConvertVectorToLLVMPassOptions opts{}; |
152 | opts.reassociateFPReductions = reassociateFPReductions; |
153 | opts.force32BitVectorIndices = force32BitVectorIndices; |
154 | opts.armNeon = armNeon; |
155 | opts.armSVE = armSVE; |
156 | opts.amx = amx; |
157 | opts.x86Vector = x86Vector; |
158 | return opts; |
159 | } |
160 | }; |
161 | |
//===----------------------------------------------------------------------===//
// Building and Registering.
//===----------------------------------------------------------------------===//

/// Adds the "sparsifier" pipeline to the `OpPassManager`. This
/// is the standard pipeline for taking sparsity-agnostic IR using
/// the sparse-tensor type and lowering it to LLVM IR with concrete
/// representations and algorithms for sparse tensors.
///
/// \param pm      Pass manager the pipeline's passes are appended to.
/// \param options Pipeline configuration (see `SparsifierOptions`).
void buildSparsifier(OpPassManager &pm, const SparsifierOptions &options);

/// Registers all pipelines for the `sparse_tensor` dialect. At present,
/// this includes only "sparsifier".
void registerSparseTensorPipelines();
175 | |
176 | } // namespace sparse_tensor |
177 | } // namespace mlir |
178 | |
179 | #endif // MLIR_DIALECT_SPARSETENSOR_PIPELINES_PASSES_H_ |
180 | |