1 | //===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "SystemZTargetMachine.h" |
10 | #include "MCTargetDesc/SystemZMCTargetDesc.h" |
11 | #include "SystemZ.h" |
12 | #include "SystemZMachineFunctionInfo.h" |
13 | #include "SystemZMachineScheduler.h" |
14 | #include "SystemZTargetObjectFile.h" |
15 | #include "SystemZTargetTransformInfo.h" |
16 | #include "TargetInfo/SystemZTargetInfo.h" |
17 | #include "llvm/ADT/StringRef.h" |
18 | #include "llvm/Analysis/TargetTransformInfo.h" |
19 | #include "llvm/CodeGen/Passes.h" |
20 | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" |
21 | #include "llvm/CodeGen/TargetPassConfig.h" |
22 | #include "llvm/IR/DataLayout.h" |
23 | #include "llvm/MC/TargetRegistry.h" |
24 | #include "llvm/Support/CodeGen.h" |
25 | #include "llvm/Target/TargetLoweringObjectFile.h" |
26 | #include "llvm/Transforms/Scalar.h" |
27 | #include <memory> |
28 | #include <optional> |
29 | #include <string> |
30 | |
31 | using namespace llvm; |
32 | |
33 | // NOLINTNEXTLINE(readability-identifier-naming) |
34 | extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() { |
35 | // Register the target. |
36 | RegisterTargetMachine<SystemZTargetMachine> X(getTheSystemZTarget()); |
37 | auto &PR = *PassRegistry::getPassRegistry(); |
38 | initializeSystemZElimComparePass(PR); |
39 | initializeSystemZShortenInstPass(PR); |
40 | initializeSystemZLongBranchPass(PR); |
41 | initializeSystemZLDCleanupPass(PR); |
42 | initializeSystemZShortenInstPass(PR); |
43 | initializeSystemZPostRewritePass(PR); |
44 | initializeSystemZTDCPassPass(PR); |
45 | initializeSystemZDAGToDAGISelPass(PR); |
46 | } |
47 | |
48 | static std::string computeDataLayout(const Triple &TT) { |
49 | std::string Ret; |
50 | |
51 | // Big endian. |
52 | Ret += "E" ; |
53 | |
54 | // Data mangling. |
55 | Ret += DataLayout::getManglingComponent(T: TT); |
56 | |
57 | // Make sure that global data has at least 16 bits of alignment by |
58 | // default, so that we can refer to it using LARL. We don't have any |
59 | // special requirements for stack variables though. |
60 | Ret += "-i1:8:16-i8:8:16" ; |
61 | |
62 | // 64-bit integers are naturally aligned. |
63 | Ret += "-i64:64" ; |
64 | |
65 | // 128-bit floats are aligned only to 64 bits. |
66 | Ret += "-f128:64" ; |
67 | |
68 | // The DataLayout string always holds a vector alignment of 64 bits, see |
69 | // comment in clang/lib/Basic/Targets/SystemZ.h. |
70 | Ret += "-v128:64" ; |
71 | |
72 | // We prefer 16 bits of aligned for all globals; see above. |
73 | Ret += "-a:8:16" ; |
74 | |
75 | // Integer registers are 32 or 64 bits. |
76 | Ret += "-n32:64" ; |
77 | |
78 | return Ret; |
79 | } |
80 | |
81 | static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { |
82 | if (TT.isOSzOS()) |
83 | return std::make_unique<TargetLoweringObjectFileGOFF>(); |
84 | |
85 | // Note: Some times run with -triple s390x-unknown. |
86 | // In this case, default to ELF unless z/OS specifically provided. |
87 | return std::make_unique<SystemZELFTargetObjectFile>(); |
88 | } |
89 | |
90 | static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) { |
91 | // Static code is suitable for use in a dynamic executable; there is no |
92 | // separate DynamicNoPIC model. |
93 | if (!RM || *RM == Reloc::DynamicNoPIC) |
94 | return Reloc::Static; |
95 | return *RM; |
96 | } |
97 | |
98 | // For SystemZ we define the models as follows: |
99 | // |
100 | // Small: BRASL can call any function and will use a stub if necessary. |
101 | // Locally-binding symbols will always be in range of LARL. |
102 | // |
103 | // Medium: BRASL can call any function and will use a stub if necessary. |
104 | // GOT slots and locally-defined text will always be in range |
105 | // of LARL, but other symbols might not be. |
106 | // |
107 | // Large: Equivalent to Medium for now. |
108 | // |
109 | // Kernel: Equivalent to Medium for now. |
110 | // |
111 | // This means that any PIC module smaller than 4GB meets the |
112 | // requirements of Small, so Small seems like the best default there. |
113 | // |
114 | // All symbols bind locally in a non-PIC module, so the choice is less |
115 | // obvious. There are two cases: |
116 | // |
117 | // - When creating an executable, PLTs and copy relocations allow |
118 | // us to treat external symbols as part of the executable. |
119 | // Any executable smaller than 4GB meets the requirements of Small, |
120 | // so that seems like the best default. |
121 | // |
122 | // - When creating JIT code, stubs will be in range of BRASL if the |
123 | // image is less than 4GB in size. GOT entries will likewise be |
124 | // in range of LARL. However, the JIT environment has no equivalent |
125 | // of copy relocs, so locally-binding data symbols might not be in |
126 | // the range of LARL. We need the Medium model in that case. |
127 | static CodeModel::Model |
128 | getEffectiveSystemZCodeModel(std::optional<CodeModel::Model> CM, |
129 | Reloc::Model RM, bool JIT) { |
130 | if (CM) { |
131 | if (*CM == CodeModel::Tiny) |
132 | report_fatal_error(reason: "Target does not support the tiny CodeModel" , gen_crash_diag: false); |
133 | if (*CM == CodeModel::Kernel) |
134 | report_fatal_error(reason: "Target does not support the kernel CodeModel" , gen_crash_diag: false); |
135 | return *CM; |
136 | } |
137 | if (JIT) |
138 | return RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium; |
139 | return CodeModel::Small; |
140 | } |
141 | |
142 | SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT, |
143 | StringRef CPU, StringRef FS, |
144 | const TargetOptions &Options, |
145 | std::optional<Reloc::Model> RM, |
146 | std::optional<CodeModel::Model> CM, |
147 | CodeGenOptLevel OL, bool JIT) |
148 | : LLVMTargetMachine( |
149 | T, computeDataLayout(TT), TT, CPU, FS, Options, |
150 | getEffectiveRelocModel(RM), |
151 | getEffectiveSystemZCodeModel(CM, RM: getEffectiveRelocModel(RM), JIT), |
152 | OL), |
153 | TLOF(createTLOF(TT: getTargetTriple())) { |
154 | initAsmInfo(); |
155 | } |
156 | |
// Out-of-line defaulted destructor.
// NOTE(review): presumably defined here so that the types owned by the
// members only need to be complete in this translation unit — confirm.
SystemZTargetMachine::~SystemZTargetMachine() = default;
158 | |
159 | const SystemZSubtarget * |
160 | SystemZTargetMachine::getSubtargetImpl(const Function &F) const { |
161 | Attribute CPUAttr = F.getFnAttribute(Kind: "target-cpu" ); |
162 | Attribute TuneAttr = F.getFnAttribute(Kind: "tune-cpu" ); |
163 | Attribute FSAttr = F.getFnAttribute(Kind: "target-features" ); |
164 | |
165 | std::string CPU = |
166 | CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; |
167 | std::string TuneCPU = |
168 | TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU; |
169 | std::string FS = |
170 | FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; |
171 | |
172 | // FIXME: This is related to the code below to reset the target options, |
173 | // we need to know whether the soft float and backchain flags are set on the |
174 | // function, so we can enable them as subtarget features. |
175 | bool SoftFloat = F.getFnAttribute(Kind: "use-soft-float" ).getValueAsBool(); |
176 | if (SoftFloat) |
177 | FS += FS.empty() ? "+soft-float" : ",+soft-float" ; |
178 | bool BackChain = F.hasFnAttribute(Kind: "backchain" ); |
179 | if (BackChain) |
180 | FS += FS.empty() ? "+backchain" : ",+backchain" ; |
181 | |
182 | auto &I = SubtargetMap[CPU + TuneCPU + FS]; |
183 | if (!I) { |
184 | // This needs to be done before we create a new subtarget since any |
185 | // creation will depend on the TM and the code generation flags on the |
186 | // function that reside in TargetOptions. |
187 | resetTargetOptions(F); |
188 | I = std::make_unique<SystemZSubtarget>(args: TargetTriple, args&: CPU, args&: TuneCPU, args&: FS, |
189 | args: *this); |
190 | } |
191 | |
192 | return I.get(); |
193 | } |
194 | |
195 | namespace { |
196 | |
197 | /// SystemZ Code Generator Pass Configuration Options. |
198 | class SystemZPassConfig : public TargetPassConfig { |
199 | public: |
200 | SystemZPassConfig(SystemZTargetMachine &TM, PassManagerBase &PM) |
201 | : TargetPassConfig(TM, PM) {} |
202 | |
203 | SystemZTargetMachine &getSystemZTargetMachine() const { |
204 | return getTM<SystemZTargetMachine>(); |
205 | } |
206 | |
207 | ScheduleDAGInstrs * |
208 | createPostMachineScheduler(MachineSchedContext *C) const override { |
209 | return new ScheduleDAGMI(C, |
210 | std::make_unique<SystemZPostRASchedStrategy>(args&: C), |
211 | /*RemoveKillFlags=*/true); |
212 | } |
213 | |
214 | void addIRPasses() override; |
215 | bool addInstSelector() override; |
216 | bool addILPOpts() override; |
217 | void addPreRegAlloc() override; |
218 | void addPostRewrite() override; |
219 | void addPostRegAlloc() override; |
220 | void addPreSched2() override; |
221 | void addPreEmitPass() override; |
222 | }; |
223 | |
224 | } // end anonymous namespace |
225 | |
226 | void SystemZPassConfig::addIRPasses() { |
227 | if (getOptLevel() != CodeGenOptLevel::None) { |
228 | addPass(P: createSystemZTDCPass()); |
229 | addPass(P: createLoopDataPrefetchPass()); |
230 | } |
231 | |
232 | addPass(P: createAtomicExpandLegacyPass()); |
233 | |
234 | TargetPassConfig::addIRPasses(); |
235 | } |
236 | |
237 | bool SystemZPassConfig::addInstSelector() { |
238 | addPass(P: createSystemZISelDag(TM&: getSystemZTargetMachine(), OptLevel: getOptLevel())); |
239 | |
240 | if (getOptLevel() != CodeGenOptLevel::None) |
241 | addPass(P: createSystemZLDCleanupPass(TM&: getSystemZTargetMachine())); |
242 | |
243 | return false; |
244 | } |
245 | |
246 | bool SystemZPassConfig::addILPOpts() { |
247 | addPass(PassID: &EarlyIfConverterID); |
248 | return true; |
249 | } |
250 | |
251 | void SystemZPassConfig::addPreRegAlloc() { |
252 | addPass(P: createSystemZCopyPhysRegsPass(TM&: getSystemZTargetMachine())); |
253 | } |
254 | |
255 | void SystemZPassConfig::addPostRewrite() { |
256 | addPass(P: createSystemZPostRewritePass(TM&: getSystemZTargetMachine())); |
257 | } |
258 | |
259 | void SystemZPassConfig::addPostRegAlloc() { |
260 | // PostRewrite needs to be run at -O0 also (in which case addPostRewrite() |
261 | // is not called). |
262 | if (getOptLevel() == CodeGenOptLevel::None) |
263 | addPass(P: createSystemZPostRewritePass(TM&: getSystemZTargetMachine())); |
264 | } |
265 | |
266 | void SystemZPassConfig::addPreSched2() { |
267 | if (getOptLevel() != CodeGenOptLevel::None) |
268 | addPass(PassID: &IfConverterID); |
269 | } |
270 | |
271 | void SystemZPassConfig::addPreEmitPass() { |
272 | // Do instruction shortening before compare elimination because some |
273 | // vector instructions will be shortened into opcodes that compare |
274 | // elimination recognizes. |
275 | if (getOptLevel() != CodeGenOptLevel::None) |
276 | addPass(P: createSystemZShortenInstPass(TM&: getSystemZTargetMachine())); |
277 | |
278 | // We eliminate comparisons here rather than earlier because some |
279 | // transformations can change the set of available CC values and we |
280 | // generally want those transformations to have priority. This is |
281 | // especially true in the commonest case where the result of the comparison |
282 | // is used by a single in-range branch instruction, since we will then |
283 | // be able to fuse the compare and the branch instead. |
284 | // |
285 | // For example, two-address NILF can sometimes be converted into |
286 | // three-address RISBLG. NILF produces a CC value that indicates whether |
287 | // the low word is zero, but RISBLG does not modify CC at all. On the |
288 | // other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG. |
289 | // The CC value produced by NILL isn't useful for our purposes, but the |
290 | // value produced by RISBG can be used for any comparison with zero |
291 | // (not just equality). So there are some transformations that lose |
292 | // CC values (while still being worthwhile) and others that happen to make |
293 | // the CC result more useful than it was originally. |
294 | // |
295 | // Another reason is that we only want to use BRANCH ON COUNT in cases |
296 | // where we know that the count register is not going to be spilled. |
297 | // |
298 | // Doing it so late makes it more likely that a register will be reused |
299 | // between the comparison and the branch, but it isn't clear whether |
300 | // preventing that would be a win or not. |
301 | if (getOptLevel() != CodeGenOptLevel::None) |
302 | addPass(P: createSystemZElimComparePass(TM&: getSystemZTargetMachine())); |
303 | addPass(P: createSystemZLongBranchPass(TM&: getSystemZTargetMachine())); |
304 | |
305 | // Do final scheduling after all other optimizations, to get an |
306 | // optimal input for the decoder (branch relaxation must happen |
307 | // after block placement). |
308 | if (getOptLevel() != CodeGenOptLevel::None) |
309 | addPass(PassID: &PostMachineSchedulerID); |
310 | } |
311 | |
/// Creates the SystemZ pass configuration bound to this target machine and
/// the given pass manager. The object is heap-allocated with `new` and
/// returned as a raw pointer.
TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new SystemZPassConfig(*this, PM);
}
315 | |
/// Returns a TargetTransformInfo wrapping a SystemZTTIImpl built from this
/// target machine and function \p F.
TargetTransformInfo
SystemZTargetMachine::getTargetTransformInfo(const Function &F) const {
  return TargetTransformInfo(SystemZTTIImpl(this, F));
}
320 | |
/// Creates the SystemZMachineFunctionInfo for \p F through its create<>
/// factory, forwarding the allocator, the function and the subtarget info.
MachineFunctionInfo *SystemZTargetMachine::createMachineFunctionInfo(
    BumpPtrAllocator &Allocator, const Function &F,
    const TargetSubtargetInfo *STI) const {
  return SystemZMachineFunctionInfo::create<SystemZMachineFunctionInfo>(
      Allocator, F, STI);
}
327 | |