1 | //===-- PPCIntrinsicCall.cpp ----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Helper routines for constructing the FIR dialect of MLIR for PowerPC |
10 | // intrinsics. Extensive use of MLIR interfaces and MLIR's coding style |
11 | // (https://mlir.llvm.org/getting_started/DeveloperGuide/) is used in this |
12 | // module. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "flang/Optimizer/Builder/PPCIntrinsicCall.h" |
17 | #include "flang/Evaluate/common.h" |
18 | #include "flang/Optimizer/Builder/FIRBuilder.h" |
19 | #include "flang/Optimizer/Builder/MutableBox.h" |
20 | #include "mlir/Dialect/Vector/IR/VectorOps.h" |
21 | |
22 | namespace fir { |
23 | |
24 | using PI = PPCIntrinsicLibrary; |
25 | |
26 | // PPC specific intrinsic handlers. |
27 | static constexpr IntrinsicHandler ppcHandlers[]{ |
28 | {"__ppc_mma_assemble_acc" , |
29 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
30 | &PI::genMmaIntr<MMAOp::AssembleAcc, MMAHandlerOp::SubToFunc>), |
31 | {{{"acc" , asAddr}, |
32 | {"arg1" , asValue}, |
33 | {"arg2" , asValue}, |
34 | {"arg3" , asValue}, |
35 | {"arg4" , asValue}}}, |
36 | /*isElemental=*/true}, |
37 | {"__ppc_mma_assemble_pair" , |
38 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
39 | &PI::genMmaIntr<MMAOp::AssemblePair, MMAHandlerOp::SubToFunc>), |
40 | {{{"pair" , asAddr}, {"arg1" , asValue}, {"arg2" , asValue}}}, |
41 | /*isElemental=*/true}, |
42 | {"__ppc_mma_build_acc" , |
43 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
44 | &PI::genMmaIntr<MMAOp::AssembleAcc, |
45 | MMAHandlerOp::SubToFuncReverseArgOnLE>), |
46 | {{{"acc" , asAddr}, |
47 | {"arg1" , asValue}, |
48 | {"arg2" , asValue}, |
49 | {"arg3" , asValue}, |
50 | {"arg4" , asValue}}}, |
51 | /*isElemental=*/true}, |
52 | {"__ppc_mma_disassemble_acc" , |
53 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
54 | &PI::genMmaIntr<MMAOp::DisassembleAcc, MMAHandlerOp::SubToFunc>), |
55 | {{{"data" , asAddr}, {"acc" , asValue}}}, |
56 | /*isElemental=*/true}, |
57 | {"__ppc_mma_disassemble_pair" , |
58 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
59 | &PI::genMmaIntr<MMAOp::DisassemblePair, MMAHandlerOp::SubToFunc>), |
60 | {{{"data" , asAddr}, {"pair" , asValue}}}, |
61 | /*isElemental=*/true}, |
62 | {"__ppc_mma_pmxvbf16ger2_" , |
63 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
64 | &PI::genMmaIntr<MMAOp::Pmxvbf16ger2, MMAHandlerOp::SubToFunc>), |
65 | {{{"acc" , asAddr}, |
66 | {"a" , asValue}, |
67 | {"b" , asValue}, |
68 | {"xmask" , asValue}, |
69 | {"ymask" , asValue}, |
70 | {"pmask" , asValue}}}, |
71 | /*isElemental=*/true}, |
72 | {"__ppc_mma_pmxvbf16ger2nn" , |
73 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
74 | &PI::genMmaIntr<MMAOp::Pmxvbf16ger2nn, |
75 | MMAHandlerOp::FirstArgIsResult>), |
76 | {{{"acc" , asAddr}, |
77 | {"a" , asValue}, |
78 | {"b" , asValue}, |
79 | {"xmask" , asValue}, |
80 | {"ymask" , asValue}, |
81 | {"pmask" , asValue}}}, |
82 | /*isElemental=*/true}, |
83 | {"__ppc_mma_pmxvbf16ger2np" , |
84 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
85 | &PI::genMmaIntr<MMAOp::Pmxvbf16ger2np, |
86 | MMAHandlerOp::FirstArgIsResult>), |
87 | {{{"acc" , asAddr}, |
88 | {"a" , asValue}, |
89 | {"b" , asValue}, |
90 | {"xmask" , asValue}, |
91 | {"ymask" , asValue}, |
92 | {"pmask" , asValue}}}, |
93 | /*isElemental=*/true}, |
94 | {"__ppc_mma_pmxvbf16ger2pn" , |
95 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
96 | &PI::genMmaIntr<MMAOp::Pmxvbf16ger2pn, |
97 | MMAHandlerOp::FirstArgIsResult>), |
98 | {{{"acc" , asAddr}, |
99 | {"a" , asValue}, |
100 | {"b" , asValue}, |
101 | {"xmask" , asValue}, |
102 | {"ymask" , asValue}, |
103 | {"pmask" , asValue}}}, |
104 | /*isElemental=*/true}, |
105 | {"__ppc_mma_pmxvbf16ger2pp" , |
106 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
107 | &PI::genMmaIntr<MMAOp::Pmxvbf16ger2pp, |
108 | MMAHandlerOp::FirstArgIsResult>), |
109 | {{{"acc" , asAddr}, |
110 | {"a" , asValue}, |
111 | {"b" , asValue}, |
112 | {"xmask" , asValue}, |
113 | {"ymask" , asValue}, |
114 | {"pmask" , asValue}}}, |
115 | /*isElemental=*/true}, |
116 | {"__ppc_mma_pmxvf16ger2_" , |
117 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
118 | &PI::genMmaIntr<MMAOp::Pmxvf16ger2, MMAHandlerOp::SubToFunc>), |
119 | {{{"acc" , asAddr}, |
120 | {"a" , asValue}, |
121 | {"b" , asValue}, |
122 | {"xmask" , asValue}, |
123 | {"ymask" , asValue}, |
124 | {"pmask" , asValue}}}, |
125 | /*isElemental=*/true}, |
126 | {"__ppc_mma_pmxvf16ger2nn" , |
127 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
128 | &PI::genMmaIntr<MMAOp::Pmxvf16ger2nn, MMAHandlerOp::FirstArgIsResult>), |
129 | {{{"acc" , asAddr}, |
130 | {"a" , asValue}, |
131 | {"b" , asValue}, |
132 | {"xmask" , asValue}, |
133 | {"ymask" , asValue}, |
134 | {"pmask" , asValue}}}, |
135 | /*isElemental=*/true}, |
136 | {"__ppc_mma_pmxvf16ger2np" , |
137 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
138 | &PI::genMmaIntr<MMAOp::Pmxvf16ger2np, MMAHandlerOp::FirstArgIsResult>), |
139 | {{{"acc" , asAddr}, |
140 | {"a" , asValue}, |
141 | {"b" , asValue}, |
142 | {"xmask" , asValue}, |
143 | {"ymask" , asValue}, |
144 | {"pmask" , asValue}}}, |
145 | /*isElemental=*/true}, |
146 | {"__ppc_mma_pmxvf16ger2pn" , |
147 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
148 | &PI::genMmaIntr<MMAOp::Pmxvf16ger2pn, MMAHandlerOp::FirstArgIsResult>), |
149 | {{{"acc" , asAddr}, |
150 | {"a" , asValue}, |
151 | {"b" , asValue}, |
152 | {"xmask" , asValue}, |
153 | {"ymask" , asValue}, |
154 | {"pmask" , asValue}}}, |
155 | /*isElemental=*/true}, |
156 | {"__ppc_mma_pmxvf16ger2pp" , |
157 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
158 | &PI::genMmaIntr<MMAOp::Pmxvf16ger2pp, MMAHandlerOp::FirstArgIsResult>), |
159 | {{{"acc" , asAddr}, |
160 | {"a" , asValue}, |
161 | {"b" , asValue}, |
162 | {"xmask" , asValue}, |
163 | {"ymask" , asValue}, |
164 | {"pmask" , asValue}}}, |
165 | /*isElemental=*/true}, |
166 | {"__ppc_mma_pmxvf32ger" , |
167 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
168 | &PI::genMmaIntr<MMAOp::Pmxvf32ger, MMAHandlerOp::SubToFunc>), |
169 | {{{"acc" , asAddr}, |
170 | {"a" , asValue}, |
171 | {"b" , asValue}, |
172 | {"xmask" , asValue}, |
173 | {"ymask" , asValue}}}, |
174 | /*isElemental=*/true}, |
175 | {"__ppc_mma_pmxvf32gernn" , |
176 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
177 | &PI::genMmaIntr<MMAOp::Pmxvf32gernn, MMAHandlerOp::FirstArgIsResult>), |
178 | {{{"acc" , asAddr}, |
179 | {"a" , asValue}, |
180 | {"b" , asValue}, |
181 | {"xmask" , asValue}, |
182 | {"ymask" , asValue}}}, |
183 | /*isElemental=*/true}, |
184 | {"__ppc_mma_pmxvf32gernp" , |
185 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
186 | &PI::genMmaIntr<MMAOp::Pmxvf32gernp, MMAHandlerOp::FirstArgIsResult>), |
187 | {{{"acc" , asAddr}, |
188 | {"a" , asValue}, |
189 | {"b" , asValue}, |
190 | {"xmask" , asValue}, |
191 | {"ymask" , asValue}}}, |
192 | /*isElemental=*/true}, |
193 | {"__ppc_mma_pmxvf32gerpn" , |
194 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
195 | &PI::genMmaIntr<MMAOp::Pmxvf32gerpn, MMAHandlerOp::FirstArgIsResult>), |
196 | {{{"acc" , asAddr}, |
197 | {"a" , asValue}, |
198 | {"b" , asValue}, |
199 | {"xmask" , asValue}, |
200 | {"ymask" , asValue}}}, |
201 | /*isElemental=*/true}, |
202 | {"__ppc_mma_pmxvf32gerpp" , |
203 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
204 | &PI::genMmaIntr<MMAOp::Pmxvf32gerpp, MMAHandlerOp::FirstArgIsResult>), |
205 | {{{"acc" , asAddr}, |
206 | {"a" , asValue}, |
207 | {"b" , asValue}, |
208 | {"xmask" , asValue}, |
209 | {"ymask" , asValue}}}, |
210 | /*isElemental=*/true}, |
211 | {"__ppc_mma_pmxvf64ger" , |
212 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
213 | &PI::genMmaIntr<MMAOp::Pmxvf64ger, MMAHandlerOp::SubToFunc>), |
214 | {{{"acc" , asAddr}, |
215 | {"a" , asValue}, |
216 | {"b" , asValue}, |
217 | {"xmask" , asValue}, |
218 | {"ymask" , asValue}}}, |
219 | /*isElemental=*/true}, |
220 | {"__ppc_mma_pmxvf64gernn" , |
221 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
222 | &PI::genMmaIntr<MMAOp::Pmxvf64gernn, MMAHandlerOp::FirstArgIsResult>), |
223 | {{{"acc" , asAddr}, |
224 | {"a" , asValue}, |
225 | {"b" , asValue}, |
226 | {"xmask" , asValue}, |
227 | {"ymask" , asValue}}}, |
228 | /*isElemental=*/true}, |
229 | {"__ppc_mma_pmxvf64gernp" , |
230 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
231 | &PI::genMmaIntr<MMAOp::Pmxvf64gernp, MMAHandlerOp::FirstArgIsResult>), |
232 | {{{"acc" , asAddr}, |
233 | {"a" , asValue}, |
234 | {"b" , asValue}, |
235 | {"xmask" , asValue}, |
236 | {"ymask" , asValue}}}, |
237 | /*isElemental=*/true}, |
238 | {"__ppc_mma_pmxvf64gerpn" , |
239 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
240 | &PI::genMmaIntr<MMAOp::Pmxvf64gerpn, MMAHandlerOp::FirstArgIsResult>), |
241 | {{{"acc" , asAddr}, |
242 | {"a" , asValue}, |
243 | {"b" , asValue}, |
244 | {"xmask" , asValue}, |
245 | {"ymask" , asValue}}}, |
246 | /*isElemental=*/true}, |
247 | {"__ppc_mma_pmxvf64gerpp" , |
248 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
249 | &PI::genMmaIntr<MMAOp::Pmxvf64gerpp, MMAHandlerOp::FirstArgIsResult>), |
250 | {{{"acc" , asAddr}, |
251 | {"a" , asValue}, |
252 | {"b" , asValue}, |
253 | {"xmask" , asValue}, |
254 | {"ymask" , asValue}}}, |
255 | /*isElemental=*/true}, |
256 | {"__ppc_mma_pmxvi16ger2_" , |
257 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
258 | &PI::genMmaIntr<MMAOp::Pmxvi16ger2, MMAHandlerOp::SubToFunc>), |
259 | {{{"acc" , asAddr}, |
260 | {"a" , asValue}, |
261 | {"b" , asValue}, |
262 | {"xmask" , asValue}, |
263 | {"ymask" , asValue}, |
264 | {"pmask" , asValue}}}, |
265 | /*isElemental=*/true}, |
266 | {"__ppc_mma_pmxvi16ger2pp" , |
267 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
268 | &PI::genMmaIntr<MMAOp::Pmxvi16ger2pp, MMAHandlerOp::FirstArgIsResult>), |
269 | {{{"acc" , asAddr}, |
270 | {"a" , asValue}, |
271 | {"b" , asValue}, |
272 | {"xmask" , asValue}, |
273 | {"ymask" , asValue}, |
274 | {"pmask" , asValue}}}, |
275 | /*isElemental=*/true}, |
276 | {"__ppc_mma_pmxvi16ger2s" , |
277 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
278 | &PI::genMmaIntr<MMAOp::Pmxvi16ger2s, MMAHandlerOp::SubToFunc>), |
279 | {{{"acc" , asAddr}, |
280 | {"a" , asValue}, |
281 | {"b" , asValue}, |
282 | {"xmask" , asValue}, |
283 | {"ymask" , asValue}, |
284 | {"pmask" , asValue}}}, |
285 | /*isElemental=*/true}, |
286 | {"__ppc_mma_pmxvi16ger2spp" , |
287 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
288 | &PI::genMmaIntr<MMAOp::Pmxvi16ger2spp, |
289 | MMAHandlerOp::FirstArgIsResult>), |
290 | {{{"acc" , asAddr}, |
291 | {"a" , asValue}, |
292 | {"b" , asValue}, |
293 | {"xmask" , asValue}, |
294 | {"ymask" , asValue}, |
295 | {"pmask" , asValue}}}, |
296 | /*isElemental=*/true}, |
297 | {"__ppc_mma_pmxvi4ger8_" , |
298 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
299 | &PI::genMmaIntr<MMAOp::Pmxvi4ger8, MMAHandlerOp::SubToFunc>), |
300 | {{{"acc" , asAddr}, |
301 | {"a" , asValue}, |
302 | {"b" , asValue}, |
303 | {"xmask" , asValue}, |
304 | {"ymask" , asValue}, |
305 | {"pmask" , asValue}}}, |
306 | /*isElemental=*/true}, |
307 | {"__ppc_mma_pmxvi4ger8pp" , |
308 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
309 | &PI::genMmaIntr<MMAOp::Pmxvi4ger8pp, MMAHandlerOp::FirstArgIsResult>), |
310 | {{{"acc" , asAddr}, |
311 | {"a" , asValue}, |
312 | {"b" , asValue}, |
313 | {"xmask" , asValue}, |
314 | {"ymask" , asValue}, |
315 | {"pmask" , asValue}}}, |
316 | /*isElemental=*/true}, |
317 | {"__ppc_mma_pmxvi8ger4_" , |
318 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
319 | &PI::genMmaIntr<MMAOp::Pmxvi8ger4, MMAHandlerOp::SubToFunc>), |
320 | {{{"acc" , asAddr}, |
321 | {"a" , asValue}, |
322 | {"b" , asValue}, |
323 | {"xmask" , asValue}, |
324 | {"ymask" , asValue}, |
325 | {"pmask" , asValue}}}, |
326 | /*isElemental=*/true}, |
327 | {"__ppc_mma_pmxvi8ger4pp" , |
328 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
329 | &PI::genMmaIntr<MMAOp::Pmxvi8ger4pp, MMAHandlerOp::FirstArgIsResult>), |
330 | {{{"acc" , asAddr}, |
331 | {"a" , asValue}, |
332 | {"b" , asValue}, |
333 | {"xmask" , asValue}, |
334 | {"ymask" , asValue}, |
335 | {"pmask" , asValue}}}, |
336 | /*isElemental=*/true}, |
337 | {"__ppc_mma_pmxvi8ger4spp" , |
338 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
339 | &PI::genMmaIntr<MMAOp::Pmxvi8ger4spp, MMAHandlerOp::FirstArgIsResult>), |
340 | {{{"acc" , asAddr}, |
341 | {"a" , asValue}, |
342 | {"b" , asValue}, |
343 | {"xmask" , asValue}, |
344 | {"ymask" , asValue}, |
345 | {"pmask" , asValue}}}, |
346 | /*isElemental=*/true}, |
347 | {"__ppc_mma_xvbf16ger2_" , |
348 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
349 | &PI::genMmaIntr<MMAOp::Xvbf16ger2, MMAHandlerOp::SubToFunc>), |
350 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
351 | /*isElemental=*/true}, |
352 | {"__ppc_mma_xvbf16ger2nn" , |
353 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
354 | &PI::genMmaIntr<MMAOp::Xvbf16ger2nn, MMAHandlerOp::FirstArgIsResult>), |
355 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
356 | /*isElemental=*/true}, |
357 | {"__ppc_mma_xvbf16ger2np" , |
358 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
359 | &PI::genMmaIntr<MMAOp::Xvbf16ger2np, MMAHandlerOp::FirstArgIsResult>), |
360 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
361 | /*isElemental=*/true}, |
362 | {"__ppc_mma_xvbf16ger2pn" , |
363 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
364 | &PI::genMmaIntr<MMAOp::Xvbf16ger2pn, MMAHandlerOp::FirstArgIsResult>), |
365 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
366 | /*isElemental=*/true}, |
367 | {"__ppc_mma_xvbf16ger2pp" , |
368 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
369 | &PI::genMmaIntr<MMAOp::Xvbf16ger2pp, MMAHandlerOp::FirstArgIsResult>), |
370 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
371 | /*isElemental=*/true}, |
372 | {"__ppc_mma_xvf16ger2_" , |
373 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
374 | &PI::genMmaIntr<MMAOp::Xvf16ger2, MMAHandlerOp::SubToFunc>), |
375 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
376 | /*isElemental=*/true}, |
377 | {"__ppc_mma_xvf16ger2nn" , |
378 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
379 | &PI::genMmaIntr<MMAOp::Xvf16ger2nn, MMAHandlerOp::FirstArgIsResult>), |
380 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
381 | /*isElemental=*/true}, |
382 | {"__ppc_mma_xvf16ger2np" , |
383 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
384 | &PI::genMmaIntr<MMAOp::Xvf16ger2np, MMAHandlerOp::FirstArgIsResult>), |
385 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
386 | /*isElemental=*/true}, |
387 | {"__ppc_mma_xvf16ger2pn" , |
388 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
389 | &PI::genMmaIntr<MMAOp::Xvf16ger2pn, MMAHandlerOp::FirstArgIsResult>), |
390 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
391 | /*isElemental=*/true}, |
392 | {"__ppc_mma_xvf16ger2pp" , |
393 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
394 | &PI::genMmaIntr<MMAOp::Xvf16ger2pp, MMAHandlerOp::FirstArgIsResult>), |
395 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
396 | /*isElemental=*/true}, |
397 | {"__ppc_mma_xvf32ger" , |
398 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
399 | &PI::genMmaIntr<MMAOp::Xvf32ger, MMAHandlerOp::SubToFunc>), |
400 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
401 | /*isElemental=*/true}, |
402 | {"__ppc_mma_xvf32gernn" , |
403 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
404 | &PI::genMmaIntr<MMAOp::Xvf32gernn, MMAHandlerOp::FirstArgIsResult>), |
405 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
406 | /*isElemental=*/true}, |
407 | {"__ppc_mma_xvf32gernp" , |
408 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
409 | &PI::genMmaIntr<MMAOp::Xvf32gernp, MMAHandlerOp::FirstArgIsResult>), |
410 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
411 | /*isElemental=*/true}, |
412 | {"__ppc_mma_xvf32gerpn" , |
413 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
414 | &PI::genMmaIntr<MMAOp::Xvf32gerpn, MMAHandlerOp::FirstArgIsResult>), |
415 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
416 | /*isElemental=*/true}, |
417 | {"__ppc_mma_xvf32gerpp" , |
418 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
419 | &PI::genMmaIntr<MMAOp::Xvf32gerpp, MMAHandlerOp::FirstArgIsResult>), |
420 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
421 | /*isElemental=*/true}, |
422 | {"__ppc_mma_xvf64ger" , |
423 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
424 | &PI::genMmaIntr<MMAOp::Xvf64ger, MMAHandlerOp::SubToFunc>), |
425 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
426 | /*isElemental=*/true}, |
427 | {"__ppc_mma_xvf64gernn" , |
428 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
429 | &PI::genMmaIntr<MMAOp::Xvf64gernn, MMAHandlerOp::FirstArgIsResult>), |
430 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
431 | /*isElemental=*/true}, |
432 | {"__ppc_mma_xvf64gernp" , |
433 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
434 | &PI::genMmaIntr<MMAOp::Xvf64gernp, MMAHandlerOp::FirstArgIsResult>), |
435 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
436 | /*isElemental=*/true}, |
437 | {"__ppc_mma_xvf64gerpn" , |
438 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
439 | &PI::genMmaIntr<MMAOp::Xvf64gerpn, MMAHandlerOp::FirstArgIsResult>), |
440 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
441 | /*isElemental=*/true}, |
442 | {"__ppc_mma_xvf64gerpp" , |
443 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
444 | &PI::genMmaIntr<MMAOp::Xvf64gerpp, MMAHandlerOp::FirstArgIsResult>), |
445 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
446 | /*isElemental=*/true}, |
447 | {"__ppc_mma_xvi16ger2_" , |
448 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
449 | &PI::genMmaIntr<MMAOp::Xvi16ger2, MMAHandlerOp::SubToFunc>), |
450 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
451 | /*isElemental=*/true}, |
452 | {"__ppc_mma_xvi16ger2pp" , |
453 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
454 | &PI::genMmaIntr<MMAOp::Xvi16ger2pp, MMAHandlerOp::FirstArgIsResult>), |
455 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
456 | /*isElemental=*/true}, |
457 | {"__ppc_mma_xvi16ger2s" , |
458 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
459 | &PI::genMmaIntr<MMAOp::Xvi16ger2s, MMAHandlerOp::SubToFunc>), |
460 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
461 | /*isElemental=*/true}, |
462 | {"__ppc_mma_xvi16ger2spp" , |
463 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
464 | &PI::genMmaIntr<MMAOp::Xvi16ger2spp, MMAHandlerOp::FirstArgIsResult>), |
465 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
466 | /*isElemental=*/true}, |
467 | {"__ppc_mma_xvi4ger8_" , |
468 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
469 | &PI::genMmaIntr<MMAOp::Xvi4ger8, MMAHandlerOp::SubToFunc>), |
470 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
471 | /*isElemental=*/true}, |
472 | {"__ppc_mma_xvi4ger8pp" , |
473 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
474 | &PI::genMmaIntr<MMAOp::Xvi4ger8pp, MMAHandlerOp::FirstArgIsResult>), |
475 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
476 | /*isElemental=*/true}, |
477 | {"__ppc_mma_xvi8ger4_" , |
478 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
479 | &PI::genMmaIntr<MMAOp::Xvi8ger4, MMAHandlerOp::SubToFunc>), |
480 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
481 | /*isElemental=*/true}, |
482 | {"__ppc_mma_xvi8ger4pp" , |
483 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
484 | &PI::genMmaIntr<MMAOp::Xvi8ger4pp, MMAHandlerOp::FirstArgIsResult>), |
485 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
486 | /*isElemental=*/true}, |
487 | {"__ppc_mma_xvi8ger4spp" , |
488 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
489 | &PI::genMmaIntr<MMAOp::Xvi8ger4spp, MMAHandlerOp::FirstArgIsResult>), |
490 | {{{"acc" , asAddr}, {"a" , asValue}, {"b" , asValue}}}, |
491 | /*isElemental=*/true}, |
492 | {"__ppc_mma_xxmfacc" , |
493 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
494 | &PI::genMmaIntr<MMAOp::Xxmfacc, MMAHandlerOp::FirstArgIsResult>), |
495 | {{{"acc" , asAddr}}}, |
496 | /*isElemental=*/true}, |
497 | {"__ppc_mma_xxmtacc" , |
498 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
499 | &PI::genMmaIntr<MMAOp::Xxmtacc, MMAHandlerOp::FirstArgIsResult>), |
500 | {{{"acc" , asAddr}}}, |
501 | /*isElemental=*/true}, |
502 | {"__ppc_mma_xxsetaccz" , |
503 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
504 | &PI::genMmaIntr<MMAOp::Xxsetaccz, MMAHandlerOp::SubToFunc>), |
505 | {{{"acc" , asAddr}}}, |
506 | /*isElemental=*/true}, |
507 | {"__ppc_mtfsf" , |
508 | static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<false>), |
509 | {{{"mask" , asValue}, {"r" , asValue}}}, |
510 | /*isElemental=*/false}, |
511 | {"__ppc_mtfsfi" , |
512 | static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<true>), |
513 | {{{"bf" , asValue}, {"i" , asValue}}}, |
514 | /*isElemental=*/false}, |
515 | {"__ppc_vec_abs" , |
516 | static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecAbs), |
517 | {{{"arg1" , asValue}}}, |
518 | /*isElemental=*/true}, |
519 | {"__ppc_vec_add" , |
520 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
521 | &PI::genVecAddAndMulSubXor<VecOp::Add>), |
522 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
523 | /*isElemental=*/true}, |
524 | {"__ppc_vec_and" , |
525 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
526 | &PI::genVecAddAndMulSubXor<VecOp::And>), |
527 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
528 | /*isElemental=*/true}, |
529 | {"__ppc_vec_any_ge" , |
530 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
531 | &PI::genVecAnyCompare<VecOp::Anyge>), |
532 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
533 | /*isElemental=*/true}, |
534 | {"__ppc_vec_cmpge" , |
535 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
536 | &PI::genVecCmp<VecOp::Cmpge>), |
537 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
538 | /*isElemental=*/true}, |
539 | {"__ppc_vec_cmpgt" , |
540 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
541 | &PI::genVecCmp<VecOp::Cmpgt>), |
542 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
543 | /*isElemental=*/true}, |
544 | {"__ppc_vec_cmple" , |
545 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
546 | &PI::genVecCmp<VecOp::Cmple>), |
547 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
548 | /*isElemental=*/true}, |
549 | {"__ppc_vec_cmplt" , |
550 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
551 | &PI::genVecCmp<VecOp::Cmplt>), |
552 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
553 | /*isElemental=*/true}, |
554 | {"__ppc_vec_convert" , |
555 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
556 | &PI::genVecConvert<VecOp::Convert>), |
557 | {{{"v" , asValue}, {"mold" , asValue}}}, |
558 | /*isElemental=*/false}, |
559 | {"__ppc_vec_ctf" , |
560 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
561 | &PI::genVecConvert<VecOp::Ctf>), |
562 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
563 | /*isElemental=*/true}, |
564 | {"__ppc_vec_cvf" , |
565 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
566 | &PI::genVecConvert<VecOp::Cvf>), |
567 | {{{"arg1" , asValue}}}, |
568 | /*isElemental=*/true}, |
569 | {"__ppc_vec_extract" , |
570 | static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecExtract), |
571 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
572 | /*isElemental=*/true}, |
573 | {"__ppc_vec_insert" , |
574 | static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecInsert), |
575 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asValue}}}, |
576 | /*isElemental=*/true}, |
577 | {"__ppc_vec_ld" , |
578 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
579 | &PI::genVecLdCallGrp<VecOp::Ld>), |
580 | {{{"arg1" , asValue}, {"arg2" , asAddr}}}, |
581 | /*isElemental=*/false}, |
582 | {"__ppc_vec_lde" , |
583 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
584 | &PI::genVecLdCallGrp<VecOp::Lde>), |
585 | {{{"arg1" , asValue}, {"arg2" , asAddr}}}, |
586 | /*isElemental=*/false}, |
587 | {"__ppc_vec_ldl" , |
588 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
589 | &PI::genVecLdCallGrp<VecOp::Ldl>), |
590 | {{{"arg1" , asValue}, {"arg2" , asAddr}}}, |
591 | /*isElemental=*/false}, |
592 | {"__ppc_vec_lvsl" , |
593 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
594 | &PI::genVecLvsGrp<VecOp::Lvsl>), |
595 | {{{"arg1" , asValue}, {"arg2" , asAddr}}}, |
596 | /*isElemental=*/false}, |
597 | {"__ppc_vec_lvsr" , |
598 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
599 | &PI::genVecLvsGrp<VecOp::Lvsr>), |
600 | {{{"arg1" , asValue}, {"arg2" , asAddr}}}, |
601 | /*isElemental=*/false}, |
602 | {"__ppc_vec_lxv" , |
603 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
604 | &PI::genVecLdNoCallGrp<VecOp::Lxv>), |
605 | {{{"arg1" , asValue}, {"arg2" , asAddr}}}, |
606 | /*isElemental=*/false}, |
607 | {"__ppc_vec_lxvp" , |
608 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
609 | &PI::genVecLdCallGrp<VecOp::Lxvp>), |
610 | {{{"arg1" , asValue}, {"arg2" , asAddr}}}, |
611 | /*isElemental=*/false}, |
612 | {"__ppc_vec_mergeh" , |
613 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
614 | &PI::genVecMerge<VecOp::Mergeh>), |
615 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
616 | /*isElemental=*/true}, |
617 | {"__ppc_vec_mergel" , |
618 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
619 | &PI::genVecMerge<VecOp::Mergel>), |
620 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
621 | /*isElemental=*/true}, |
622 | {"__ppc_vec_msub" , |
623 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
624 | &PI::genVecNmaddMsub<VecOp::Msub>), |
625 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asValue}}}, |
626 | /*isElemental=*/true}, |
627 | {"__ppc_vec_mul" , |
628 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
629 | &PI::genVecAddAndMulSubXor<VecOp::Mul>), |
630 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
631 | /*isElemental=*/true}, |
632 | {"__ppc_vec_nmadd" , |
633 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
634 | &PI::genVecNmaddMsub<VecOp::Nmadd>), |
635 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asValue}}}, |
636 | /*isElemental=*/true}, |
637 | {"__ppc_vec_perm" , |
638 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
639 | &PI::genVecPerm<VecOp::Perm>), |
640 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asValue}}}, |
641 | /*isElemental=*/true}, |
642 | {"__ppc_vec_permi" , |
643 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
644 | &PI::genVecPerm<VecOp::Permi>), |
645 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asValue}}}, |
646 | /*isElemental=*/true}, |
647 | {"__ppc_vec_sel" , |
648 | static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecSel), |
649 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asValue}}}, |
650 | /*isElemental=*/true}, |
651 | {"__ppc_vec_sl" , |
652 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
653 | &PI::genVecShift<VecOp::Sl>), |
654 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
655 | /*isElemental=*/true}, |
656 | {"__ppc_vec_sld" , |
657 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
658 | &PI::genVecShift<VecOp::Sld>), |
659 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asValue}}}, |
660 | /*isElemental=*/true}, |
661 | {"__ppc_vec_sldw" , |
662 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
663 | &PI::genVecShift<VecOp::Sldw>), |
664 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asValue}}}, |
665 | /*isElemental=*/true}, |
666 | {"__ppc_vec_sll" , |
667 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
668 | &PI::genVecShift<VecOp::Sll>), |
669 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
670 | /*isElemental=*/true}, |
671 | {"__ppc_vec_slo" , |
672 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
673 | &PI::genVecShift<VecOp::Slo>), |
674 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
675 | /*isElemental=*/true}, |
676 | {"__ppc_vec_splat" , |
677 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
678 | &PI::genVecSplat<VecOp::Splat>), |
679 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
680 | /*isElemental=*/true}, |
681 | {"__ppc_vec_splat_s32_" , |
682 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
683 | &PI::genVecSplat<VecOp::Splat_s32>), |
684 | {{{"arg1" , asValue}}}, |
685 | /*isElemental=*/true}, |
686 | {"__ppc_vec_splats" , |
687 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
688 | &PI::genVecSplat<VecOp::Splats>), |
689 | {{{"arg1" , asValue}}}, |
690 | /*isElemental=*/true}, |
691 | {"__ppc_vec_sr" , |
692 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
693 | &PI::genVecShift<VecOp::Sr>), |
694 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
695 | /*isElemental=*/true}, |
696 | {"__ppc_vec_srl" , |
697 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
698 | &PI::genVecShift<VecOp::Srl>), |
699 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
700 | /*isElemental=*/true}, |
701 | {"__ppc_vec_sro" , |
702 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
703 | &PI::genVecShift<VecOp::Sro>), |
704 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
705 | /*isElemental=*/true}, |
706 | {"__ppc_vec_st" , |
707 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
708 | &PI::genVecStore<VecOp::St>), |
709 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asAddr}}}, |
710 | /*isElemental=*/false}, |
711 | {"__ppc_vec_ste" , |
712 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
713 | &PI::genVecStore<VecOp::Ste>), |
714 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asAddr}}}, |
715 | /*isElemental=*/false}, |
716 | {"__ppc_vec_stxv" , |
717 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
718 | &PI::genVecXStore<VecOp::Stxv>), |
719 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asAddr}}}, |
720 | /*isElemental=*/false}, |
721 | {"__ppc_vec_stxvp" , |
722 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
723 | &PI::genVecStore<VecOp::Stxvp>), |
724 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asAddr}}}, |
725 | /*isElemental=*/false}, |
726 | {"__ppc_vec_sub" , |
727 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
728 | &PI::genVecAddAndMulSubXor<VecOp::Sub>), |
729 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
730 | /*isElemental=*/true}, |
731 | {"__ppc_vec_xl" , |
732 | static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlGrp), |
733 | {{{"arg1" , asValue}, {"arg2" , asAddr}}}, |
734 | /*isElemental=*/false}, |
735 | {"__ppc_vec_xl_be" , |
736 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
737 | &PI::genVecLdNoCallGrp<VecOp::Xlbe>), |
738 | {{{"arg1" , asValue}, {"arg2" , asAddr}}}, |
739 | /*isElemental=*/false}, |
740 | {"__ppc_vec_xld2_" , |
741 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
742 | &PI::genVecLdCallGrp<VecOp::Xld2>), |
743 | {{{"arg1" , asValue}, {"arg2" , asAddr}}}, |
744 | /*isElemental=*/false}, |
745 | {"__ppc_vec_xlds" , |
746 | static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlds), |
747 | {{{"arg1" , asValue}, {"arg2" , asAddr}}}, |
748 | /*isElemental=*/false}, |
749 | {"__ppc_vec_xlw4_" , |
750 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
751 | &PI::genVecLdCallGrp<VecOp::Xlw4>), |
752 | {{{"arg1" , asValue}, {"arg2" , asAddr}}}, |
753 | /*isElemental=*/false}, |
754 | {"__ppc_vec_xor" , |
755 | static_cast<IntrinsicLibrary::ExtendedGenerator>( |
756 | &PI::genVecAddAndMulSubXor<VecOp::Xor>), |
757 | {{{"arg1" , asValue}, {"arg2" , asValue}}}, |
758 | /*isElemental=*/true}, |
759 | {"__ppc_vec_xst" , |
760 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
761 | &PI::genVecXStore<VecOp::Xst>), |
762 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asAddr}}}, |
763 | /*isElemental=*/false}, |
764 | {"__ppc_vec_xst_be" , |
765 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
766 | &PI::genVecXStore<VecOp::Xst_be>), |
767 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asAddr}}}, |
768 | /*isElemental=*/false}, |
769 | {"__ppc_vec_xstd2_" , |
770 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
771 | &PI::genVecXStore<VecOp::Xstd2>), |
772 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asAddr}}}, |
773 | /*isElemental=*/false}, |
774 | {"__ppc_vec_xstw4_" , |
775 | static_cast<IntrinsicLibrary::SubroutineGenerator>( |
776 | &PI::genVecXStore<VecOp::Xstw4>), |
777 | {{{"arg1" , asValue}, {"arg2" , asValue}, {"arg3" , asAddr}}}, |
778 | /*isElemental=*/false}, |
779 | }; |
780 | |
781 | static constexpr MathOperation ppcMathOperations[] = { |
782 | // fcfi is just another name for fcfid, there is no llvm.ppc.fcfi. |
783 | {"__ppc_fcfi" , "llvm.ppc.fcfid" , genFuncType<Ty::Real<8>, Ty::Real<8>>, |
784 | genLibCall}, |
785 | {"__ppc_fcfid" , "llvm.ppc.fcfid" , genFuncType<Ty::Real<8>, Ty::Real<8>>, |
786 | genLibCall}, |
787 | {"__ppc_fcfud" , "llvm.ppc.fcfud" , genFuncType<Ty::Real<8>, Ty::Real<8>>, |
788 | genLibCall}, |
789 | {"__ppc_fctid" , "llvm.ppc.fctid" , genFuncType<Ty::Real<8>, Ty::Real<8>>, |
790 | genLibCall}, |
791 | {"__ppc_fctidz" , "llvm.ppc.fctidz" , genFuncType<Ty::Real<8>, Ty::Real<8>>, |
792 | genLibCall}, |
793 | {"__ppc_fctiw" , "llvm.ppc.fctiw" , genFuncType<Ty::Real<8>, Ty::Real<8>>, |
794 | genLibCall}, |
795 | {"__ppc_fctiwz" , "llvm.ppc.fctiwz" , genFuncType<Ty::Real<8>, Ty::Real<8>>, |
796 | genLibCall}, |
797 | {"__ppc_fctudz" , "llvm.ppc.fctudz" , genFuncType<Ty::Real<8>, Ty::Real<8>>, |
798 | genLibCall}, |
799 | {"__ppc_fctuwz" , "llvm.ppc.fctuwz" , genFuncType<Ty::Real<8>, Ty::Real<8>>, |
800 | genLibCall}, |
801 | {"__ppc_fmadd" , "llvm.fma.f32" , |
802 | genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>, |
803 | genMathOp<mlir::math::FmaOp>}, |
804 | {"__ppc_fmadd" , "llvm.fma.f64" , |
805 | genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>, |
806 | genMathOp<mlir::math::FmaOp>}, |
807 | {"__ppc_fmsub" , "llvm.ppc.fmsubs" , |
808 | genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>, |
809 | genLibCall}, |
810 | {"__ppc_fmsub" , "llvm.ppc.fmsub" , |
811 | genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>, |
812 | genLibCall}, |
813 | {"__ppc_fnabs" , "llvm.ppc.fnabss" , genFuncType<Ty::Real<4>, Ty::Real<4>>, |
814 | genLibCall}, |
815 | {"__ppc_fnabs" , "llvm.ppc.fnabs" , genFuncType<Ty::Real<8>, Ty::Real<8>>, |
816 | genLibCall}, |
817 | {"__ppc_fnmadd" , "llvm.ppc.fnmadds" , |
818 | genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>, |
819 | genLibCall}, |
820 | {"__ppc_fnmadd" , "llvm.ppc.fnmadd" , |
821 | genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>, |
822 | genLibCall}, |
823 | {"__ppc_fnmsub" , "llvm.ppc.fnmsub.f32" , |
824 | genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>, |
825 | genLibCall}, |
826 | {"__ppc_fnmsub" , "llvm.ppc.fnmsub.f64" , |
827 | genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>, |
828 | genLibCall}, |
829 | {"__ppc_fre" , "llvm.ppc.fre" , genFuncType<Ty::Real<8>, Ty::Real<8>>, |
830 | genLibCall}, |
831 | {"__ppc_fres" , "llvm.ppc.fres" , genFuncType<Ty::Real<4>, Ty::Real<4>>, |
832 | genLibCall}, |
833 | {"__ppc_frsqrte" , "llvm.ppc.frsqrte" , genFuncType<Ty::Real<8>, Ty::Real<8>>, |
834 | genLibCall}, |
835 | {"__ppc_frsqrtes" , "llvm.ppc.frsqrtes" , |
836 | genFuncType<Ty::Real<4>, Ty::Real<4>>, genLibCall}, |
837 | {"__ppc_vec_cvbf16spn" , "llvm.ppc.vsx.xvcvbf16spn" , |
838 | genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall}, |
839 | {"__ppc_vec_cvspbf16_" , "llvm.ppc.vsx.xvcvspbf16" , |
840 | genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall}, |
841 | {"__ppc_vec_madd" , "llvm.fma.v4f32" , |
842 | genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>, |
843 | Ty::RealVector<4>>, |
844 | genLibCall}, |
845 | {"__ppc_vec_madd" , "llvm.fma.v2f64" , |
846 | genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>, |
847 | Ty::RealVector<8>>, |
848 | genLibCall}, |
849 | {"__ppc_vec_max" , "llvm.ppc.altivec.vmaxsb" , |
850 | genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>, |
851 | Ty::IntegerVector<1>>, |
852 | genLibCall}, |
853 | {"__ppc_vec_max" , "llvm.ppc.altivec.vmaxsh" , |
854 | genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>, |
855 | Ty::IntegerVector<2>>, |
856 | genLibCall}, |
857 | {"__ppc_vec_max" , "llvm.ppc.altivec.vmaxsw" , |
858 | genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>, |
859 | Ty::IntegerVector<4>>, |
860 | genLibCall}, |
861 | {"__ppc_vec_max" , "llvm.ppc.altivec.vmaxsd" , |
862 | genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>, |
863 | Ty::IntegerVector<8>>, |
864 | genLibCall}, |
865 | {"__ppc_vec_max" , "llvm.ppc.altivec.vmaxub" , |
866 | genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>, |
867 | Ty::UnsignedVector<1>>, |
868 | genLibCall}, |
869 | {"__ppc_vec_max" , "llvm.ppc.altivec.vmaxuh" , |
870 | genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>, |
871 | Ty::UnsignedVector<2>>, |
872 | genLibCall}, |
873 | {"__ppc_vec_max" , "llvm.ppc.altivec.vmaxuw" , |
874 | genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>, |
875 | Ty::UnsignedVector<4>>, |
876 | genLibCall}, |
877 | {"__ppc_vec_max" , "llvm.ppc.altivec.vmaxud" , |
878 | genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>, |
879 | Ty::UnsignedVector<8>>, |
880 | genLibCall}, |
881 | {"__ppc_vec_max" , "llvm.ppc.vsx.xvmaxsp" , |
882 | genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>, |
883 | genLibCall}, |
884 | {"__ppc_vec_max" , "llvm.ppc.vsx.xvmaxdp" , |
885 | genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>, |
886 | genLibCall}, |
887 | {"__ppc_vec_min" , "llvm.ppc.altivec.vminsb" , |
888 | genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>, |
889 | Ty::IntegerVector<1>>, |
890 | genLibCall}, |
891 | {"__ppc_vec_min" , "llvm.ppc.altivec.vminsh" , |
892 | genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>, |
893 | Ty::IntegerVector<2>>, |
894 | genLibCall}, |
895 | {"__ppc_vec_min" , "llvm.ppc.altivec.vminsw" , |
896 | genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>, |
897 | Ty::IntegerVector<4>>, |
898 | genLibCall}, |
899 | {"__ppc_vec_min" , "llvm.ppc.altivec.vminsd" , |
900 | genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>, |
901 | Ty::IntegerVector<8>>, |
902 | genLibCall}, |
903 | {"__ppc_vec_min" , "llvm.ppc.altivec.vminub" , |
904 | genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>, |
905 | Ty::UnsignedVector<1>>, |
906 | genLibCall}, |
907 | {"__ppc_vec_min" , "llvm.ppc.altivec.vminuh" , |
908 | genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>, |
909 | Ty::UnsignedVector<2>>, |
910 | genLibCall}, |
911 | {"__ppc_vec_min" , "llvm.ppc.altivec.vminuw" , |
912 | genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>, |
913 | Ty::UnsignedVector<4>>, |
914 | genLibCall}, |
915 | {"__ppc_vec_min" , "llvm.ppc.altivec.vminud" , |
916 | genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>, |
917 | Ty::UnsignedVector<8>>, |
918 | genLibCall}, |
919 | {"__ppc_vec_min" , "llvm.ppc.vsx.xvminsp" , |
920 | genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>, |
921 | genLibCall}, |
922 | {"__ppc_vec_min" , "llvm.ppc.vsx.xvmindp" , |
923 | genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>, |
924 | genLibCall}, |
925 | {"__ppc_vec_nmsub" , "llvm.ppc.fnmsub.v4f32" , |
926 | genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>, |
927 | Ty::RealVector<4>>, |
928 | genLibCall}, |
929 | {"__ppc_vec_nmsub" , "llvm.ppc.fnmsub.v2f64" , |
930 | genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>, |
931 | Ty::RealVector<8>>, |
932 | genLibCall}, |
933 | }; |
934 | |
935 | const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name) { |
936 | auto compare = [](const IntrinsicHandler &ppcHandler, llvm::StringRef name) { |
937 | return name.compare(ppcHandler.name) > 0; |
938 | }; |
939 | auto result = llvm::lower_bound(ppcHandlers, name, compare); |
940 | return result != std::end(ppcHandlers) && result->name == name ? result |
941 | : nullptr; |
942 | } |
943 | |
944 | using RtMap = Fortran::common::StaticMultimapView<MathOperation>; |
945 | static constexpr RtMap ppcMathOps(ppcMathOperations); |
946 | static_assert(ppcMathOps.Verify() && "map must be sorted" ); |
947 | |
948 | std::pair<const MathOperation *, const MathOperation *> |
949 | checkPPCMathOperationsRange(llvm::StringRef name) { |
950 | return ppcMathOps.equal_range(name); |
951 | } |
952 | |
953 | // Helper functions for vector element ordering. |
954 | bool PPCIntrinsicLibrary::isBEVecElemOrderOnLE() { |
955 | const auto triple{fir::getTargetTriple(builder.getModule())}; |
956 | return (triple.isLittleEndian() && |
957 | converter->getLoweringOptions().getNoPPCNativeVecElemOrder()); |
958 | } |
959 | bool PPCIntrinsicLibrary::isNativeVecElemOrderOnLE() { |
960 | const auto triple{fir::getTargetTriple(builder.getModule())}; |
961 | return (triple.isLittleEndian() && |
962 | !converter->getLoweringOptions().getNoPPCNativeVecElemOrder()); |
963 | } |
964 | bool PPCIntrinsicLibrary::changeVecElemOrder() { |
965 | const auto triple{fir::getTargetTriple(builder.getModule())}; |
966 | return (triple.isLittleEndian() != |
967 | converter->getLoweringOptions().getNoPPCNativeVecElemOrder()); |
968 | } |
969 | |
970 | static mlir::FunctionType genMmaVpFuncType(mlir::MLIRContext *context, |
971 | int quadCnt, int pairCnt, int vecCnt, |
972 | int intCnt = 0, |
973 | int vecElemBitSize = 8, |
974 | int intBitSize = 32) { |
975 | // Constructs a function type with the following signature: |
976 | // Result type: __vector_pair |
977 | // Arguments: |
978 | // quadCnt: number of arguments that has __vector_quad type, followed by |
979 | // pairCnt: number of arguments that has __vector_pair type, followed by |
980 | // vecCnt: number of arguments that has vector(integer) type, followed by |
981 | // intCnt: number of arguments that has integer type |
982 | // vecElemBitSize: specifies the size of vector elements in bits |
983 | // intBitSize: specifies the size of integer arguments in bits |
984 | auto vType{mlir::VectorType::get( |
985 | 128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))}; |
986 | auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))}; |
987 | auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))}; |
988 | auto iType{mlir::IntegerType::get(context, intBitSize)}; |
989 | llvm::SmallVector<mlir::Type> argTypes; |
990 | for (int i = 0; i < quadCnt; ++i) { |
991 | argTypes.push_back(Elt: vqType); |
992 | } |
993 | for (int i = 0; i < pairCnt; ++i) { |
994 | argTypes.push_back(Elt: vpType); |
995 | } |
996 | for (int i = 0; i < vecCnt; ++i) { |
997 | argTypes.push_back(Elt: vType); |
998 | } |
999 | for (int i = 0; i < intCnt; ++i) { |
1000 | argTypes.push_back(Elt: iType); |
1001 | } |
1002 | |
1003 | return mlir::FunctionType::get(context, argTypes, {vpType}); |
1004 | } |
1005 | |
1006 | static mlir::FunctionType genMmaVqFuncType(mlir::MLIRContext *context, |
1007 | int quadCnt, int pairCnt, int vecCnt, |
1008 | int intCnt = 0, |
1009 | int vecElemBitSize = 8, |
1010 | int intBitSize = 32) { |
1011 | // Constructs a function type with the following signature: |
1012 | // Result type: __vector_quad |
1013 | // Arguments: |
1014 | // quadCnt: number of arguments that has __vector_quad type, followed by |
1015 | // pairCnt: number of arguments that has __vector_pair type, followed by |
1016 | // vecCnt: number of arguments that has vector(integer) type, followed by |
1017 | // intCnt: number of arguments that has integer type |
1018 | // vecElemBitSize: specifies the size of vector elements in bits |
1019 | // intBitSize: specifies the size of integer arguments in bits |
1020 | auto vType{mlir::VectorType::get( |
1021 | 128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))}; |
1022 | auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))}; |
1023 | auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))}; |
1024 | auto iType{mlir::IntegerType::get(context, intBitSize)}; |
1025 | llvm::SmallVector<mlir::Type> argTypes; |
1026 | for (int i = 0; i < quadCnt; ++i) { |
1027 | argTypes.push_back(Elt: vqType); |
1028 | } |
1029 | for (int i = 0; i < pairCnt; ++i) { |
1030 | argTypes.push_back(Elt: vpType); |
1031 | } |
1032 | for (int i = 0; i < vecCnt; ++i) { |
1033 | argTypes.push_back(Elt: vType); |
1034 | } |
1035 | for (int i = 0; i < intCnt; ++i) { |
1036 | argTypes.push_back(Elt: iType); |
1037 | } |
1038 | |
1039 | return mlir::FunctionType::get(context, argTypes, {vqType}); |
1040 | } |
1041 | |
1042 | mlir::FunctionType genMmaDisassembleFuncType(mlir::MLIRContext *context, |
1043 | MMAOp mmaOp) { |
1044 | auto vType{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))}; |
1045 | llvm::SmallVector<mlir::Type> members; |
1046 | |
1047 | if (mmaOp == MMAOp::DisassembleAcc) { |
1048 | auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))}; |
1049 | members.push_back(Elt: vType); |
1050 | members.push_back(Elt: vType); |
1051 | members.push_back(Elt: vType); |
1052 | members.push_back(Elt: vType); |
1053 | auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)}; |
1054 | return mlir::FunctionType::get(context, {vqType}, {resType}); |
1055 | } else if (mmaOp == MMAOp::DisassemblePair) { |
1056 | auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))}; |
1057 | members.push_back(Elt: vType); |
1058 | members.push_back(Elt: vType); |
1059 | auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)}; |
1060 | return mlir::FunctionType::get(context, {vpType}, {resType}); |
1061 | } else { |
1062 | llvm_unreachable( |
1063 | "Unsupported intrinsic code for function signature generator" ); |
1064 | } |
1065 | } |
1066 | |
1067 | //===----------------------------------------------------------------------===// |
1068 | // PowerPC specific intrinsic handlers. |
1069 | //===----------------------------------------------------------------------===// |
1070 | |
1071 | // MTFSF, MTFSFI |
1072 | template <bool isImm> |
1073 | void PPCIntrinsicLibrary::genMtfsf(llvm::ArrayRef<fir::ExtendedValue> args) { |
1074 | assert(args.size() == 2); |
1075 | llvm::SmallVector<mlir::Value> scalarArgs; |
1076 | for (const fir::ExtendedValue &arg : args) |
1077 | if (arg.getUnboxed()) |
1078 | scalarArgs.emplace_back(fir::getBase(arg)); |
1079 | else |
1080 | mlir::emitError(loc, "nonscalar intrinsic argument" ); |
1081 | |
1082 | mlir::FunctionType libFuncType; |
1083 | mlir::func::FuncOp funcOp; |
1084 | if (isImm) { |
1085 | libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Integer<4>>( |
1086 | builder.getContext(), builder); |
1087 | funcOp = builder.createFunction(loc, "llvm.ppc.mtfsfi" , libFuncType); |
1088 | } else { |
1089 | libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Real<8>>( |
1090 | builder.getContext(), builder); |
1091 | funcOp = builder.createFunction(loc, "llvm.ppc.mtfsf" , libFuncType); |
1092 | } |
1093 | builder.create<fir::CallOp>(loc, funcOp, scalarArgs); |
1094 | } |
1095 | |
1096 | // VEC_ABS |
1097 | fir::ExtendedValue |
1098 | PPCIntrinsicLibrary::genVecAbs(mlir::Type resultType, |
1099 | llvm::ArrayRef<fir::ExtendedValue> args) { |
1100 | assert(args.size() == 1); |
1101 | auto context{builder.getContext()}; |
1102 | auto argBases{getBasesForArgs(args)}; |
1103 | auto vTypeInfo{getVecTypeFromFir(argBases[0])}; |
1104 | |
1105 | mlir::func::FuncOp funcOp{nullptr}; |
1106 | mlir::FunctionType ftype; |
1107 | llvm::StringRef fname{}; |
1108 | if (vTypeInfo.isFloat()) { |
1109 | if (vTypeInfo.isFloat32()) { |
1110 | fname = "llvm.fabs.v4f32" ; |
1111 | ftype = |
1112 | genFuncType<Ty::RealVector<4>, Ty::RealVector<4>>(context, builder); |
1113 | } else if (vTypeInfo.isFloat64()) { |
1114 | fname = "llvm.fabs.v2f64" ; |
1115 | ftype = |
1116 | genFuncType<Ty::RealVector<8>, Ty::RealVector<8>>(context, builder); |
1117 | } |
1118 | |
1119 | funcOp = builder.createFunction(loc, fname, ftype); |
1120 | auto callOp{builder.create<fir::CallOp>(loc, funcOp, argBases[0])}; |
1121 | return callOp.getResult(0); |
1122 | } else if (auto eleTy = vTypeInfo.eleTy.dyn_cast<mlir::IntegerType>()) { |
1123 | // vec_abs(arg1) = max(0 - arg1, arg1) |
1124 | |
1125 | auto newVecTy{mlir::VectorType::get(vTypeInfo.len, eleTy)}; |
1126 | auto varg1{builder.createConvert(loc, newVecTy, argBases[0])}; |
1127 | // construct vector(0,..) |
1128 | auto zeroVal{builder.createIntegerConstant(loc, eleTy, 0)}; |
1129 | auto vZero{ |
1130 | builder.create<mlir::vector::BroadcastOp>(loc, newVecTy, zeroVal)}; |
1131 | auto zeroSubVarg1{builder.create<mlir::arith::SubIOp>(loc, vZero, varg1)}; |
1132 | |
1133 | mlir::func::FuncOp funcOp{nullptr}; |
1134 | switch (eleTy.getWidth()) { |
1135 | case 8: |
1136 | fname = "llvm.ppc.altivec.vmaxsb" ; |
1137 | ftype = genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>, |
1138 | Ty::IntegerVector<1>>(context, builder); |
1139 | break; |
1140 | case 16: |
1141 | fname = "llvm.ppc.altivec.vmaxsh" ; |
1142 | ftype = genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>, |
1143 | Ty::IntegerVector<2>>(context, builder); |
1144 | break; |
1145 | case 32: |
1146 | fname = "llvm.ppc.altivec.vmaxsw" ; |
1147 | ftype = genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>, |
1148 | Ty::IntegerVector<4>>(context, builder); |
1149 | break; |
1150 | case 64: |
1151 | fname = "llvm.ppc.altivec.vmaxsd" ; |
1152 | ftype = genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>, |
1153 | Ty::IntegerVector<8>>(context, builder); |
1154 | break; |
1155 | default: |
1156 | llvm_unreachable("invalid integer size" ); |
1157 | } |
1158 | funcOp = builder.createFunction(loc, fname, ftype); |
1159 | |
1160 | mlir::Value args[] = {zeroSubVarg1, varg1}; |
1161 | auto callOp{builder.create<fir::CallOp>(loc, funcOp, args)}; |
1162 | return builder.createConvert(loc, argBases[0].getType(), |
1163 | callOp.getResult(0)); |
1164 | } |
1165 | |
1166 | llvm_unreachable("unknown vector type" ); |
1167 | } |
1168 | |
1169 | // VEC_ADD, VEC_AND, VEC_SUB, VEC_MUL, VEC_XOR |
1170 | template <VecOp vop> |
1171 | fir::ExtendedValue PPCIntrinsicLibrary::genVecAddAndMulSubXor( |
1172 | mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) { |
1173 | assert(args.size() == 2); |
1174 | auto argBases{getBasesForArgs(args)}; |
1175 | auto argsTy{getTypesForArgs(argBases)}; |
1176 | assert(argsTy[0].isa<fir::VectorType>() && argsTy[1].isa<fir::VectorType>()); |
1177 | |
1178 | auto vecTyInfo{getVecTypeFromFir(argBases[0])}; |
1179 | |
1180 | const auto isInteger{vecTyInfo.eleTy.isa<mlir::IntegerType>()}; |
1181 | const auto isFloat{vecTyInfo.eleTy.isa<mlir::FloatType>()}; |
1182 | assert((isInteger || isFloat) && "unknown vector type" ); |
1183 | |
1184 | auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)}; |
1185 | |
1186 | mlir::Value r{nullptr}; |
1187 | switch (vop) { |
1188 | case VecOp::Add: |
1189 | if (isInteger) |
1190 | r = builder.create<mlir::arith::AddIOp>(loc, vargs[0], vargs[1]); |
1191 | else if (isFloat) |
1192 | r = builder.create<mlir::arith::AddFOp>(loc, vargs[0], vargs[1]); |
1193 | break; |
1194 | case VecOp::Mul: |
1195 | if (isInteger) |
1196 | r = builder.create<mlir::arith::MulIOp>(loc, vargs[0], vargs[1]); |
1197 | else if (isFloat) |
1198 | r = builder.create<mlir::arith::MulFOp>(loc, vargs[0], vargs[1]); |
1199 | break; |
1200 | case VecOp::Sub: |
1201 | if (isInteger) |
1202 | r = builder.create<mlir::arith::SubIOp>(loc, vargs[0], vargs[1]); |
1203 | else if (isFloat) |
1204 | r = builder.create<mlir::arith::SubFOp>(loc, vargs[0], vargs[1]); |
1205 | break; |
1206 | case VecOp::And: |
1207 | case VecOp::Xor: { |
1208 | mlir::Value arg1{nullptr}; |
1209 | mlir::Value arg2{nullptr}; |
1210 | if (isInteger) { |
1211 | arg1 = vargs[0]; |
1212 | arg2 = vargs[1]; |
1213 | } else if (isFloat) { |
1214 | // bitcast the arguments to integer |
1215 | auto wd{vecTyInfo.eleTy.dyn_cast<mlir::FloatType>().getWidth()}; |
1216 | auto ftype{builder.getIntegerType(wd)}; |
1217 | auto bcVecTy{mlir::VectorType::get(vecTyInfo.len, ftype)}; |
1218 | arg1 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0]); |
1219 | arg2 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1]); |
1220 | } |
1221 | if (vop == VecOp::And) |
1222 | r = builder.create<mlir::arith::AndIOp>(loc, arg1, arg2); |
1223 | else if (vop == VecOp::Xor) |
1224 | r = builder.create<mlir::arith::XOrIOp>(loc, arg1, arg2); |
1225 | |
1226 | if (isFloat) |
1227 | r = builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), r); |
1228 | |
1229 | break; |
1230 | } |
1231 | } |
1232 | |
1233 | return builder.createConvert(loc, argsTy[0], r); |
1234 | } |
1235 | |
1236 | // VEC_ANY_GE |
1237 | template <VecOp vop> |
1238 | fir::ExtendedValue |
1239 | PPCIntrinsicLibrary::genVecAnyCompare(mlir::Type resultType, |
1240 | llvm::ArrayRef<fir::ExtendedValue> args) { |
1241 | assert(args.size() == 2); |
1242 | assert(vop == VecOp::Anyge && "unknown vector compare operation" ); |
1243 | auto argBases{getBasesForArgs(args)}; |
1244 | VecTypeInfo vTypeInfo{getVecTypeFromFir(argBases[0])}; |
1245 | [[maybe_unused]] const auto isSupportedTy{ |
1246 | mlir::isa<mlir::Float32Type, mlir::Float64Type, mlir::IntegerType>( |
1247 | vTypeInfo.eleTy)}; |
1248 | assert(isSupportedTy && "unsupported vector type" ); |
1249 | |
1250 | // Constants for mapping CR6 bits to predicate result |
1251 | enum { CR6_EQ_REV = 1, CR6_LT_REV = 3 }; |
1252 | |
1253 | auto context{builder.getContext()}; |
1254 | |
1255 | static std::map<std::pair<ParamTypeId, unsigned>, |
1256 | std::pair<llvm::StringRef, mlir::FunctionType>> |
1257 | uiBuiltin{ |
1258 | {std::make_pair(ParamTypeId::IntegerVector, 8), |
1259 | std::make_pair( |
1260 | "llvm.ppc.altivec.vcmpgtsb.p" , |
1261 | genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<1>, |
1262 | Ty::IntegerVector<1>>(context, builder))}, |
1263 | {std::make_pair(ParamTypeId::IntegerVector, 16), |
1264 | std::make_pair( |
1265 | "llvm.ppc.altivec.vcmpgtsh.p" , |
1266 | genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<2>, |
1267 | Ty::IntegerVector<2>>(context, builder))}, |
1268 | {std::make_pair(ParamTypeId::IntegerVector, 32), |
1269 | std::make_pair( |
1270 | "llvm.ppc.altivec.vcmpgtsw.p" , |
1271 | genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<4>, |
1272 | Ty::IntegerVector<4>>(context, builder))}, |
1273 | {std::make_pair(ParamTypeId::IntegerVector, 64), |
1274 | std::make_pair( |
1275 | "llvm.ppc.altivec.vcmpgtsd.p" , |
1276 | genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<8>, |
1277 | Ty::IntegerVector<8>>(context, builder))}, |
1278 | {std::make_pair(ParamTypeId::UnsignedVector, 8), |
1279 | std::make_pair( |
1280 | "llvm.ppc.altivec.vcmpgtub.p" , |
1281 | genFuncType<Ty::Integer<4>, Ty::Integer<4>, |
1282 | Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>( |
1283 | context, builder))}, |
1284 | {std::make_pair(ParamTypeId::UnsignedVector, 16), |
1285 | std::make_pair( |
1286 | "llvm.ppc.altivec.vcmpgtuh.p" , |
1287 | genFuncType<Ty::Integer<4>, Ty::Integer<4>, |
1288 | Ty::UnsignedVector<2>, Ty::UnsignedVector<2>>( |
1289 | context, builder))}, |
1290 | {std::make_pair(ParamTypeId::UnsignedVector, 32), |
1291 | std::make_pair( |
1292 | "llvm.ppc.altivec.vcmpgtuw.p" , |
1293 | genFuncType<Ty::Integer<4>, Ty::Integer<4>, |
1294 | Ty::UnsignedVector<4>, Ty::UnsignedVector<4>>( |
1295 | context, builder))}, |
1296 | {std::make_pair(ParamTypeId::UnsignedVector, 64), |
1297 | std::make_pair( |
1298 | "llvm.ppc.altivec.vcmpgtud.p" , |
1299 | genFuncType<Ty::Integer<4>, Ty::Integer<4>, |
1300 | Ty::UnsignedVector<8>, Ty::UnsignedVector<8>>( |
1301 | context, builder))}, |
1302 | }; |
1303 | |
1304 | mlir::FunctionType ftype{nullptr}; |
1305 | llvm::StringRef fname; |
1306 | const auto i32Ty{mlir::IntegerType::get(context, 32)}; |
1307 | llvm::SmallVector<mlir::Value> cmpArgs; |
1308 | mlir::Value op{nullptr}; |
1309 | const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()}; |
1310 | |
1311 | if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) { |
1312 | std::pair<llvm::StringRef, mlir::FunctionType> bi; |
1313 | bi = (elementTy.isUnsignedInteger()) |
1314 | ? uiBuiltin[std::pair(ParamTypeId::UnsignedVector, width)] |
1315 | : uiBuiltin[std::pair(ParamTypeId::IntegerVector, width)]; |
1316 | |
1317 | fname = std::get<0>(bi); |
1318 | ftype = std::get<1>(bi); |
1319 | |
1320 | op = builder.createIntegerConstant(loc, i32Ty, CR6_LT_REV); |
1321 | cmpArgs.emplace_back(op); |
1322 | // reverse the argument order |
1323 | cmpArgs.emplace_back(argBases[1]); |
1324 | cmpArgs.emplace_back(argBases[0]); |
1325 | } else if (vTypeInfo.isFloat()) { |
1326 | if (vTypeInfo.isFloat32()) { |
1327 | fname = "llvm.ppc.vsx.xvcmpgesp.p" ; |
1328 | ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<4>, |
1329 | Ty::RealVector<4>>(context, builder); |
1330 | } else { |
1331 | fname = "llvm.ppc.vsx.xvcmpgedp.p" ; |
1332 | ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<8>, |
1333 | Ty::RealVector<8>>(context, builder); |
1334 | } |
1335 | op = builder.createIntegerConstant(loc, i32Ty, CR6_EQ_REV); |
1336 | cmpArgs.emplace_back(op); |
1337 | cmpArgs.emplace_back(argBases[0]); |
1338 | cmpArgs.emplace_back(argBases[1]); |
1339 | } |
1340 | assert((!fname.empty() && ftype) && "invalid type" ); |
1341 | |
1342 | mlir::func::FuncOp funcOp{builder.createFunction(loc, fname, ftype)}; |
1343 | auto callOp{builder.create<fir::CallOp>(loc, funcOp, cmpArgs)}; |
1344 | return callOp.getResult(0); |
1345 | } |
1346 | |
1347 | static std::pair<llvm::StringRef, mlir::FunctionType> |
1348 | getVecCmpFuncTypeAndName(VecTypeInfo &vTypeInfo, VecOp vop, |
1349 | fir::FirOpBuilder &builder) { |
1350 | auto context{builder.getContext()}; |
1351 | static std::map<std::pair<ParamTypeId, unsigned>, |
1352 | std::pair<llvm::StringRef, mlir::FunctionType>> |
1353 | iuBuiltinName{ |
1354 | {std::make_pair(ParamTypeId::IntegerVector, 8), |
1355 | std::make_pair( |
1356 | "llvm.ppc.altivec.vcmpgtsb" , |
1357 | genFuncType<Ty::UnsignedVector<1>, Ty::IntegerVector<1>, |
1358 | Ty::IntegerVector<1>>(context, builder))}, |
1359 | {std::make_pair(ParamTypeId::IntegerVector, 16), |
1360 | std::make_pair( |
1361 | "llvm.ppc.altivec.vcmpgtsh" , |
1362 | genFuncType<Ty::UnsignedVector<2>, Ty::IntegerVector<2>, |
1363 | Ty::IntegerVector<2>>(context, builder))}, |
1364 | {std::make_pair(ParamTypeId::IntegerVector, 32), |
1365 | std::make_pair( |
1366 | "llvm.ppc.altivec.vcmpgtsw" , |
1367 | genFuncType<Ty::UnsignedVector<4>, Ty::IntegerVector<4>, |
1368 | Ty::IntegerVector<4>>(context, builder))}, |
1369 | {std::make_pair(ParamTypeId::IntegerVector, 64), |
1370 | std::make_pair( |
1371 | "llvm.ppc.altivec.vcmpgtsd" , |
1372 | genFuncType<Ty::UnsignedVector<8>, Ty::IntegerVector<8>, |
1373 | Ty::IntegerVector<8>>(context, builder))}, |
1374 | {std::make_pair(ParamTypeId::UnsignedVector, 8), |
1375 | std::make_pair( |
1376 | "llvm.ppc.altivec.vcmpgtub" , |
1377 | genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>, |
1378 | Ty::UnsignedVector<1>>(context, builder))}, |
1379 | {std::make_pair(ParamTypeId::UnsignedVector, 16), |
1380 | std::make_pair( |
1381 | "llvm.ppc.altivec.vcmpgtuh" , |
1382 | genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>, |
1383 | Ty::UnsignedVector<2>>(context, builder))}, |
1384 | {std::make_pair(ParamTypeId::UnsignedVector, 32), |
1385 | std::make_pair( |
1386 | "llvm.ppc.altivec.vcmpgtuw" , |
1387 | genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>, |
1388 | Ty::UnsignedVector<4>>(context, builder))}, |
1389 | {std::make_pair(ParamTypeId::UnsignedVector, 64), |
1390 | std::make_pair( |
1391 | "llvm.ppc.altivec.vcmpgtud" , |
1392 | genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>, |
1393 | Ty::UnsignedVector<8>>(context, builder))}}; |
1394 | |
1395 | // VSX only defines GE and GT builtins. Cmple and Cmplt use GE and GT with |
1396 | // arguments revsered. |
1397 | enum class Cmp { gtOrLt, geOrLe }; |
1398 | static std::map<std::pair<Cmp, int>, |
1399 | std::pair<llvm::StringRef, mlir::FunctionType>> |
1400 | rGBI{{std::make_pair(Cmp::geOrLe, 32), |
1401 | std::make_pair("llvm.ppc.vsx.xvcmpgesp" , |
1402 | genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>, |
1403 | Ty::RealVector<4>>(context, builder))}, |
1404 | {std::make_pair(Cmp::geOrLe, 64), |
1405 | std::make_pair("llvm.ppc.vsx.xvcmpgedp" , |
1406 | genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>, |
1407 | Ty::RealVector<8>>(context, builder))}, |
1408 | {std::make_pair(Cmp::gtOrLt, 32), |
1409 | std::make_pair("llvm.ppc.vsx.xvcmpgtsp" , |
1410 | genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>, |
1411 | Ty::RealVector<4>>(context, builder))}, |
1412 | {std::make_pair(Cmp::gtOrLt, 64), |
1413 | std::make_pair("llvm.ppc.vsx.xvcmpgtdp" , |
1414 | genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>, |
1415 | Ty::RealVector<8>>(context, builder))}}; |
1416 | |
1417 | const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()}; |
1418 | std::pair<llvm::StringRef, mlir::FunctionType> specFunc; |
1419 | if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) |
1420 | specFunc = |
1421 | (elementTy.isUnsignedInteger()) |
1422 | ? iuBuiltinName[std::make_pair(ParamTypeId::UnsignedVector, width)] |
1423 | : iuBuiltinName[std::make_pair(ParamTypeId::IntegerVector, width)]; |
1424 | else if (vTypeInfo.isFloat()) |
1425 | specFunc = (vop == VecOp::Cmpge || vop == VecOp::Cmple) |
1426 | ? rGBI[std::make_pair(Cmp::geOrLe, width)] |
1427 | : rGBI[std::make_pair(Cmp::gtOrLt, width)]; |
1428 | |
1429 | assert(!std::get<0>(specFunc).empty() && "unknown builtin name" ); |
1430 | assert(std::get<1>(specFunc) && "unknown function type" ); |
1431 | return specFunc; |
1432 | } |
1433 | |
1434 | // VEC_CMPGE, VEC_CMPGT, VEC_CMPLE, VEC_CMPLT |
1435 | template <VecOp vop> |
1436 | fir::ExtendedValue |
1437 | PPCIntrinsicLibrary::genVecCmp(mlir::Type resultType, |
1438 | llvm::ArrayRef<fir::ExtendedValue> args) { |
1439 | assert(args.size() == 2); |
1440 | auto context{builder.getContext()}; |
1441 | auto argBases{getBasesForArgs(args)}; |
1442 | VecTypeInfo vecTyInfo{getVecTypeFromFir(argBases[0])}; |
1443 | auto varg{convertVecArgs(builder, loc, vecTyInfo, argBases)}; |
1444 | |
1445 | std::pair<llvm::StringRef, mlir::FunctionType> funcTyNam{ |
1446 | getVecCmpFuncTypeAndName(vecTyInfo, vop, builder)}; |
1447 | |
1448 | mlir::func::FuncOp funcOp = builder.createFunction( |
1449 | loc, std::get<0>(funcTyNam), std::get<1>(funcTyNam)); |
1450 | |
1451 | mlir::Value res{nullptr}; |
1452 | |
1453 | if (auto eTy = vecTyInfo.eleTy.dyn_cast<mlir::IntegerType>()) { |
1454 | constexpr int firstArg{0}; |
1455 | constexpr int secondArg{1}; |
1456 | std::map<VecOp, std::array<int, 2>> argOrder{ |
1457 | {VecOp::Cmpge, {secondArg, firstArg}}, |
1458 | {VecOp::Cmple, {firstArg, secondArg}}, |
1459 | {VecOp::Cmpgt, {firstArg, secondArg}}, |
1460 | {VecOp::Cmplt, {secondArg, firstArg}}}; |
1461 | |
1462 | // Construct the function return type, unsigned vector, for conversion. |
1463 | auto itype = mlir::IntegerType::get(context, eTy.getWidth(), |
1464 | mlir::IntegerType::Unsigned); |
1465 | auto returnType = fir::VectorType::get(vecTyInfo.len, itype); |
1466 | |
1467 | switch (vop) { |
1468 | case VecOp::Cmpgt: |
1469 | case VecOp::Cmplt: { |
1470 | // arg1 > arg2 --> vcmpgt(arg1, arg2) |
1471 | // arg1 < arg2 --> vcmpgt(arg2, arg1) |
1472 | mlir::Value vargs[]{argBases[argOrder[vop][0]], |
1473 | argBases[argOrder[vop][1]]}; |
1474 | auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)}; |
1475 | res = callOp.getResult(0); |
1476 | break; |
1477 | } |
1478 | case VecOp::Cmpge: |
1479 | case VecOp::Cmple: { |
1480 | // arg1 >= arg2 --> vcmpge(arg2, arg1) xor vector(-1) |
1481 | // arg1 <= arg2 --> vcmpge(arg1, arg2) xor vector(-1) |
1482 | mlir::Value vargs[]{argBases[argOrder[vop][0]], |
1483 | argBases[argOrder[vop][1]]}; |
1484 | |
1485 | // Construct a constant vector(-1) |
1486 | auto negOneVal{builder.createIntegerConstant( |
1487 | loc, getConvertedElementType(context, eTy), -1)}; |
1488 | auto vNegOne{builder.create<mlir::vector::BroadcastOp>( |
1489 | loc, vecTyInfo.toMlirVectorType(context), negOneVal)}; |
1490 | |
1491 | auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)}; |
1492 | mlir::Value callRes{callOp.getResult(0)}; |
1493 | auto vargs2{ |
1494 | convertVecArgs(builder, loc, vecTyInfo, mlir::ValueRange{callRes})}; |
1495 | auto xorRes{builder.create<mlir::arith::XOrIOp>(loc, vargs2[0], vNegOne)}; |
1496 | |
1497 | res = builder.createConvert(loc, returnType, xorRes); |
1498 | break; |
1499 | } |
1500 | default: |
1501 | llvm_unreachable("Invalid vector operation for generator" ); |
1502 | } |
1503 | } else if (vecTyInfo.isFloat()) { |
1504 | mlir::Value vargs[2]; |
1505 | switch (vop) { |
1506 | case VecOp::Cmpge: |
1507 | case VecOp::Cmpgt: |
1508 | vargs[0] = argBases[0]; |
1509 | vargs[1] = argBases[1]; |
1510 | break; |
1511 | case VecOp::Cmple: |
1512 | case VecOp::Cmplt: |
1513 | // Swap the arguments as xvcmpg[et] is used |
1514 | vargs[0] = argBases[1]; |
1515 | vargs[1] = argBases[0]; |
1516 | break; |
1517 | default: |
1518 | llvm_unreachable("Invalid vector operation for generator" ); |
1519 | } |
1520 | auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)}; |
1521 | res = callOp.getResult(0); |
1522 | } else |
1523 | llvm_unreachable("invalid vector type" ); |
1524 | |
1525 | return res; |
1526 | } |
1527 | |
1528 | static inline mlir::Value swapVectorWordPairs(fir::FirOpBuilder &builder, |
1529 | mlir::Location loc, |
1530 | mlir::Value arg) { |
1531 | auto ty = arg.getType(); |
1532 | auto context{builder.getContext()}; |
1533 | auto vtype{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))}; |
1534 | |
1535 | if (ty != vtype) |
1536 | arg = builder.create<mlir::LLVM::BitcastOp>(loc, vtype, arg).getResult(); |
1537 | |
1538 | llvm::SmallVector<int64_t, 16> mask{4, 5, 6, 7, 0, 1, 2, 3, |
1539 | 12, 13, 14, 15, 8, 9, 10, 11}; |
1540 | arg = builder.create<mlir::vector::ShuffleOp>(loc, arg, arg, mask); |
1541 | if (ty != vtype) |
1542 | arg = builder.create<mlir::LLVM::BitcastOp>(loc, ty, arg); |
1543 | return arg; |
1544 | } |
1545 | |
1546 | // VEC_CONVERT, VEC_CTF, VEC_CVF |
1547 | template <VecOp vop> |
1548 | fir::ExtendedValue |
1549 | PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType, |
1550 | llvm::ArrayRef<fir::ExtendedValue> args) { |
1551 | auto context{builder.getContext()}; |
1552 | auto argBases{getBasesForArgs(args)}; |
1553 | auto vecTyInfo{getVecTypeFromFir(argBases[0])}; |
1554 | auto mlirTy{vecTyInfo.toMlirVectorType(context)}; |
1555 | auto vArg1{builder.createConvert(loc, mlirTy, argBases[0])}; |
1556 | const auto i32Ty{mlir::IntegerType::get(context, 32)}; |
1557 | |
1558 | switch (vop) { |
1559 | case VecOp::Ctf: { |
1560 | assert(args.size() == 2); |
1561 | auto convArg{builder.createConvert(loc, i32Ty, argBases[1])}; |
1562 | auto eTy{vecTyInfo.eleTy.dyn_cast<mlir::IntegerType>()}; |
1563 | assert(eTy && "Unsupported vector type" ); |
1564 | const auto isUnsigned{eTy.isUnsignedInteger()}; |
1565 | const auto width{eTy.getWidth()}; |
1566 | |
1567 | if (width == 32) { |
1568 | auto ftype{(isUnsigned) |
1569 | ? genFuncType<Ty::RealVector<4>, Ty::UnsignedVector<4>, |
1570 | Ty::Integer<4>>(context, builder) |
1571 | : genFuncType<Ty::RealVector<4>, Ty::IntegerVector<4>, |
1572 | Ty::Integer<4>>(context, builder)}; |
1573 | const llvm::StringRef fname{(isUnsigned) ? "llvm.ppc.altivec.vcfux" |
1574 | : "llvm.ppc.altivec.vcfsx" }; |
1575 | auto funcOp{builder.createFunction(loc, fname, ftype)}; |
1576 | mlir::Value newArgs[] = {argBases[0], convArg}; |
1577 | auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)}; |
1578 | |
1579 | return callOp.getResult(0); |
1580 | } else if (width == 64) { |
1581 | auto fTy{mlir::FloatType::getF64(context)}; |
1582 | auto ty{mlir::VectorType::get(2, fTy)}; |
1583 | |
1584 | // vec_vtf(arg1, arg2) = fmul(1.0 / (1 << arg2), llvm.sitofp(arg1)) |
1585 | auto convOp{(isUnsigned) |
1586 | ? builder.create<mlir::LLVM::UIToFPOp>(loc, ty, vArg1) |
1587 | : builder.create<mlir::LLVM::SIToFPOp>(loc, ty, vArg1)}; |
1588 | |
1589 | // construct vector<1./(1<<arg1), 1.0/(1<<arg1)> |
1590 | auto constInt{ |
1591 | mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[1].getDefiningOp()) |
1592 | .getValue() |
1593 | .dyn_cast_or_null<mlir::IntegerAttr>()}; |
1594 | assert(constInt && "expected integer constant argument" ); |
1595 | double f{1.0 / (1 << constInt.getInt())}; |
1596 | llvm::SmallVector<double> vals{f, f}; |
1597 | auto constOp{builder.create<mlir::arith::ConstantOp>( |
1598 | loc, ty, builder.getF64VectorAttr(vals))}; |
1599 | |
1600 | auto mulOp{builder.create<mlir::LLVM::FMulOp>( |
1601 | loc, ty, convOp->getResult(0), constOp)}; |
1602 | |
1603 | return builder.createConvert(loc, fir::VectorType::get(2, fTy), mulOp); |
1604 | } |
1605 | llvm_unreachable("invalid element integer kind" ); |
1606 | } |
1607 | case VecOp::Convert: { |
1608 | assert(args.size() == 2); |
1609 | // resultType has mold type (if scalar) or element type (if array) |
1610 | auto resTyInfo{getVecTypeFromFirType(resultType)}; |
1611 | auto moldTy{resTyInfo.toMlirVectorType(context)}; |
1612 | auto firTy{resTyInfo.toFirVectorType()}; |
1613 | |
1614 | // vec_convert(v, mold) = bitcast v to "type of mold" |
1615 | auto conv{builder.create<mlir::LLVM::BitcastOp>(loc, moldTy, vArg1)}; |
1616 | |
1617 | return builder.createConvert(loc, firTy, conv); |
1618 | } |
1619 | case VecOp::Cvf: { |
1620 | assert(args.size() == 1); |
1621 | |
1622 | mlir::Value newArgs[]{vArg1}; |
1623 | if (vecTyInfo.isFloat32()) { |
1624 | if (changeVecElemOrder()) |
1625 | newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]); |
1626 | |
1627 | const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp" }; |
1628 | auto ftype{ |
1629 | genFuncType<Ty::RealVector<8>, Ty::RealVector<4>>(context, builder)}; |
1630 | auto funcOp{builder.createFunction(loc, fname, ftype)}; |
1631 | auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)}; |
1632 | |
1633 | return callOp.getResult(0); |
1634 | } else if (vecTyInfo.isFloat64()) { |
1635 | const llvm::StringRef fname{"llvm.ppc.vsx.xvcvdpsp" }; |
1636 | auto ftype{ |
1637 | genFuncType<Ty::RealVector<4>, Ty::RealVector<8>>(context, builder)}; |
1638 | auto funcOp{builder.createFunction(loc, fname, ftype)}; |
1639 | newArgs[0] = |
1640 | builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0); |
1641 | auto fvf32Ty{newArgs[0].getType()}; |
1642 | auto f32type{mlir::FloatType::getF32(context)}; |
1643 | auto mvf32Ty{mlir::VectorType::get(4, f32type)}; |
1644 | newArgs[0] = builder.createConvert(loc, mvf32Ty, newArgs[0]); |
1645 | |
1646 | if (changeVecElemOrder()) |
1647 | newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]); |
1648 | |
1649 | return builder.createConvert(loc, fvf32Ty, newArgs[0]); |
1650 | } |
1651 | llvm_unreachable("invalid element integer kind" ); |
1652 | } |
1653 | default: |
1654 | llvm_unreachable("Invalid vector operation for generator" ); |
1655 | } |
1656 | } |
1657 | |
1658 | static mlir::Value convertVectorElementOrder(fir::FirOpBuilder &builder, |
1659 | mlir::Location loc, |
1660 | VecTypeInfo vecInfo, |
1661 | mlir::Value idx) { |
1662 | mlir::Value numSub1{ |
1663 | builder.createIntegerConstant(loc, idx.getType(), vecInfo.len - 1)}; |
1664 | return builder.create<mlir::LLVM::SubOp>(loc, idx.getType(), numSub1, idx); |
1665 | } |
1666 | |
1667 | // VEC_EXTRACT |
1668 | fir::ExtendedValue |
1669 | PPCIntrinsicLibrary::genVecExtract(mlir::Type resultType, |
1670 | llvm::ArrayRef<fir::ExtendedValue> args) { |
1671 | assert(args.size() == 2); |
1672 | auto argBases{getBasesForArgs(args)}; |
1673 | auto argTypes{getTypesForArgs(argBases)}; |
1674 | auto vecTyInfo{getVecTypeFromFir(argBases[0])}; |
1675 | |
1676 | auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())}; |
1677 | auto varg0{builder.createConvert(loc, mlirTy, argBases[0])}; |
1678 | |
1679 | // arg2 modulo the number of elements in arg1 to determine the element |
1680 | // position |
1681 | auto numEle{builder.createIntegerConstant(loc, argTypes[1], vecTyInfo.len)}; |
1682 | mlir::Value uremOp{ |
1683 | builder.create<mlir::LLVM::URemOp>(loc, argBases[1], numEle)}; |
1684 | |
1685 | if (!isNativeVecElemOrderOnLE()) |
1686 | uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp); |
1687 | |
1688 | return builder.create<mlir::vector::ExtractElementOp>(loc, varg0, uremOp); |
1689 | } |
1690 | |
1691 | // VEC_INSERT |
1692 | fir::ExtendedValue |
1693 | PPCIntrinsicLibrary::genVecInsert(mlir::Type resultType, |
1694 | llvm::ArrayRef<fir::ExtendedValue> args) { |
1695 | assert(args.size() == 3); |
1696 | auto argBases{getBasesForArgs(args)}; |
1697 | auto argTypes{getTypesForArgs(argBases)}; |
1698 | auto vecTyInfo{getVecTypeFromFir(argBases[1])}; |
1699 | auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())}; |
1700 | auto varg1{builder.createConvert(loc, mlirTy, argBases[1])}; |
1701 | |
1702 | auto numEle{builder.createIntegerConstant(loc, argTypes[2], vecTyInfo.len)}; |
1703 | mlir::Value uremOp{ |
1704 | builder.create<mlir::LLVM::URemOp>(loc, argBases[2], numEle)}; |
1705 | |
1706 | if (!isNativeVecElemOrderOnLE()) |
1707 | uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp); |
1708 | |
1709 | auto res{builder.create<mlir::vector::InsertElementOp>(loc, argBases[0], |
1710 | varg1, uremOp)}; |
1711 | return builder.create<fir::ConvertOp>(loc, vecTyInfo.toFirVectorType(), res); |
1712 | } |
1713 | |
1714 | // VEC_MERGEH, VEC_MERGEL |
1715 | template <VecOp vop> |
1716 | fir::ExtendedValue |
1717 | PPCIntrinsicLibrary::genVecMerge(mlir::Type resultType, |
1718 | llvm::ArrayRef<fir::ExtendedValue> args) { |
1719 | assert(args.size() == 2); |
1720 | auto argBases{getBasesForArgs(args)}; |
1721 | auto vecTyInfo{getVecTypeFromFir(argBases[0])}; |
1722 | llvm::SmallVector<int64_t, 16> mMask; // native vector element order mask |
1723 | llvm::SmallVector<int64_t, 16> rMask; // non-native vector element order mask |
1724 | |
1725 | switch (vop) { |
1726 | case VecOp::Mergeh: { |
1727 | switch (vecTyInfo.len) { |
1728 | case 2: { |
1729 | enum { V1 = 0, V2 = 2 }; |
1730 | mMask = {V1 + 0, V2 + 0}; |
1731 | rMask = {V2 + 1, V1 + 1}; |
1732 | break; |
1733 | } |
1734 | case 4: { |
1735 | enum { V1 = 0, V2 = 4 }; |
1736 | mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1}; |
1737 | rMask = {V2 + 2, V1 + 2, V2 + 3, V1 + 3}; |
1738 | break; |
1739 | } |
1740 | case 8: { |
1741 | enum { V1 = 0, V2 = 8 }; |
1742 | mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1, V1 + 2, V2 + 2, V1 + 3, V2 + 3}; |
1743 | rMask = {V2 + 4, V1 + 4, V2 + 5, V1 + 5, V2 + 6, V1 + 6, V2 + 7, V1 + 7}; |
1744 | break; |
1745 | } |
1746 | case 16: |
1747 | mMask = {0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, |
1748 | 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17}; |
1749 | rMask = {0x18, 0x08, 0x19, 0x09, 0x1A, 0x0A, 0x1B, 0x0B, |
1750 | 0x1C, 0x0C, 0x1D, 0x0D, 0x1E, 0x0E, 0x1F, 0x0F}; |
1751 | break; |
1752 | default: |
1753 | llvm_unreachable("unexpected vector length" ); |
1754 | } |
1755 | break; |
1756 | } |
1757 | case VecOp::Mergel: { |
1758 | switch (vecTyInfo.len) { |
1759 | case 2: { |
1760 | enum { V1 = 0, V2 = 2 }; |
1761 | mMask = {V1 + 1, V2 + 1}; |
1762 | rMask = {V2 + 0, V1 + 0}; |
1763 | break; |
1764 | } |
1765 | case 4: { |
1766 | enum { V1 = 0, V2 = 4 }; |
1767 | mMask = {V1 + 2, V2 + 2, V1 + 3, V2 + 3}; |
1768 | rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1}; |
1769 | break; |
1770 | } |
1771 | case 8: { |
1772 | enum { V1 = 0, V2 = 8 }; |
1773 | mMask = {V1 + 4, V2 + 4, V1 + 5, V2 + 5, V1 + 6, V2 + 6, V1 + 7, V2 + 7}; |
1774 | rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1, V2 + 2, V1 + 2, V2 + 3, V1 + 3}; |
1775 | break; |
1776 | } |
1777 | case 16: |
1778 | mMask = {0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, |
1779 | 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F}; |
1780 | rMask = {0x10, 0x00, 0x11, 0x01, 0x12, 0x02, 0x13, 0x03, |
1781 | 0x14, 0x04, 0x15, 0x05, 0x16, 0x06, 0x17, 0x07}; |
1782 | break; |
1783 | default: |
1784 | llvm_unreachable("unexpected vector length" ); |
1785 | } |
1786 | break; |
1787 | } |
1788 | default: |
1789 | llvm_unreachable("invalid vector operation for generator" ); |
1790 | } |
1791 | |
1792 | auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)}; |
1793 | |
1794 | llvm::SmallVector<int64_t, 16> &mergeMask = |
1795 | (isBEVecElemOrderOnLE()) ? rMask : mMask; |
1796 | |
1797 | auto callOp{builder.create<mlir::vector::ShuffleOp>(loc, vargs[0], vargs[1], |
1798 | mergeMask)}; |
1799 | return builder.createConvert(loc, resultType, callOp); |
1800 | } |
1801 | |
1802 | static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder, |
1803 | mlir::Location loc, mlir::Value baseAddr, |
1804 | mlir::Value offset) { |
1805 | auto typeExtent{fir::SequenceType::getUnknownExtent()}; |
1806 | // Construct an !fir.ref<!ref.array<?xi8>> type |
1807 | auto arrRefTy{builder.getRefType(fir::SequenceType::get( |
1808 | {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))}; |
1809 | // Convert arg to !fir.ref<!ref.array<?xi8>> |
1810 | auto resAddr{builder.create<fir::ConvertOp>(loc, arrRefTy, baseAddr)}; |
1811 | |
1812 | return builder.create<fir::CoordinateOp>(loc, arrRefTy, resAddr, offset); |
1813 | } |
1814 | |
1815 | static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder, |
1816 | mlir::Location loc, mlir::Value v, |
1817 | int64_t len) { |
1818 | assert(v.getType().isa<mlir::VectorType>()); |
1819 | assert(len > 0); |
1820 | llvm::SmallVector<int64_t, 16> mask; |
1821 | for (int64_t i = 0; i < len; ++i) { |
1822 | mask.push_back(Elt: len - 1 - i); |
1823 | } |
1824 | auto undefVec{builder.create<fir::UndefOp>(loc, v.getType())}; |
1825 | return builder.create<mlir::vector::ShuffleOp>(loc, v, undefVec, mask); |
1826 | } |
1827 | |
1828 | static mlir::NamedAttribute getAlignmentAttr(fir::FirOpBuilder &builder, |
1829 | const int val) { |
1830 | auto i64ty{mlir::IntegerType::get(builder.getContext(), 64)}; |
1831 | auto alignAttr{mlir::IntegerAttr::get(i64ty, val)}; |
1832 | return builder.getNamedAttr("alignment" , alignAttr); |
1833 | } |
1834 | |
1835 | fir::ExtendedValue |
1836 | PPCIntrinsicLibrary::genVecXlGrp(mlir::Type resultType, |
1837 | llvm::ArrayRef<fir::ExtendedValue> args) { |
1838 | VecTypeInfo vecTyInfo{getVecTypeFromFirType(resultType)}; |
1839 | switch (vecTyInfo.eleTy.getIntOrFloatBitWidth()) { |
1840 | case 8: |
1841 | // vec_xlb1 |
1842 | return genVecLdNoCallGrp<VecOp::Xl>(resultType, args); |
1843 | case 16: |
1844 | // vec_xlh8 |
1845 | return genVecLdNoCallGrp<VecOp::Xl>(resultType, args); |
1846 | case 32: |
1847 | // vec_xlw4 |
1848 | return genVecLdCallGrp<VecOp::Xlw4>(resultType, args); |
1849 | case 64: |
1850 | // vec_xld2 |
1851 | return genVecLdCallGrp<VecOp::Xld2>(resultType, args); |
1852 | default: |
1853 | llvm_unreachable("invalid kind" ); |
1854 | } |
1855 | llvm_unreachable("invalid vector operation for generator" ); |
1856 | } |
1857 | |
1858 | template <VecOp vop> |
1859 | fir::ExtendedValue PPCIntrinsicLibrary::genVecLdNoCallGrp( |
1860 | mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) { |
1861 | assert(args.size() == 2); |
1862 | auto arg0{getBase(args[0])}; |
1863 | auto arg1{getBase(args[1])}; |
1864 | |
1865 | auto vecTyInfo{getVecTypeFromFirType(resultType)}; |
1866 | auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())}; |
1867 | auto firTy{vecTyInfo.toFirVectorType()}; |
1868 | |
1869 | // Add the %val of arg0 to %addr of arg1 |
1870 | auto addr{addOffsetToAddress(builder, loc, arg1, arg0)}; |
1871 | |
1872 | const auto triple{fir::getTargetTriple(builder.getModule())}; |
1873 | // Need to get align 1. |
1874 | auto result{builder.create<fir::LoadOp>(loc, mlirTy, addr, |
1875 | getAlignmentAttr(builder, 1))}; |
1876 | if ((vop == VecOp::Xl && isBEVecElemOrderOnLE()) || |
1877 | (vop == VecOp::Xlbe && triple.isLittleEndian())) |
1878 | return builder.createConvert( |
1879 | loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len)); |
1880 | |
1881 | return builder.createConvert(loc, firTy, result); |
1882 | } |
1883 | |
1884 | // VEC_LD, VEC_LDE, VEC_LDL, VEC_LXVP, VEC_XLD2, VEC_XLW4 |
1885 | template <VecOp vop> |
1886 | fir::ExtendedValue |
1887 | PPCIntrinsicLibrary::genVecLdCallGrp(mlir::Type resultType, |
1888 | llvm::ArrayRef<fir::ExtendedValue> args) { |
1889 | assert(args.size() == 2); |
1890 | auto context{builder.getContext()}; |
1891 | auto arg0{getBase(args[0])}; |
1892 | auto arg1{getBase(args[1])}; |
1893 | |
1894 | // Prepare the return type in FIR. |
1895 | auto vecResTyInfo{getVecTypeFromFirType(resultType)}; |
1896 | auto mlirTy{vecResTyInfo.toMlirVectorType(context)}; |
1897 | auto firTy{vecResTyInfo.toFirVectorType()}; |
1898 | |
1899 | // llvm.ppc.altivec.lvx* returns <4xi32> |
1900 | // Others, like "llvm.ppc.altivec.lvebx" too if arg2 is not of Integer type |
1901 | const auto i32Ty{mlir::IntegerType::get(builder.getContext(), 32)}; |
1902 | const auto mVecI32Ty{mlir::VectorType::get(4, i32Ty)}; |
1903 | |
1904 | // For vec_ld, need to convert arg0 from i64 to i32 |
1905 | if (vop == VecOp::Ld && arg0.getType().getIntOrFloatBitWidth() == 64) |
1906 | arg0 = builder.createConvert(loc, i32Ty, arg0); |
1907 | |
1908 | // Add the %val of arg0 to %addr of arg1 |
1909 | auto addr{addOffsetToAddress(builder, loc, arg1, arg0)}; |
1910 | llvm::SmallVector<mlir::Value, 4> parsedArgs{addr}; |
1911 | |
1912 | mlir::Type intrinResTy{nullptr}; |
1913 | llvm::StringRef fname{}; |
1914 | switch (vop) { |
1915 | case VecOp::Ld: |
1916 | fname = "llvm.ppc.altivec.lvx" ; |
1917 | intrinResTy = mVecI32Ty; |
1918 | break; |
1919 | case VecOp::Lde: |
1920 | switch (vecResTyInfo.eleTy.getIntOrFloatBitWidth()) { |
1921 | case 8: |
1922 | fname = "llvm.ppc.altivec.lvebx" ; |
1923 | intrinResTy = mlirTy; |
1924 | break; |
1925 | case 16: |
1926 | fname = "llvm.ppc.altivec.lvehx" ; |
1927 | intrinResTy = mlirTy; |
1928 | break; |
1929 | case 32: |
1930 | fname = "llvm.ppc.altivec.lvewx" ; |
1931 | if (mlir::isa<mlir::IntegerType>(vecResTyInfo.eleTy)) |
1932 | intrinResTy = mlirTy; |
1933 | else |
1934 | intrinResTy = mVecI32Ty; |
1935 | break; |
1936 | default: |
1937 | llvm_unreachable("invalid vector for vec_lde" ); |
1938 | } |
1939 | break; |
1940 | case VecOp::Ldl: |
1941 | fname = "llvm.ppc.altivec.lvxl" ; |
1942 | intrinResTy = mVecI32Ty; |
1943 | break; |
1944 | case VecOp::Lxvp: |
1945 | fname = "llvm.ppc.vsx.lxvp" ; |
1946 | intrinResTy = fir::VectorType::get(256, mlir::IntegerType::get(context, 1)); |
1947 | break; |
1948 | case VecOp::Xld2: { |
1949 | fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvd2x.be" |
1950 | : "llvm.ppc.vsx.lxvd2x" ; |
1951 | // llvm.ppc.altivec.lxvd2x* returns <2 x double> |
1952 | intrinResTy = mlir::VectorType::get(2, mlir::FloatType::getF64(context)); |
1953 | } break; |
1954 | case VecOp::Xlw4: |
1955 | fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvw4x.be" |
1956 | : "llvm.ppc.vsx.lxvw4x" ; |
1957 | // llvm.ppc.altivec.lxvw4x* returns <4xi32> |
1958 | intrinResTy = mVecI32Ty; |
1959 | break; |
1960 | default: |
1961 | llvm_unreachable("invalid vector operation for generator" ); |
1962 | } |
1963 | |
1964 | auto funcType{ |
1965 | mlir::FunctionType::get(context, {addr.getType()}, {intrinResTy})}; |
1966 | auto funcOp{builder.createFunction(loc, fname, funcType)}; |
1967 | auto result{ |
1968 | builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)}; |
1969 | |
1970 | if (vop == VecOp::Lxvp) |
1971 | return result; |
1972 | |
1973 | if (intrinResTy != mlirTy) |
1974 | result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, result); |
1975 | |
1976 | if (vop != VecOp::Xld2 && vop != VecOp::Xlw4 && isBEVecElemOrderOnLE()) |
1977 | return builder.createConvert( |
1978 | loc, firTy, |
1979 | reverseVectorElements(builder, loc, result, vecResTyInfo.len)); |
1980 | |
1981 | return builder.createConvert(loc, firTy, result); |
1982 | } |
1983 | |
1984 | // VEC_LVSL, VEC_LVSR |
1985 | template <VecOp vop> |
1986 | fir::ExtendedValue |
1987 | PPCIntrinsicLibrary::genVecLvsGrp(mlir::Type resultType, |
1988 | llvm::ArrayRef<fir::ExtendedValue> args) { |
1989 | assert(args.size() == 2); |
1990 | auto context{builder.getContext()}; |
1991 | auto arg0{getBase(args[0])}; |
1992 | auto arg1{getBase(args[1])}; |
1993 | |
1994 | auto vecTyInfo{getVecTypeFromFirType(resultType)}; |
1995 | auto mlirTy{vecTyInfo.toMlirVectorType(context)}; |
1996 | auto firTy{vecTyInfo.toFirVectorType()}; |
1997 | |
1998 | // Convert arg0 to i64 type if needed |
1999 | auto i64ty{mlir::IntegerType::get(context, 64)}; |
2000 | if (arg0.getType() != i64ty) |
2001 | arg0 = builder.create<fir::ConvertOp>(loc, i64ty, arg0); |
2002 | |
2003 | // offset is modulo 16, so shift left 56 bits and then right 56 bits to clear |
2004 | // upper 56 bit while preserving sign |
2005 | auto shiftVal{builder.createIntegerConstant(loc, i64ty, 56)}; |
2006 | auto offset{builder.create<mlir::arith::ShLIOp>(loc, arg0, shiftVal)}; |
2007 | auto offset2{builder.create<mlir::arith::ShRSIOp>(loc, offset, shiftVal)}; |
2008 | |
2009 | // Add the offsetArg to %addr of arg1 |
2010 | auto addr{addOffsetToAddress(builder, loc, arg1, offset2)}; |
2011 | llvm::SmallVector<mlir::Value, 4> parsedArgs{addr}; |
2012 | |
2013 | llvm::StringRef fname{}; |
2014 | switch (vop) { |
2015 | case VecOp::Lvsl: |
2016 | fname = "llvm.ppc.altivec.lvsl" ; |
2017 | break; |
2018 | case VecOp::Lvsr: |
2019 | fname = "llvm.ppc.altivec.lvsr" ; |
2020 | break; |
2021 | default: |
2022 | llvm_unreachable("invalid vector operation for generator" ); |
2023 | } |
2024 | auto funcType{mlir::FunctionType::get(context, {addr.getType()}, {mlirTy})}; |
2025 | auto funcOp{builder.createFunction(loc, fname, funcType)}; |
2026 | auto result{ |
2027 | builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)}; |
2028 | |
2029 | if (isNativeVecElemOrderOnLE()) |
2030 | return builder.createConvert( |
2031 | loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len)); |
2032 | |
2033 | return builder.createConvert(loc, firTy, result); |
2034 | } |
2035 | |
2036 | // VEC_NMADD, VEC_MSUB |
2037 | template <VecOp vop> |
2038 | fir::ExtendedValue |
2039 | PPCIntrinsicLibrary::genVecNmaddMsub(mlir::Type resultType, |
2040 | llvm::ArrayRef<fir::ExtendedValue> args) { |
2041 | assert(args.size() == 3); |
2042 | auto context{builder.getContext()}; |
2043 | auto argBases{getBasesForArgs(args)}; |
2044 | auto vTypeInfo{getVecTypeFromFir(argBases[0])}; |
2045 | auto newArgs{convertVecArgs(builder, loc, vTypeInfo, argBases)}; |
2046 | const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()}; |
2047 | |
2048 | static std::map<int, std::pair<llvm::StringRef, mlir::FunctionType>> fmaMap{ |
2049 | {32, |
2050 | std::make_pair( |
2051 | "llvm.fma.v4f32" , |
2052 | genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>( |
2053 | context, builder))}, |
2054 | {64, |
2055 | std::make_pair( |
2056 | "llvm.fma.v2f64" , |
2057 | genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>( |
2058 | context, builder))}}; |
2059 | |
2060 | auto funcOp{builder.createFunction(loc, std::get<0>(fmaMap[width]), |
2061 | std::get<1>(fmaMap[width]))}; |
2062 | if (vop == VecOp::Nmadd) { |
2063 | // vec_nmadd(arg1, arg2, arg3) = -fma(arg1, arg2, arg3) |
2064 | auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)}; |
2065 | |
2066 | // We need to convert fir.vector to MLIR vector to use fneg and then back |
2067 | // to fir.vector to store. |
2068 | auto vCall{builder.createConvert(loc, vTypeInfo.toMlirVectorType(context), |
2069 | callOp.getResult(0))}; |
2070 | auto neg{builder.create<mlir::arith::NegFOp>(loc, vCall)}; |
2071 | return builder.createConvert(loc, vTypeInfo.toFirVectorType(), neg); |
2072 | } else if (vop == VecOp::Msub) { |
2073 | // vec_msub(arg1, arg2, arg3) = fma(arg1, arg2, -arg3) |
2074 | newArgs[2] = builder.create<mlir::arith::NegFOp>(loc, newArgs[2]); |
2075 | |
2076 | auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)}; |
2077 | return callOp.getResult(0); |
2078 | } |
2079 | llvm_unreachable("Invalid vector operation for generator" ); |
2080 | } |
2081 | |
2082 | // VEC_PERM, VEC_PERMI |
2083 | template <VecOp vop> |
2084 | fir::ExtendedValue |
2085 | PPCIntrinsicLibrary::genVecPerm(mlir::Type resultType, |
2086 | llvm::ArrayRef<fir::ExtendedValue> args) { |
2087 | assert(args.size() == 3); |
2088 | auto context{builder.getContext()}; |
2089 | auto argBases{getBasesForArgs(args)}; |
2090 | auto argTypes{getTypesForArgs(argBases)}; |
2091 | auto vecTyInfo{getVecTypeFromFir(argBases[0])}; |
2092 | auto mlirTy{vecTyInfo.toMlirVectorType(context)}; |
2093 | |
2094 | auto vi32Ty{mlir::VectorType::get(4, mlir::IntegerType::get(context, 32))}; |
2095 | auto vf64Ty{mlir::VectorType::get(2, mlir::FloatType::getF64(context))}; |
2096 | |
2097 | auto mArg0{builder.createConvert(loc, mlirTy, argBases[0])}; |
2098 | auto mArg1{builder.createConvert(loc, mlirTy, argBases[1])}; |
2099 | |
2100 | switch (vop) { |
2101 | case VecOp::Perm: { |
2102 | VecTypeInfo maskVecTyInfo{getVecTypeFromFir(argBases[2])}; |
2103 | auto mlirMaskTy{maskVecTyInfo.toMlirVectorType(context)}; |
2104 | auto mMask{builder.createConvert(loc, mlirMaskTy, argBases[2])}; |
2105 | |
2106 | if (mlirTy != vi32Ty) { |
2107 | mArg0 = |
2108 | builder.create<mlir::LLVM::BitcastOp>(loc, vi32Ty, mArg0).getResult(); |
2109 | mArg1 = |
2110 | builder.create<mlir::LLVM::BitcastOp>(loc, vi32Ty, mArg1).getResult(); |
2111 | } |
2112 | |
2113 | auto funcOp{builder.createFunction( |
2114 | loc, "llvm.ppc.altivec.vperm" , |
2115 | genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>, |
2116 | Ty::IntegerVector<4>, Ty::IntegerVector<1>>(context, |
2117 | builder))}; |
2118 | |
2119 | llvm::SmallVector<mlir::Value> newArgs; |
2120 | if (isNativeVecElemOrderOnLE()) { |
2121 | auto i8Ty{mlir::IntegerType::get(context, 8)}; |
2122 | auto v8Ty{mlir::VectorType::get(16, i8Ty)}; |
2123 | auto negOne{builder.createMinusOneInteger(loc, i8Ty)}; |
2124 | auto vNegOne{ |
2125 | builder.create<mlir::vector::BroadcastOp>(loc, v8Ty, negOne)}; |
2126 | |
2127 | mMask = builder.create<mlir::arith::XOrIOp>(loc, mMask, vNegOne); |
2128 | newArgs = {mArg1, mArg0, mMask}; |
2129 | } else { |
2130 | newArgs = {mArg0, mArg1, mMask}; |
2131 | } |
2132 | |
2133 | auto res{builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0)}; |
2134 | |
2135 | if (res.getType() != argTypes[0]) { |
2136 | // fir.call llvm.ppc.altivec.vperm returns !fir.vector<i4:32> |
2137 | // convert the result back to the original type |
2138 | res = builder.createConvert(loc, vi32Ty, res); |
2139 | if (mlirTy != vi32Ty) |
2140 | res = |
2141 | builder.create<mlir::LLVM::BitcastOp>(loc, mlirTy, res).getResult(); |
2142 | } |
2143 | return builder.createConvert(loc, resultType, res); |
2144 | } |
2145 | case VecOp::Permi: { |
2146 | // arg3 is a constant |
2147 | auto constIntOp{ |
2148 | mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp()) |
2149 | .getValue() |
2150 | .dyn_cast_or_null<mlir::IntegerAttr>()}; |
2151 | assert(constIntOp && "expected integer constant argument" ); |
2152 | auto constInt{constIntOp.getInt()}; |
2153 | // arg1, arg2, and result type share same VecTypeInfo |
2154 | if (vecTyInfo.isFloat()) { |
2155 | mArg0 = |
2156 | builder.create<mlir::LLVM::BitcastOp>(loc, vf64Ty, mArg0).getResult(); |
2157 | mArg1 = |
2158 | builder.create<mlir::LLVM::BitcastOp>(loc, vf64Ty, mArg1).getResult(); |
2159 | } |
2160 | |
2161 | llvm::SmallVector<int64_t, 2> nMask; // native vector element order mask |
2162 | llvm::SmallVector<int64_t, 2> rMask; // non-native vector element order mask |
2163 | enum { V1 = 0, V2 = 2 }; |
2164 | switch (constInt) { |
2165 | case 0: |
2166 | nMask = {V1 + 0, V2 + 0}; |
2167 | rMask = {V2 + 1, V1 + 1}; |
2168 | break; |
2169 | case 1: |
2170 | nMask = {V1 + 0, V2 + 1}; |
2171 | rMask = {V2 + 0, V1 + 1}; |
2172 | break; |
2173 | case 2: |
2174 | nMask = {V1 + 1, V2 + 0}; |
2175 | rMask = {V2 + 1, V1 + 0}; |
2176 | break; |
2177 | case 3: |
2178 | nMask = {V1 + 1, V2 + 1}; |
2179 | rMask = {V2 + 0, V1 + 0}; |
2180 | break; |
2181 | default: |
2182 | llvm_unreachable("unexpected arg3 value for vec_permi" ); |
2183 | } |
2184 | |
2185 | llvm::SmallVector<int64_t, 2> mask = |
2186 | (isBEVecElemOrderOnLE()) ? rMask : nMask; |
2187 | auto res{builder.create<mlir::vector::ShuffleOp>(loc, mArg0, mArg1, mask)}; |
2188 | if (res.getType() != mlirTy) { |
2189 | auto cast{builder.create<mlir::LLVM::BitcastOp>(loc, mlirTy, res)}; |
2190 | return builder.createConvert(loc, resultType, cast); |
2191 | } |
2192 | return builder.createConvert(loc, resultType, res); |
2193 | } |
2194 | default: |
2195 | llvm_unreachable("invalid vector operation for generator" ); |
2196 | } |
2197 | } |
2198 | |
2199 | // VEC_SEL |
2200 | fir::ExtendedValue |
2201 | PPCIntrinsicLibrary::genVecSel(mlir::Type resultType, |
2202 | llvm::ArrayRef<fir::ExtendedValue> args) { |
2203 | assert(args.size() == 3); |
2204 | auto argBases{getBasesForArgs(args)}; |
2205 | llvm::SmallVector<VecTypeInfo, 4> vecTyInfos; |
2206 | for (size_t i = 0; i < argBases.size(); i++) { |
2207 | vecTyInfos.push_back(getVecTypeFromFir(argBases[i])); |
2208 | } |
2209 | auto vargs{convertVecArgs(builder, loc, vecTyInfos, argBases)}; |
2210 | |
2211 | auto i8Ty{mlir::IntegerType::get(builder.getContext(), 8)}; |
2212 | auto negOne{builder.createMinusOneInteger(loc, i8Ty)}; |
2213 | |
2214 | // construct a constant <16 x i8> vector with value -1 for bitcast |
2215 | auto bcVecTy{mlir::VectorType::get(16, i8Ty)}; |
2216 | auto vNegOne{builder.create<mlir::vector::BroadcastOp>(loc, bcVecTy, negOne)}; |
2217 | |
2218 | // bitcast arguments to bcVecTy |
2219 | auto arg1{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0])}; |
2220 | auto arg2{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1])}; |
2221 | auto arg3{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[2])}; |
2222 | |
2223 | // vec_sel(arg1, arg2, arg3) = |
2224 | // (arg2 and arg3) or (arg1 and (arg3 xor vector(-1,...))) |
2225 | auto comp{builder.create<mlir::arith::XOrIOp>(loc, arg3, vNegOne)}; |
2226 | auto a1AndComp{builder.create<mlir::arith::AndIOp>(loc, arg1, comp)}; |
2227 | auto a1OrA2{builder.create<mlir::arith::AndIOp>(loc, arg2, arg3)}; |
2228 | auto res{builder.create<mlir::arith::OrIOp>(loc, a1AndComp, a1OrA2)}; |
2229 | |
2230 | auto bcRes{ |
2231 | builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), res)}; |
2232 | |
2233 | return builder.createConvert(loc, vecTyInfos[0].toFirVectorType(), bcRes); |
2234 | } |
2235 | |
2236 | // VEC_SL, VEC_SLD, VEC_SLDW, VEC_SLL, VEC_SLO, VEC_SR, VEC_SRL, VEC_SRO |
2237 | template <VecOp vop> |
2238 | fir::ExtendedValue |
2239 | PPCIntrinsicLibrary::genVecShift(mlir::Type resultType, |
2240 | llvm::ArrayRef<fir::ExtendedValue> args) { |
2241 | auto context{builder.getContext()}; |
2242 | auto argBases{getBasesForArgs(args)}; |
2243 | auto argTypes{getTypesForArgs(argBases)}; |
2244 | |
2245 | llvm::SmallVector<VecTypeInfo, 2> vecTyInfoArgs; |
2246 | vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[0])); |
2247 | vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[1])); |
2248 | |
2249 | // Convert the first two arguments to MLIR vectors |
2250 | llvm::SmallVector<mlir::Type, 2> mlirTyArgs; |
2251 | mlirTyArgs.push_back(vecTyInfoArgs[0].toMlirVectorType(context)); |
2252 | mlirTyArgs.push_back(vecTyInfoArgs[1].toMlirVectorType(context)); |
2253 | |
2254 | llvm::SmallVector<mlir::Value, 2> mlirVecArgs; |
2255 | mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[0], argBases[0])); |
2256 | mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[1], argBases[1])); |
2257 | |
2258 | mlir::Value shftRes{nullptr}; |
2259 | |
2260 | if (vop == VecOp::Sl || vop == VecOp::Sr) { |
2261 | assert(args.size() == 2); |
2262 | // Construct the mask |
2263 | auto width{ |
2264 | mlir::dyn_cast<mlir::IntegerType>(vecTyInfoArgs[1].eleTy).getWidth()}; |
2265 | auto vecVal{builder.createIntegerConstant( |
2266 | loc, getConvertedElementType(context, vecTyInfoArgs[0].eleTy), width)}; |
2267 | auto mask{ |
2268 | builder.create<mlir::vector::BroadcastOp>(loc, mlirTyArgs[1], vecVal)}; |
2269 | auto shft{builder.create<mlir::arith::RemUIOp>(loc, mlirVecArgs[1], mask)}; |
2270 | |
2271 | mlir::Value res{nullptr}; |
2272 | if (vop == VecOp::Sr) |
2273 | res = builder.create<mlir::arith::ShRUIOp>(loc, mlirVecArgs[0], shft); |
2274 | else if (vop == VecOp::Sl) |
2275 | res = builder.create<mlir::arith::ShLIOp>(loc, mlirVecArgs[0], shft); |
2276 | |
2277 | shftRes = builder.createConvert(loc, argTypes[0], res); |
2278 | } else if (vop == VecOp::Sll || vop == VecOp::Slo || vop == VecOp::Srl || |
2279 | vop == VecOp::Sro) { |
2280 | assert(args.size() == 2); |
2281 | |
2282 | // Bitcast to vector<4xi32> |
2283 | auto bcVecTy{mlir::VectorType::get(4, builder.getIntegerType(32))}; |
2284 | if (mlirTyArgs[0] != bcVecTy) |
2285 | mlirVecArgs[0] = |
2286 | builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[0]); |
2287 | if (mlirTyArgs[1] != bcVecTy) |
2288 | mlirVecArgs[1] = |
2289 | builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[1]); |
2290 | |
2291 | llvm::StringRef funcName; |
2292 | switch (vop) { |
2293 | case VecOp::Srl: |
2294 | funcName = "llvm.ppc.altivec.vsr" ; |
2295 | break; |
2296 | case VecOp::Sro: |
2297 | funcName = "llvm.ppc.altivec.vsro" ; |
2298 | break; |
2299 | case VecOp::Sll: |
2300 | funcName = "llvm.ppc.altivec.vsl" ; |
2301 | break; |
2302 | case VecOp::Slo: |
2303 | funcName = "llvm.ppc.altivec.vslo" ; |
2304 | break; |
2305 | default: |
2306 | llvm_unreachable("unknown vector shift operation" ); |
2307 | } |
2308 | auto funcTy{genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>, |
2309 | Ty::IntegerVector<4>>(context, builder)}; |
2310 | mlir::func::FuncOp funcOp{builder.createFunction(loc, funcName, funcTy)}; |
2311 | auto callOp{builder.create<fir::CallOp>(loc, funcOp, mlirVecArgs)}; |
2312 | |
2313 | // If the result vector type is different from the original type, need |
2314 | // to convert to mlir vector, bitcast and then convert back to fir vector. |
2315 | if (callOp.getResult(0).getType() != argTypes[0]) { |
2316 | auto res = builder.createConvert(loc, bcVecTy, callOp.getResult(0)); |
2317 | res = builder.create<mlir::vector::BitCastOp>(loc, mlirTyArgs[0], res); |
2318 | shftRes = builder.createConvert(loc, argTypes[0], res); |
2319 | } else { |
2320 | shftRes = callOp.getResult(0); |
2321 | } |
2322 | } else if (vop == VecOp::Sld || vop == VecOp::Sldw) { |
2323 | assert(args.size() == 3); |
2324 | auto constIntOp = |
2325 | mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp()) |
2326 | .getValue() |
2327 | .dyn_cast_or_null<mlir::IntegerAttr>(); |
2328 | assert(constIntOp && "expected integer constant argument" ); |
2329 | |
2330 | // Bitcast to vector<16xi8> |
2331 | auto vi8Ty{mlir::VectorType::get(16, builder.getIntegerType(8))}; |
2332 | if (mlirTyArgs[0] != vi8Ty) { |
2333 | mlirVecArgs[0] = |
2334 | builder.create<mlir::LLVM::BitcastOp>(loc, vi8Ty, mlirVecArgs[0]) |
2335 | .getResult(); |
2336 | mlirVecArgs[1] = |
2337 | builder.create<mlir::LLVM::BitcastOp>(loc, vi8Ty, mlirVecArgs[1]) |
2338 | .getResult(); |
2339 | } |
2340 | |
2341 | // Construct the mask for shuffling |
2342 | auto shiftVal{constIntOp.getInt()}; |
2343 | if (vop == VecOp::Sldw) |
2344 | shiftVal = shiftVal << 2; |
2345 | shiftVal &= 0xF; |
2346 | llvm::SmallVector<int64_t, 16> mask; |
2347 | // Shuffle with mask based on the endianness |
2348 | const auto triple{fir::getTargetTriple(builder.getModule())}; |
2349 | if (triple.isLittleEndian()) { |
2350 | for (int i = 16; i < 32; ++i) |
2351 | mask.push_back(i - shiftVal); |
2352 | shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[1], |
2353 | mlirVecArgs[0], mask); |
2354 | } else { |
2355 | for (int i = 0; i < 16; ++i) |
2356 | mask.push_back(i + shiftVal); |
2357 | shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[0], |
2358 | mlirVecArgs[1], mask); |
2359 | } |
2360 | |
2361 | // Bitcast to the original type |
2362 | if (shftRes.getType() != mlirTyArgs[0]) |
2363 | shftRes = |
2364 | builder.create<mlir::LLVM::BitcastOp>(loc, mlirTyArgs[0], shftRes); |
2365 | |
2366 | return builder.createConvert(loc, resultType, shftRes); |
2367 | } else |
2368 | llvm_unreachable("Invalid vector operation for generator" ); |
2369 | |
2370 | return shftRes; |
2371 | } |
2372 | |
2373 | // VEC_SPLAT, VEC_SPLATS, VEC_SPLAT_S32 |
2374 | template <VecOp vop> |
2375 | fir::ExtendedValue |
2376 | PPCIntrinsicLibrary::genVecSplat(mlir::Type resultType, |
2377 | llvm::ArrayRef<fir::ExtendedValue> args) { |
2378 | auto context{builder.getContext()}; |
2379 | auto argBases{getBasesForArgs(args)}; |
2380 | |
2381 | mlir::vector::SplatOp splatOp{nullptr}; |
2382 | mlir::Type retTy{nullptr}; |
2383 | switch (vop) { |
2384 | case VecOp::Splat: { |
2385 | assert(args.size() == 2); |
2386 | auto vecTyInfo{getVecTypeFromFir(argBases[0])}; |
2387 | |
2388 | auto extractOp{genVecExtract(resultType, args)}; |
2389 | splatOp = builder.create<mlir::vector::SplatOp>( |
2390 | loc, *(extractOp.getUnboxed()), vecTyInfo.toMlirVectorType(context)); |
2391 | retTy = vecTyInfo.toFirVectorType(); |
2392 | break; |
2393 | } |
2394 | case VecOp::Splats: { |
2395 | assert(args.size() == 1); |
2396 | auto vecTyInfo{getVecTypeFromEle(argBases[0])}; |
2397 | |
2398 | splatOp = builder.create<mlir::vector::SplatOp>( |
2399 | loc, argBases[0], vecTyInfo.toMlirVectorType(context)); |
2400 | retTy = vecTyInfo.toFirVectorType(); |
2401 | break; |
2402 | } |
2403 | case VecOp::Splat_s32: { |
2404 | assert(args.size() == 1); |
2405 | auto eleTy{builder.getIntegerType(32)}; |
2406 | auto intOp{builder.createConvert(loc, eleTy, argBases[0])}; |
2407 | |
2408 | // the intrinsic always returns vector(integer(4)) |
2409 | splatOp = builder.create<mlir::vector::SplatOp>( |
2410 | loc, intOp, mlir::VectorType::get(4, eleTy)); |
2411 | retTy = fir::VectorType::get(4, eleTy); |
2412 | break; |
2413 | } |
2414 | default: |
2415 | llvm_unreachable("invalid vector operation for generator" ); |
2416 | } |
2417 | return builder.createConvert(loc, retTy, splatOp); |
2418 | } |
2419 | |
2420 | fir::ExtendedValue |
2421 | PPCIntrinsicLibrary::genVecXlds(mlir::Type resultType, |
2422 | llvm::ArrayRef<fir::ExtendedValue> args) { |
2423 | assert(args.size() == 2); |
2424 | auto arg0{getBase(args[0])}; |
2425 | auto arg1{getBase(args[1])}; |
2426 | |
2427 | // Prepare the return type in FIR. |
2428 | auto vecTyInfo{getVecTypeFromFirType(resultType)}; |
2429 | auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())}; |
2430 | auto firTy{vecTyInfo.toFirVectorType()}; |
2431 | |
2432 | // Add the %val of arg0 to %addr of arg1 |
2433 | auto addr{addOffsetToAddress(builder, loc, arg1, arg0)}; |
2434 | |
2435 | auto i64Ty{mlir::IntegerType::get(builder.getContext(), 64)}; |
2436 | auto i64VecTy{mlir::VectorType::get(2, i64Ty)}; |
2437 | auto i64RefTy{builder.getRefType(i64Ty)}; |
2438 | auto addrConv{builder.create<fir::ConvertOp>(loc, i64RefTy, addr)}; |
2439 | |
2440 | auto addrVal{builder.create<fir::LoadOp>(loc, addrConv)}; |
2441 | auto splatRes{builder.create<mlir::vector::SplatOp>(loc, addrVal, i64VecTy)}; |
2442 | |
2443 | mlir::Value result{nullptr}; |
2444 | if (mlirTy != splatRes.getType()) { |
2445 | result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, splatRes); |
2446 | } else |
2447 | result = splatRes; |
2448 | |
2449 | return builder.createConvert(loc, firTy, result); |
2450 | } |
2451 | |
2452 | const char *getMmaIrIntrName(MMAOp mmaOp) { |
2453 | switch (mmaOp) { |
2454 | case MMAOp::AssembleAcc: |
2455 | return "llvm.ppc.mma.assemble.acc" ; |
2456 | case MMAOp::AssemblePair: |
2457 | return "llvm.ppc.vsx.assemble.pair" ; |
2458 | case MMAOp::DisassembleAcc: |
2459 | return "llvm.ppc.mma.disassemble.acc" ; |
2460 | case MMAOp::DisassemblePair: |
2461 | return "llvm.ppc.vsx.disassemble.pair" ; |
2462 | case MMAOp::Xxmfacc: |
2463 | return "llvm.ppc.mma.xxmfacc" ; |
2464 | case MMAOp::Xxmtacc: |
2465 | return "llvm.ppc.mma.xxmtacc" ; |
2466 | case MMAOp::Xxsetaccz: |
2467 | return "llvm.ppc.mma.xxsetaccz" ; |
2468 | case MMAOp::Pmxvbf16ger2: |
2469 | return "llvm.ppc.mma.pmxvbf16ger2" ; |
2470 | case MMAOp::Pmxvbf16ger2nn: |
2471 | return "llvm.ppc.mma.pmxvbf16ger2nn" ; |
2472 | case MMAOp::Pmxvbf16ger2np: |
2473 | return "llvm.ppc.mma.pmxvbf16ger2np" ; |
2474 | case MMAOp::Pmxvbf16ger2pn: |
2475 | return "llvm.ppc.mma.pmxvbf16ger2pn" ; |
2476 | case MMAOp::Pmxvbf16ger2pp: |
2477 | return "llvm.ppc.mma.pmxvbf16ger2pp" ; |
2478 | case MMAOp::Pmxvf16ger2: |
2479 | return "llvm.ppc.mma.pmxvf16ger2" ; |
2480 | case MMAOp::Pmxvf16ger2nn: |
2481 | return "llvm.ppc.mma.pmxvf16ger2nn" ; |
2482 | case MMAOp::Pmxvf16ger2np: |
2483 | return "llvm.ppc.mma.pmxvf16ger2np" ; |
2484 | case MMAOp::Pmxvf16ger2pn: |
2485 | return "llvm.ppc.mma.pmxvf16ger2pn" ; |
2486 | case MMAOp::Pmxvf16ger2pp: |
2487 | return "llvm.ppc.mma.pmxvf16ger2pp" ; |
2488 | case MMAOp::Pmxvf32ger: |
2489 | return "llvm.ppc.mma.pmxvf32ger" ; |
2490 | case MMAOp::Pmxvf32gernn: |
2491 | return "llvm.ppc.mma.pmxvf32gernn" ; |
2492 | case MMAOp::Pmxvf32gernp: |
2493 | return "llvm.ppc.mma.pmxvf32gernp" ; |
2494 | case MMAOp::Pmxvf32gerpn: |
2495 | return "llvm.ppc.mma.pmxvf32gerpn" ; |
2496 | case MMAOp::Pmxvf32gerpp: |
2497 | return "llvm.ppc.mma.pmxvf32gerpp" ; |
2498 | case MMAOp::Pmxvf64ger: |
2499 | return "llvm.ppc.mma.pmxvf64ger" ; |
2500 | case MMAOp::Pmxvf64gernn: |
2501 | return "llvm.ppc.mma.pmxvf64gernn" ; |
2502 | case MMAOp::Pmxvf64gernp: |
2503 | return "llvm.ppc.mma.pmxvf64gernp" ; |
2504 | case MMAOp::Pmxvf64gerpn: |
2505 | return "llvm.ppc.mma.pmxvf64gerpn" ; |
2506 | case MMAOp::Pmxvf64gerpp: |
2507 | return "llvm.ppc.mma.pmxvf64gerpp" ; |
2508 | case MMAOp::Pmxvi16ger2: |
2509 | return "llvm.ppc.mma.pmxvi16ger2" ; |
2510 | case MMAOp::Pmxvi16ger2pp: |
2511 | return "llvm.ppc.mma.pmxvi16ger2pp" ; |
2512 | case MMAOp::Pmxvi16ger2s: |
2513 | return "llvm.ppc.mma.pmxvi16ger2s" ; |
2514 | case MMAOp::Pmxvi16ger2spp: |
2515 | return "llvm.ppc.mma.pmxvi16ger2spp" ; |
2516 | case MMAOp::Pmxvi4ger8: |
2517 | return "llvm.ppc.mma.pmxvi4ger8" ; |
2518 | case MMAOp::Pmxvi4ger8pp: |
2519 | return "llvm.ppc.mma.pmxvi4ger8pp" ; |
2520 | case MMAOp::Pmxvi8ger4: |
2521 | return "llvm.ppc.mma.pmxvi8ger4" ; |
2522 | case MMAOp::Pmxvi8ger4pp: |
2523 | return "llvm.ppc.mma.pmxvi8ger4pp" ; |
2524 | case MMAOp::Pmxvi8ger4spp: |
2525 | return "llvm.ppc.mma.pmxvi8ger4spp" ; |
2526 | case MMAOp::Xvbf16ger2: |
2527 | return "llvm.ppc.mma.xvbf16ger2" ; |
2528 | case MMAOp::Xvbf16ger2nn: |
2529 | return "llvm.ppc.mma.xvbf16ger2nn" ; |
2530 | case MMAOp::Xvbf16ger2np: |
2531 | return "llvm.ppc.mma.xvbf16ger2np" ; |
2532 | case MMAOp::Xvbf16ger2pn: |
2533 | return "llvm.ppc.mma.xvbf16ger2pn" ; |
2534 | case MMAOp::Xvbf16ger2pp: |
2535 | return "llvm.ppc.mma.xvbf16ger2pp" ; |
2536 | case MMAOp::Xvf16ger2: |
2537 | return "llvm.ppc.mma.xvf16ger2" ; |
2538 | case MMAOp::Xvf16ger2nn: |
2539 | return "llvm.ppc.mma.xvf16ger2nn" ; |
2540 | case MMAOp::Xvf16ger2np: |
2541 | return "llvm.ppc.mma.xvf16ger2np" ; |
2542 | case MMAOp::Xvf16ger2pn: |
2543 | return "llvm.ppc.mma.xvf16ger2pn" ; |
2544 | case MMAOp::Xvf16ger2pp: |
2545 | return "llvm.ppc.mma.xvf16ger2pp" ; |
2546 | case MMAOp::Xvf32ger: |
2547 | return "llvm.ppc.mma.xvf32ger" ; |
2548 | case MMAOp::Xvf32gernn: |
2549 | return "llvm.ppc.mma.xvf32gernn" ; |
2550 | case MMAOp::Xvf32gernp: |
2551 | return "llvm.ppc.mma.xvf32gernp" ; |
2552 | case MMAOp::Xvf32gerpn: |
2553 | return "llvm.ppc.mma.xvf32gerpn" ; |
2554 | case MMAOp::Xvf32gerpp: |
2555 | return "llvm.ppc.mma.xvf32gerpp" ; |
2556 | case MMAOp::Xvf64ger: |
2557 | return "llvm.ppc.mma.xvf64ger" ; |
2558 | case MMAOp::Xvf64gernn: |
2559 | return "llvm.ppc.mma.xvf64gernn" ; |
2560 | case MMAOp::Xvf64gernp: |
2561 | return "llvm.ppc.mma.xvf64gernp" ; |
2562 | case MMAOp::Xvf64gerpn: |
2563 | return "llvm.ppc.mma.xvf64gerpn" ; |
2564 | case MMAOp::Xvf64gerpp: |
2565 | return "llvm.ppc.mma.xvf64gerpp" ; |
2566 | case MMAOp::Xvi16ger2: |
2567 | return "llvm.ppc.mma.xvi16ger2" ; |
2568 | case MMAOp::Xvi16ger2pp: |
2569 | return "llvm.ppc.mma.xvi16ger2pp" ; |
2570 | case MMAOp::Xvi16ger2s: |
2571 | return "llvm.ppc.mma.xvi16ger2s" ; |
2572 | case MMAOp::Xvi16ger2spp: |
2573 | return "llvm.ppc.mma.xvi16ger2spp" ; |
2574 | case MMAOp::Xvi4ger8: |
2575 | return "llvm.ppc.mma.xvi4ger8" ; |
2576 | case MMAOp::Xvi4ger8pp: |
2577 | return "llvm.ppc.mma.xvi4ger8pp" ; |
2578 | case MMAOp::Xvi8ger4: |
2579 | return "llvm.ppc.mma.xvi8ger4" ; |
2580 | case MMAOp::Xvi8ger4pp: |
2581 | return "llvm.ppc.mma.xvi8ger4pp" ; |
2582 | case MMAOp::Xvi8ger4spp: |
2583 | return "llvm.ppc.mma.xvi8ger4spp" ; |
2584 | } |
2585 | llvm_unreachable("getMmaIrIntrName" ); |
2586 | } |
2587 | |
2588 | mlir::FunctionType getMmaIrFuncType(mlir::MLIRContext *context, MMAOp mmaOp) { |
2589 | switch (mmaOp) { |
2590 | case MMAOp::AssembleAcc: |
2591 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 4); |
2592 | case MMAOp::AssemblePair: |
2593 | return genMmaVpFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); |
2594 | case MMAOp::DisassembleAcc: |
2595 | return genMmaDisassembleFuncType(context, mmaOp); |
2596 | case MMAOp::DisassemblePair: |
2597 | return genMmaDisassembleFuncType(context, mmaOp); |
2598 | case MMAOp::Xxmfacc: |
2599 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0); |
2600 | case MMAOp::Xxmtacc: |
2601 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0); |
2602 | case MMAOp::Xxsetaccz: |
2603 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 0); |
2604 | case MMAOp::Pmxvbf16ger2: |
2605 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, |
2606 | /*Integer*/ 3); |
2607 | case MMAOp::Pmxvbf16ger2nn: |
2608 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2609 | /*Integer*/ 3); |
2610 | case MMAOp::Pmxvbf16ger2np: |
2611 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2612 | /*Integer*/ 3); |
2613 | case MMAOp::Pmxvbf16ger2pn: |
2614 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2615 | /*Integer*/ 3); |
2616 | case MMAOp::Pmxvbf16ger2pp: |
2617 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2618 | /*Integer*/ 3); |
2619 | case MMAOp::Pmxvf16ger2: |
2620 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, |
2621 | /*Integer*/ 3); |
2622 | case MMAOp::Pmxvf16ger2nn: |
2623 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2624 | /*Integer*/ 3); |
2625 | case MMAOp::Pmxvf16ger2np: |
2626 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2627 | /*Integer*/ 3); |
2628 | case MMAOp::Pmxvf16ger2pn: |
2629 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2630 | /*Integer*/ 3); |
2631 | case MMAOp::Pmxvf16ger2pp: |
2632 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2633 | /*Integer*/ 3); |
2634 | case MMAOp::Pmxvf32ger: |
2635 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, |
2636 | /*Integer*/ 2); |
2637 | case MMAOp::Pmxvf32gernn: |
2638 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2639 | /*Integer*/ 2); |
2640 | case MMAOp::Pmxvf32gernp: |
2641 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2642 | /*Integer*/ 2); |
2643 | case MMAOp::Pmxvf32gerpn: |
2644 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2645 | /*Integer*/ 2); |
2646 | case MMAOp::Pmxvf32gerpp: |
2647 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2648 | /*Integer*/ 2); |
2649 | case MMAOp::Pmxvf64ger: |
2650 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1, |
2651 | /*Integer*/ 2); |
2652 | case MMAOp::Pmxvf64gernn: |
2653 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1, |
2654 | /*Integer*/ 2); |
2655 | case MMAOp::Pmxvf64gernp: |
2656 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1, |
2657 | /*Integer*/ 2); |
2658 | case MMAOp::Pmxvf64gerpn: |
2659 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1, |
2660 | /*Integer*/ 2); |
2661 | case MMAOp::Pmxvf64gerpp: |
2662 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1, |
2663 | /*Integer*/ 2); |
2664 | case MMAOp::Pmxvi16ger2: |
2665 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, |
2666 | /*Integer*/ 3); |
2667 | case MMAOp::Pmxvi16ger2pp: |
2668 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2669 | /*Integer*/ 3); |
2670 | case MMAOp::Pmxvi16ger2s: |
2671 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, |
2672 | /*Integer*/ 3); |
2673 | case MMAOp::Pmxvi16ger2spp: |
2674 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2675 | /*Integer*/ 3); |
2676 | case MMAOp::Pmxvi4ger8: |
2677 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, |
2678 | /*Integer*/ 3); |
2679 | case MMAOp::Pmxvi4ger8pp: |
2680 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2681 | /*Integer*/ 3); |
2682 | case MMAOp::Pmxvi8ger4: |
2683 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, |
2684 | /*Integer*/ 3); |
2685 | case MMAOp::Pmxvi8ger4pp: |
2686 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2687 | /*Integer*/ 3); |
2688 | case MMAOp::Pmxvi8ger4spp: |
2689 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, |
2690 | /*Integer*/ 3); |
2691 | case MMAOp::Xvbf16ger2: |
2692 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); |
2693 | case MMAOp::Xvbf16ger2nn: |
2694 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2695 | case MMAOp::Xvbf16ger2np: |
2696 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2697 | case MMAOp::Xvbf16ger2pn: |
2698 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2699 | case MMAOp::Xvbf16ger2pp: |
2700 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2701 | case MMAOp::Xvf16ger2: |
2702 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); |
2703 | case MMAOp::Xvf16ger2nn: |
2704 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2705 | case MMAOp::Xvf16ger2np: |
2706 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2707 | case MMAOp::Xvf16ger2pn: |
2708 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2709 | case MMAOp::Xvf16ger2pp: |
2710 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2711 | case MMAOp::Xvf32ger: |
2712 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); |
2713 | case MMAOp::Xvf32gernn: |
2714 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2715 | case MMAOp::Xvf32gernp: |
2716 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2717 | case MMAOp::Xvf32gerpn: |
2718 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2719 | case MMAOp::Xvf32gerpp: |
2720 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2721 | case MMAOp::Xvf64ger: |
2722 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1); |
2723 | case MMAOp::Xvf64gernn: |
2724 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1); |
2725 | case MMAOp::Xvf64gernp: |
2726 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1); |
2727 | case MMAOp::Xvf64gerpn: |
2728 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1); |
2729 | case MMAOp::Xvf64gerpp: |
2730 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1); |
2731 | case MMAOp::Xvi16ger2: |
2732 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); |
2733 | case MMAOp::Xvi16ger2pp: |
2734 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2735 | case MMAOp::Xvi16ger2s: |
2736 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); |
2737 | case MMAOp::Xvi16ger2spp: |
2738 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2739 | case MMAOp::Xvi4ger8: |
2740 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); |
2741 | case MMAOp::Xvi4ger8pp: |
2742 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2743 | case MMAOp::Xvi8ger4: |
2744 | return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); |
2745 | case MMAOp::Xvi8ger4pp: |
2746 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2747 | case MMAOp::Xvi8ger4spp: |
2748 | return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); |
2749 | } |
2750 | llvm_unreachable("getMmaIrFuncType" ); |
2751 | } |
2752 | |
2753 | template <MMAOp IntrId, MMAHandlerOp HandlerOp> |
2754 | void PPCIntrinsicLibrary::genMmaIntr(llvm::ArrayRef<fir::ExtendedValue> args) { |
2755 | auto context{builder.getContext()}; |
2756 | mlir::FunctionType intrFuncType{getMmaIrFuncType(context, IntrId)}; |
2757 | mlir::func::FuncOp funcOp{ |
2758 | builder.createFunction(loc, getMmaIrIntrName(IntrId), intrFuncType)}; |
2759 | llvm::SmallVector<mlir::Value> intrArgs; |
2760 | |
2761 | // Depending on SubToFunc, change the subroutine call to a function call. |
2762 | // First argument represents the result. Rest of the arguments |
2763 | // are shifted one position to form the actual argument list. |
2764 | size_t argStart{0}; |
2765 | size_t argStep{1}; |
2766 | size_t e{args.size()}; |
2767 | if (HandlerOp == MMAHandlerOp::SubToFunc) { |
2768 | // The first argument becomes function result. Start from the second |
2769 | // argument. |
2770 | argStart = 1; |
2771 | } else if (HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE) { |
2772 | // Reverse argument order on little-endian target only. |
2773 | // The reversal does not depend on the setting of non-native-order option. |
2774 | const auto triple{fir::getTargetTriple(builder.getModule())}; |
2775 | if (triple.isLittleEndian()) { |
2776 | // Load the arguments in reverse order. |
2777 | argStart = args.size() - 1; |
2778 | // The first argument becomes function result. Stop at the second |
2779 | // argument. |
2780 | e = 0; |
2781 | argStep = -1; |
2782 | } else { |
2783 | // Load the arguments in natural order. |
2784 | // The first argument becomes function result. Start from the second |
2785 | // argument. |
2786 | argStart = 1; |
2787 | } |
2788 | } |
2789 | |
2790 | for (size_t i = argStart, j = 0; i != e; i += argStep, ++j) { |
2791 | auto v{fir::getBase(args[i])}; |
2792 | if (i == 0 && HandlerOp == MMAHandlerOp::FirstArgIsResult) { |
2793 | // First argument is passed in as an address. We need to load |
2794 | // the content to match the LLVM interface. |
2795 | v = builder.create<fir::LoadOp>(loc, v); |
2796 | } |
2797 | auto vType{v.getType()}; |
2798 | mlir::Type targetType{intrFuncType.getInput(j)}; |
2799 | if (vType != targetType) { |
2800 | if (targetType.isa<mlir::VectorType>()) { |
2801 | // Perform vector type conversion for arguments passed by value. |
2802 | auto eleTy{vType.dyn_cast<fir::VectorType>().getEleTy()}; |
2803 | auto len{vType.dyn_cast<fir::VectorType>().getLen()}; |
2804 | mlir::VectorType mlirType = mlir::VectorType::get(len, eleTy); |
2805 | auto v0{builder.createConvert(loc, mlirType, v)}; |
2806 | auto v1{builder.create<mlir::vector::BitCastOp>(loc, targetType, v0)}; |
2807 | intrArgs.push_back(v1); |
2808 | } else if (targetType.isa<mlir::IntegerType>() && |
2809 | vType.isa<mlir::IntegerType>()) { |
2810 | auto v0{builder.createConvert(loc, targetType, v)}; |
2811 | intrArgs.push_back(v0); |
2812 | } else { |
2813 | llvm::errs() << "\nUnexpected type conversion requested: " |
2814 | << " from " << vType << " to " << targetType << "\n" ; |
2815 | llvm_unreachable("Unsupported type conversion for argument to PowerPC " |
2816 | "MMA intrinsic" ); |
2817 | } |
2818 | } else { |
2819 | intrArgs.push_back(v); |
2820 | } |
2821 | } |
2822 | auto callSt{builder.create<fir::CallOp>(loc, funcOp, intrArgs)}; |
2823 | if (HandlerOp == MMAHandlerOp::SubToFunc || |
2824 | HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE || |
2825 | HandlerOp == MMAHandlerOp::FirstArgIsResult) { |
2826 | // Convert pointer type if needed. |
2827 | mlir::Value callResult{callSt.getResult(0)}; |
2828 | mlir::Value destPtr{fir::getBase(args[0])}; |
2829 | mlir::Type callResultPtrType{builder.getRefType(callResult.getType())}; |
2830 | if (destPtr.getType() != callResultPtrType) { |
2831 | destPtr = builder.create<fir::ConvertOp>(loc, callResultPtrType, destPtr); |
2832 | } |
2833 | // Copy the result. |
2834 | builder.create<fir::StoreOp>(loc, callResult, destPtr); |
2835 | } |
2836 | } |
2837 | |
2838 | // VEC_ST, VEC_STE |
2839 | template <VecOp vop> |
2840 | void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef<fir::ExtendedValue> args) { |
2841 | assert(args.size() == 3); |
2842 | |
2843 | auto context{builder.getContext()}; |
2844 | auto argBases{getBasesForArgs(args)}; |
2845 | auto arg1TyInfo{getVecTypeFromFir(argBases[0])}; |
2846 | |
2847 | auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])}; |
2848 | |
2849 | llvm::StringRef fname{}; |
2850 | mlir::VectorType stTy{nullptr}; |
2851 | auto i32ty{mlir::IntegerType::get(context, 32)}; |
2852 | switch (vop) { |
2853 | case VecOp::St: |
2854 | stTy = mlir::VectorType::get(4, i32ty); |
2855 | fname = "llvm.ppc.altivec.stvx" ; |
2856 | break; |
2857 | case VecOp::Ste: { |
2858 | const auto width{arg1TyInfo.eleTy.getIntOrFloatBitWidth()}; |
2859 | const auto len{arg1TyInfo.len}; |
2860 | |
2861 | if (arg1TyInfo.isFloat32()) { |
2862 | stTy = mlir::VectorType::get(len, i32ty); |
2863 | fname = "llvm.ppc.altivec.stvewx" ; |
2864 | } else if (arg1TyInfo.eleTy.isa<mlir::IntegerType>()) { |
2865 | stTy = mlir::VectorType::get(len, mlir::IntegerType::get(context, width)); |
2866 | |
2867 | switch (width) { |
2868 | case 8: |
2869 | fname = "llvm.ppc.altivec.stvebx" ; |
2870 | break; |
2871 | case 16: |
2872 | fname = "llvm.ppc.altivec.stvehx" ; |
2873 | break; |
2874 | case 32: |
2875 | fname = "llvm.ppc.altivec.stvewx" ; |
2876 | break; |
2877 | default: |
2878 | assert(false && "invalid element size" ); |
2879 | } |
2880 | } else |
2881 | assert(false && "unknown type" ); |
2882 | break; |
2883 | } |
2884 | case VecOp::Stxvp: |
2885 | // __vector_pair type |
2886 | stTy = mlir::VectorType::get(256, mlir::IntegerType::get(context, 1)); |
2887 | fname = "llvm.ppc.vsx.stxvp" ; |
2888 | break; |
2889 | default: |
2890 | llvm_unreachable("invalid vector operation for generator" ); |
2891 | } |
2892 | |
2893 | auto funcType{ |
2894 | mlir::FunctionType::get(context, {stTy, addr.getType()}, std::nullopt)}; |
2895 | mlir::func::FuncOp funcOp = builder.createFunction(loc, fname, funcType); |
2896 | |
2897 | llvm::SmallVector<mlir::Value, 4> biArgs; |
2898 | |
2899 | if (vop == VecOp::Stxvp) { |
2900 | biArgs.push_back(argBases[0]); |
2901 | biArgs.push_back(addr); |
2902 | builder.create<fir::CallOp>(loc, funcOp, biArgs); |
2903 | return; |
2904 | } |
2905 | |
2906 | auto vecTyInfo{getVecTypeFromFirType(argBases[0].getType())}; |
2907 | auto cnv{builder.createConvert(loc, vecTyInfo.toMlirVectorType(context), |
2908 | argBases[0])}; |
2909 | |
2910 | mlir::Value newArg1{nullptr}; |
2911 | if (stTy != arg1TyInfo.toMlirVectorType(context)) |
2912 | newArg1 = builder.create<mlir::vector::BitCastOp>(loc, stTy, cnv); |
2913 | else |
2914 | newArg1 = cnv; |
2915 | |
2916 | if (isBEVecElemOrderOnLE()) |
2917 | newArg1 = builder.createConvert( |
2918 | loc, stTy, reverseVectorElements(builder, loc, newArg1, 4)); |
2919 | |
2920 | biArgs.push_back(newArg1); |
2921 | biArgs.push_back(addr); |
2922 | |
2923 | builder.create<fir::CallOp>(loc, funcOp, biArgs); |
2924 | } |
2925 | |
2926 | // VEC_XST, VEC_XST_BE, VEC_STXV, VEC_XSTD2, VEC_XSTW4 |
2927 | template <VecOp vop> |
2928 | void PPCIntrinsicLibrary::genVecXStore( |
2929 | llvm::ArrayRef<fir::ExtendedValue> args) { |
2930 | assert(args.size() == 3); |
2931 | auto context{builder.getContext()}; |
2932 | auto argBases{getBasesForArgs(args)}; |
2933 | VecTypeInfo arg1TyInfo{getVecTypeFromFir(argBases[0])}; |
2934 | |
2935 | auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])}; |
2936 | |
2937 | mlir::Value trg{nullptr}; |
2938 | mlir::Value src{nullptr}; |
2939 | |
2940 | switch (vop) { |
2941 | case VecOp::Xst: |
2942 | case VecOp::Xst_be: { |
2943 | src = argBases[0]; |
2944 | trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()), |
2945 | addr); |
2946 | |
2947 | if (vop == VecOp::Xst_be || isBEVecElemOrderOnLE()) { |
2948 | auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context), |
2949 | argBases[0])}; |
2950 | auto shf{reverseVectorElements(builder, loc, cnv, arg1TyInfo.len)}; |
2951 | |
2952 | src = builder.createConvert(loc, arg1TyInfo.toFirVectorType(), shf); |
2953 | } |
2954 | break; |
2955 | } |
2956 | case VecOp::Xstd2: |
2957 | case VecOp::Xstw4: { |
2958 | // an 16-byte vector arg1 is treated as two 8-byte elements or |
2959 | // four 4-byte elements |
2960 | mlir::IntegerType elemTy; |
2961 | uint64_t numElem = (vop == VecOp::Xstd2) ? 2 : 4; |
2962 | elemTy = builder.getIntegerType(128 / numElem); |
2963 | |
2964 | mlir::VectorType mlirVecTy{mlir::VectorType::get(numElem, elemTy)}; |
2965 | fir::VectorType firVecTy{fir::VectorType::get(numElem, elemTy)}; |
2966 | |
2967 | auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context), |
2968 | argBases[0])}; |
2969 | |
2970 | mlir::Type srcTy{nullptr}; |
2971 | if (numElem != arg1TyInfo.len) { |
2972 | cnv = builder.create<mlir::vector::BitCastOp>(loc, mlirVecTy, cnv); |
2973 | srcTy = firVecTy; |
2974 | } else { |
2975 | srcTy = arg1TyInfo.toFirVectorType(); |
2976 | } |
2977 | |
2978 | trg = builder.createConvert(loc, builder.getRefType(srcTy), addr); |
2979 | |
2980 | if (isBEVecElemOrderOnLE()) { |
2981 | cnv = reverseVectorElements(builder, loc, cnv, numElem); |
2982 | } |
2983 | |
2984 | src = builder.createConvert(loc, srcTy, cnv); |
2985 | break; |
2986 | } |
2987 | case VecOp::Stxv: |
2988 | src = argBases[0]; |
2989 | trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()), |
2990 | addr); |
2991 | break; |
2992 | default: |
2993 | assert(false && "Invalid vector operation for generator" ); |
2994 | } |
2995 | builder.create<fir::StoreOp>(loc, mlir::TypeRange{}, |
2996 | mlir::ValueRange{src, trg}, |
2997 | getAlignmentAttr(builder, 1)); |
2998 | } |
2999 | |
3000 | } // namespace fir |
3001 | |