//===-- PPCIntrinsicCall.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Helper routines for constructing the FIR dialect of MLIR for PowerPC
// intrinsics. This module makes extensive use of MLIR interfaces and follows
// MLIR's coding style (https://mlir.llvm.org/getting_started/DeveloperGuide/).
//
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Builder/PPCIntrinsicCall.h"
#include "flang/Evaluate/common.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/MutableBox.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"

namespace fir {

using PI = PPCIntrinsicLibrary;

// PPC specific intrinsic handlers.
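// Note: entries must be kept sorted by name; findPPCIntrinsicHandler below
// looks them up with a binary search (llvm::lower_bound).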
static constexpr IntrinsicHandler ppcHandlers[]{
    {"__ppc_mma_assemble_acc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::AssembleAcc, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"arg1", asValue},
       {"arg2", asValue},
       {"arg3", asValue},
       {"arg4", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_assemble_pair",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::AssemblePair, MMAHandlerOp::SubToFunc>),
     {{{"pair", asAddr}, {"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_build_acc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::AssembleAcc,
                         MMAHandlerOp::SubToFuncReverseArgOnLE>),
     {{{"acc", asAddr},
       {"arg1", asValue},
       {"arg2", asValue},
       {"arg3", asValue},
       {"arg4", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_disassemble_acc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::DisassembleAcc, MMAHandlerOp::SubToFunc>),
     {{{"data", asAddr}, {"acc", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_disassemble_pair",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::DisassemblePair, MMAHandlerOp::SubToFunc>),
     {{{"data", asAddr}, {"pair", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2nn,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2np,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2pn,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2pp,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2np, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2s",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2s, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2spp,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi4ger8_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi4ger8, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi4ger8pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi4ger8pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi8ger4_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi8ger4, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi8ger4pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi8ger4pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi8ger4spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi8ger4spp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2np, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2np, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2s",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2s, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2spp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi4ger8_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi4ger8, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi4ger8pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi4ger8pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi8ger4_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi8ger4, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi8ger4pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi8ger4pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi8ger4spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi8ger4spp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xxmfacc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xxmfacc, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}}},
     /*isElemental=*/true},
    {"__ppc_mma_xxmtacc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xxmtacc, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}}},
     /*isElemental=*/true},
    {"__ppc_mma_xxsetaccz",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xxsetaccz, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}}},
     /*isElemental=*/true},
    {"__ppc_mtfsf",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<false>),
     {{{"mask", asValue}, {"r", asValue}}},
     /*isElemental=*/false},
    {"__ppc_mtfsfi",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<true>),
     {{{"bf", asValue}, {"i", asValue}}},
     /*isElemental=*/false},
    {"__ppc_vec_abs",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecAbs),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_add",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Add>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_and",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::And>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_any_ge",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAnyCompare<VecOp::Anyge>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmpge",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmpge>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmpgt",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmpgt>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmple",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmple>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmplt",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmplt>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_convert",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecConvert<VecOp::Convert>),
     {{{"v", asValue}, {"mold", asValue}}},
     /*isElemental=*/false},
    {"__ppc_vec_ctf",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecConvert<VecOp::Ctf>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cvf",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecConvert<VecOp::Cvf>),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_extract",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecExtract),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_insert",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecInsert),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_ld",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Ld>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lde",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Lde>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_ldl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Ldl>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lvsl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLvsGrp<VecOp::Lvsl>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lvsr",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLvsGrp<VecOp::Lvsr>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lxv",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdNoCallGrp<VecOp::Lxv>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lxvp",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Lxvp>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_mergeh",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecMerge<VecOp::Mergeh>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_mergel",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecMerge<VecOp::Mergel>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_msub",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecNmaddMsub<VecOp::Msub>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_mul",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Mul>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_nmadd",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecNmaddMsub<VecOp::Nmadd>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_perm",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecPerm<VecOp::Perm>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_permi",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecPerm<VecOp::Permi>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sel",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecSel),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sl>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sld",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sld>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sldw",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sldw>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sll",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sll>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_slo",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Slo>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_splat",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecSplat<VecOp::Splat>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_splat_s32_",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecSplat<VecOp::Splat_s32>),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_splats",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecSplat<VecOp::Splats>),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sr",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sr>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_srl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Srl>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sro",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sro>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_st",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecStore<VecOp::St>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_ste",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecStore<VecOp::Ste>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_stxv",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Stxv>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_stxvp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecStore<VecOp::Stxvp>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_sub",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Sub>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_xl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlGrp),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xl_be",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdNoCallGrp<VecOp::Xlbe>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xld2_",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Xld2>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xlds",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlds),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xlw4_",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Xlw4>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xor",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Xor>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_xst",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xst>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xst_be",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xst_be>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xstd2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xstd2>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xstw4_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xstw4>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
};

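// Note: like ppcHandlers, this table must be kept sorted by name; the
// StaticMultimapView built below (ppcMathOps) checks the ordering at compile
// time via static_assert.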
static constexpr MathOperation ppcMathOperations[] = {
    // fcfi is just another name for fcfid, there is no llvm.ppc.fcfi.
    {"__ppc_fcfi", "llvm.ppc.fcfid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fcfid", "llvm.ppc.fcfid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fcfud", "llvm.ppc.fcfud", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctid", "llvm.ppc.fctid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctidz", "llvm.ppc.fctidz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctiw", "llvm.ppc.fctiw", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctiwz", "llvm.ppc.fctiwz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctudz", "llvm.ppc.fctudz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctuwz", "llvm.ppc.fctuwz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fmadd", "llvm.fma.f32",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genMathOp<mlir::math::FmaOp>},
    {"__ppc_fmadd", "llvm.fma.f64",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genMathOp<mlir::math::FmaOp>},
    {"__ppc_fmsub", "llvm.ppc.fmsubs",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fmsub", "llvm.ppc.fmsub",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fnabs", "llvm.ppc.fnabss", genFuncType<Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fnabs", "llvm.ppc.fnabs", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fnmadd", "llvm.ppc.fnmadds",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fnmadd", "llvm.ppc.fnmadd",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fnmsub", "llvm.ppc.fnmsub.f32",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fnmsub", "llvm.ppc.fnmsub.f64",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fre", "llvm.ppc.fre", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fres", "llvm.ppc.fres", genFuncType<Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_frsqrte", "llvm.ppc.frsqrte", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_frsqrtes", "llvm.ppc.frsqrtes",
     genFuncType<Ty::Real<4>, Ty::Real<4>>, genLibCall},
    {"__ppc_vec_cvbf16spn", "llvm.ppc.vsx.xvcvbf16spn",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall},
    {"__ppc_vec_cvspbf16_", "llvm.ppc.vsx.xvcvspbf16",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall},
    {"__ppc_vec_madd", "llvm.fma.v4f32",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>,
                 Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_madd", "llvm.fma.v2f64",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>,
                 Ty::RealVector<8>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsb",
     genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
                 Ty::IntegerVector<1>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsh",
     genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
                 Ty::IntegerVector<2>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsw",
     genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                 Ty::IntegerVector<4>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsd",
     genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
                 Ty::IntegerVector<8>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxub",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
                 Ty::UnsignedVector<1>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxuh",
     genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
                 Ty::UnsignedVector<2>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxuw",
     genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
                 Ty::UnsignedVector<4>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxud",
     genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
                 Ty::UnsignedVector<8>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.vsx.xvmaxsp",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.vsx.xvmaxdp",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsb",
     genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
                 Ty::IntegerVector<1>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsh",
     genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
                 Ty::IntegerVector<2>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsw",
     genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                 Ty::IntegerVector<4>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsd",
     genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
                 Ty::IntegerVector<8>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminub",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
                 Ty::UnsignedVector<1>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminuh",
     genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
                 Ty::UnsignedVector<2>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminuw",
     genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
                 Ty::UnsignedVector<4>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminud",
     genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
                 Ty::UnsignedVector<8>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.vsx.xvminsp",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.vsx.xvmindp",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>,
     genLibCall},
    {"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v4f32",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>,
                 Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v2f64",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>,
                 Ty::RealVector<8>>,
     genLibCall},
};

const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name) {
  auto compare = [](const IntrinsicHandler &ppcHandler, llvm::StringRef name) {
    return name.compare(ppcHandler.name) > 0;
  };
  auto result = llvm::lower_bound(ppcHandlers, name, compare);
  return result != std::end(ppcHandlers) && result->name == name ? result
                                                                 : nullptr;
}

using RtMap = Fortran::common::StaticMultimapView<MathOperation>;
static constexpr RtMap ppcMathOps(ppcMathOperations);
static_assert(ppcMathOps.Verify() && "map must be sorted");

std::pair<const MathOperation *, const MathOperation *>
checkPPCMathOperationsRange(llvm::StringRef name) {
  return ppcMathOps.equal_range(name);
}

// Helper functions for vector element ordering.
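// These predicates compare the target endianness against the
// NoPPCNativeVecElemOrder lowering option to decide whether vector elements
// must be processed in big-endian (left-to-right) order or reversed on
// little-endian targets.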
bool PPCIntrinsicLibrary::isBEVecElemOrderOnLE() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() &&
          converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}
bool PPCIntrinsicLibrary::isNativeVecElemOrderOnLE() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() &&
          !converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}
bool PPCIntrinsicLibrary::changeVecElemOrder() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() !=
          converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}

static mlir::FunctionType genMmaVpFuncType(mlir::MLIRContext *context,
                                           int quadCnt, int pairCnt, int vecCnt,
                                           int intCnt = 0,
                                           int vecElemBitSize = 8,
                                           int intBitSize = 32) {
  // Constructs a function type with the following signature:
  // Result type: __vector_pair
  // Arguments:
  //  quadCnt: number of arguments that have __vector_quad type, followed by
  //  pairCnt: number of arguments that have __vector_pair type, followed by
  //  vecCnt: number of arguments that have vector(integer) type, followed by
  //  intCnt: number of arguments that have integer type
  //  vecElemBitSize: specifies the size of vector elements in bits
  //  intBitSize: specifies the size of integer arguments in bits
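  // For example (illustrative only): genMmaVpFuncType(context, /*quadCnt=*/0,
  // /*pairCnt=*/0, /*vecCnt=*/2) builds
  //   (vector<16xi8>, vector<16xi8>) -> !fir.vector<256:i1>
  // i.e. two 16 x i8 vector arguments producing a __vector_pair result.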
  auto vType{mlir::VectorType::get(
      128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))};
  auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
  auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
  auto iType{mlir::IntegerType::get(context, intBitSize)};
  llvm::SmallVector<mlir::Type> argTypes;
  for (int i = 0; i < quadCnt; ++i) {
    argTypes.push_back(vqType);
  }
  for (int i = 0; i < pairCnt; ++i) {
    argTypes.push_back(vpType);
  }
  for (int i = 0; i < vecCnt; ++i) {
    argTypes.push_back(vType);
  }
  for (int i = 0; i < intCnt; ++i) {
    argTypes.push_back(iType);
  }

  return mlir::FunctionType::get(context, argTypes, {vpType});
}

static mlir::FunctionType genMmaVqFuncType(mlir::MLIRContext *context,
                                           int quadCnt, int pairCnt, int vecCnt,
                                           int intCnt = 0,
                                           int vecElemBitSize = 8,
                                           int intBitSize = 32) {
  // Constructs a function type with the following signature:
  // Result type: __vector_quad
  // Arguments:
  //  quadCnt: number of arguments that have __vector_quad type, followed by
  //  pairCnt: number of arguments that have __vector_pair type, followed by
  //  vecCnt: number of arguments that have vector(integer) type, followed by
  //  intCnt: number of arguments that have integer type
  //  vecElemBitSize: specifies the size of vector elements in bits
  //  intBitSize: specifies the size of integer arguments in bits
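  // For example (illustrative only): genMmaVqFuncType(context, /*quadCnt=*/1,
  // /*pairCnt=*/0, /*vecCnt=*/2) builds
  //   (!fir.vector<512:i1>, vector<16xi8>, vector<16xi8>)
  //       -> !fir.vector<512:i1>
  // i.e. a __vector_quad accumulator plus two vectors producing an updated
  // __vector_quad.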
  auto vType{mlir::VectorType::get(
      128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))};
  auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
  auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
  auto iType{mlir::IntegerType::get(context, intBitSize)};
  llvm::SmallVector<mlir::Type> argTypes;
  for (int i = 0; i < quadCnt; ++i) {
    argTypes.push_back(vqType);
  }
  for (int i = 0; i < pairCnt; ++i) {
    argTypes.push_back(vpType);
  }
  for (int i = 0; i < vecCnt; ++i) {
    argTypes.push_back(vType);
  }
  for (int i = 0; i < intCnt; ++i) {
    argTypes.push_back(iType);
  }

  return mlir::FunctionType::get(context, argTypes, {vqType});
}

mlir::FunctionType genMmaDisassembleFuncType(mlir::MLIRContext *context,
                                             MMAOp mmaOp) {
  auto vType{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))};
  llvm::SmallVector<mlir::Type> members;

  if (mmaOp == MMAOp::DisassembleAcc) {
    auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
    members.push_back(vType);
    members.push_back(vType);
    members.push_back(vType);
    members.push_back(vType);
    auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)};
    return mlir::FunctionType::get(context, {vqType}, {resType});
  } else if (mmaOp == MMAOp::DisassemblePair) {
    auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
    members.push_back(vType);
    members.push_back(vType);
    auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)};
    return mlir::FunctionType::get(context, {vpType}, {resType});
  } else {
    llvm_unreachable(
        "Unsupported intrinsic code for function signature generator");
  }
}

//===----------------------------------------------------------------------===//
// PowerPC specific intrinsic handlers.
//===----------------------------------------------------------------------===//

// MTFSF, MTFSFI
template <bool isImm>
void PPCIntrinsicLibrary::genMtfsf(llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  llvm::SmallVector<mlir::Value> scalarArgs;
  for (const fir::ExtendedValue &arg : args)
    if (arg.getUnboxed())
      scalarArgs.emplace_back(fir::getBase(arg));
    else
      mlir::emitError(loc, "nonscalar intrinsic argument");

  mlir::FunctionType libFuncType;
  mlir::func::FuncOp funcOp;
  if (isImm) {
    libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Integer<4>>(
        builder.getContext(), builder);
    funcOp = builder.createFunction(loc, "llvm.ppc.mtfsfi", libFuncType);
  } else {
    libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Real<8>>(
        builder.getContext(), builder);
    funcOp = builder.createFunction(loc, "llvm.ppc.mtfsf", libFuncType);
  }
  builder.create<fir::CallOp>(loc, funcOp, scalarArgs);
}

// VEC_ABS
fir::ExtendedValue
PPCIntrinsicLibrary::genVecAbs(mlir::Type resultType,
                               llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 1);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto vTypeInfo{getVecTypeFromFir(argBases[0])};

  mlir::func::FuncOp funcOp{nullptr};
  mlir::FunctionType ftype;
  llvm::StringRef fname{};
  if (vTypeInfo.isFloat()) {
    if (vTypeInfo.isFloat32()) {
      fname = "llvm.fabs.v4f32";
      ftype =
          genFuncType<Ty::RealVector<4>, Ty::RealVector<4>>(context, builder);
    } else if (vTypeInfo.isFloat64()) {
      fname = "llvm.fabs.v2f64";
      ftype =
          genFuncType<Ty::RealVector<8>, Ty::RealVector<8>>(context, builder);
    }

    funcOp = builder.createFunction(loc, fname, ftype);
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, argBases[0])};
    return callOp.getResult(0);
  } else if (auto eleTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) {
    // vec_abs(arg1) = max(0 - arg1, arg1)

    auto newVecTy{mlir::VectorType::get(vTypeInfo.len, eleTy)};
    auto varg1{builder.createConvert(loc, newVecTy, argBases[0])};
    // construct a vector of zeros
    auto zeroVal{builder.createIntegerConstant(loc, eleTy, 0)};
    auto vZero{
        builder.create<mlir::vector::BroadcastOp>(loc, newVecTy, zeroVal)};
    auto zeroSubVarg1{builder.create<mlir::arith::SubIOp>(loc, vZero, varg1)};

    mlir::func::FuncOp funcOp{nullptr};
    switch (eleTy.getWidth()) {
    case 8:
      fname = "llvm.ppc.altivec.vmaxsb";
      ftype = genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
                          Ty::IntegerVector<1>>(context, builder);
      break;
    case 16:
      fname = "llvm.ppc.altivec.vmaxsh";
      ftype = genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
                          Ty::IntegerVector<2>>(context, builder);
      break;
    case 32:
      fname = "llvm.ppc.altivec.vmaxsw";
      ftype = genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                          Ty::IntegerVector<4>>(context, builder);
      break;
    case 64:
      fname = "llvm.ppc.altivec.vmaxsd";
      ftype = genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
                          Ty::IntegerVector<8>>(context, builder);
      break;
    default:
      llvm_unreachable("invalid integer size");
    }
    funcOp = builder.createFunction(loc, fname, ftype);

    mlir::Value args[] = {zeroSubVarg1, varg1};
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, args)};
    return builder.createConvert(loc, argBases[0].getType(),
                                 callOp.getResult(0));
  }

  llvm_unreachable("unknown vector type");
}

// VEC_ADD, VEC_AND, VEC_SUB, VEC_MUL, VEC_XOR
template <VecOp vop>
fir::ExtendedValue PPCIntrinsicLibrary::genVecAddAndMulSubXor(
    mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto argBases{getBasesForArgs(args)};
  auto argsTy{getTypesForArgs(argBases)};
  assert(mlir::isa<fir::VectorType>(argsTy[0]) &&
         mlir::isa<fir::VectorType>(argsTy[1]));

  auto vecTyInfo{getVecTypeFromFir(argBases[0])};

  const auto isInteger{mlir::isa<mlir::IntegerType>(vecTyInfo.eleTy)};
  const auto isFloat{mlir::isa<mlir::FloatType>(vecTyInfo.eleTy)};
  assert((isInteger || isFloat) && "unknown vector type");

  auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)};

  mlir::Value r{nullptr};
  switch (vop) {
  case VecOp::Add:
    if (isInteger)
      r = builder.create<mlir::arith::AddIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::AddFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::Mul:
    if (isInteger)
      r = builder.create<mlir::arith::MulIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::MulFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::Sub:
    if (isInteger)
      r = builder.create<mlir::arith::SubIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::SubFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::And:
  case VecOp::Xor: {
    mlir::Value arg1{nullptr};
    mlir::Value arg2{nullptr};
    if (isInteger) {
      arg1 = vargs[0];
      arg2 = vargs[1];
    } else if (isFloat) {
      // bitcast the arguments to integer
      auto wd{mlir::dyn_cast<mlir::FloatType>(vecTyInfo.eleTy).getWidth()};
      auto ftype{builder.getIntegerType(wd)};
      auto bcVecTy{mlir::VectorType::get(vecTyInfo.len, ftype)};
      arg1 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0]);
      arg2 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1]);
    }
    if (vop == VecOp::And)
      r = builder.create<mlir::arith::AndIOp>(loc, arg1, arg2);
    else if (vop == VecOp::Xor)
      r = builder.create<mlir::arith::XOrIOp>(loc, arg1, arg2);

    if (isFloat)
      r = builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), r);

    break;
  }
  }

  return builder.createConvert(loc, argsTy[0], r);
}

// VEC_ANY_GE
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecAnyCompare(mlir::Type resultType,
                                      llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  assert(vop == VecOp::Anyge && "unknown vector compare operation");
  auto argBases{getBasesForArgs(args)};
  VecTypeInfo vTypeInfo{getVecTypeFromFir(argBases[0])};
  [[maybe_unused]] const auto isSupportedTy{
      mlir::isa<mlir::Float32Type, mlir::Float64Type, mlir::IntegerType>(
          vTypeInfo.eleTy)};
  assert(isSupportedTy && "unsupported vector type");

  // Constants for mapping CR6 bits to predicate result
  enum { CR6_EQ_REV = 1, CR6_LT_REV = 3 };
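  // (These follow the standard altivec __CR6_* encodings: a "_REV" predicate
  // returns 1 when the corresponding "all true"/"all false" CR6 bit is clear.
  // So any_ge(a, b) over integers is computed below as vcmpgt(b, a) checked
  // with CR6_LT_REV, i.e. "not all b > a", and over reals as xvcmpge(a, b)
  // checked with CR6_EQ_REV, i.e. "not all a >= b false".)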
1253
1254 auto context{builder.getContext()};
1255
1256 static std::map<std::pair<ParamTypeId, unsigned>,
1257 std::pair<llvm::StringRef, mlir::FunctionType>>
1258 uiBuiltin{
1259 {std::make_pair(ParamTypeId::IntegerVector, 8),
1260 std::make_pair(
1261 "llvm.ppc.altivec.vcmpgtsb.p",
1262 genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<1>,
1263 Ty::IntegerVector<1>>(context, builder))},
1264 {std::make_pair(ParamTypeId::IntegerVector, 16),
1265 std::make_pair(
1266 "llvm.ppc.altivec.vcmpgtsh.p",
1267 genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<2>,
1268 Ty::IntegerVector<2>>(context, builder))},
1269 {std::make_pair(ParamTypeId::IntegerVector, 32),
1270 std::make_pair(
1271 "llvm.ppc.altivec.vcmpgtsw.p",
1272 genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<4>,
1273 Ty::IntegerVector<4>>(context, builder))},
1274 {std::make_pair(ParamTypeId::IntegerVector, 64),
1275 std::make_pair(
1276 "llvm.ppc.altivec.vcmpgtsd.p",
1277 genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<8>,
1278 Ty::IntegerVector<8>>(context, builder))},
1279 {std::make_pair(ParamTypeId::UnsignedVector, 8),
1280 std::make_pair(
1281 "llvm.ppc.altivec.vcmpgtub.p",
1282 genFuncType<Ty::Integer<4>, Ty::Integer<4>,
1283 Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>(
1284 context, builder))},
1285 {std::make_pair(ParamTypeId::UnsignedVector, 16),
1286 std::make_pair(
1287 "llvm.ppc.altivec.vcmpgtuh.p",
1288 genFuncType<Ty::Integer<4>, Ty::Integer<4>,
1289 Ty::UnsignedVector<2>, Ty::UnsignedVector<2>>(
1290 context, builder))},
1291 {std::make_pair(ParamTypeId::UnsignedVector, 32),
1292 std::make_pair(
1293 "llvm.ppc.altivec.vcmpgtuw.p",
1294 genFuncType<Ty::Integer<4>, Ty::Integer<4>,
1295 Ty::UnsignedVector<4>, Ty::UnsignedVector<4>>(
1296 context, builder))},
1297 {std::make_pair(ParamTypeId::UnsignedVector, 64),
1298 std::make_pair(
1299 "llvm.ppc.altivec.vcmpgtud.p",
1300 genFuncType<Ty::Integer<4>, Ty::Integer<4>,
1301 Ty::UnsignedVector<8>, Ty::UnsignedVector<8>>(
1302 context, builder))},
1303 };
1304
1305 mlir::FunctionType ftype{nullptr};
1306 llvm::StringRef fname;
1307 const auto i32Ty{mlir::IntegerType::get(context, 32)};
1308 llvm::SmallVector<mlir::Value> cmpArgs;
1309 mlir::Value op{nullptr};
1310 const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};
1311
1312 if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) {
1313 std::pair<llvm::StringRef, mlir::FunctionType> bi;
1314 bi = (elementTy.isUnsignedInteger())
1315 ? uiBuiltin[std::pair(ParamTypeId::UnsignedVector, width)]
1316 : uiBuiltin[std::pair(ParamTypeId::IntegerVector, width)];
1317
1318 fname = std::get<0>(bi);
1319 ftype = std::get<1>(bi);
1320
1321 op = builder.createIntegerConstant(loc, i32Ty, CR6_LT_REV);
1322 cmpArgs.emplace_back(op);
1323 // reverse the argument order
1324 cmpArgs.emplace_back(argBases[1]);
1325 cmpArgs.emplace_back(argBases[0]);
1326 } else if (vTypeInfo.isFloat()) {
1327 if (vTypeInfo.isFloat32()) {
1328 fname = "llvm.ppc.vsx.xvcmpgesp.p";
1329 ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<4>,
1330 Ty::RealVector<4>>(context, builder);
1331 } else {
1332 fname = "llvm.ppc.vsx.xvcmpgedp.p";
1333 ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<8>,
1334 Ty::RealVector<8>>(context, builder);
1335 }
1336 op = builder.createIntegerConstant(loc, i32Ty, CR6_EQ_REV);
1337 cmpArgs.emplace_back(op);
1338 cmpArgs.emplace_back(argBases[0]);
1339 cmpArgs.emplace_back(argBases[1]);
1340 }
1341 assert((!fname.empty() && ftype) && "invalid type");
1342
1343 mlir::func::FuncOp funcOp{builder.createFunction(loc, fname, ftype)};
1344 auto callOp{builder.create<fir::CallOp>(loc, funcOp, cmpArgs)};
1345 return callOp.getResult(0);
1346}
1347
1348static std::pair<llvm::StringRef, mlir::FunctionType>
1349getVecCmpFuncTypeAndName(VecTypeInfo &vTypeInfo, VecOp vop,
1350 fir::FirOpBuilder &builder) {
1351 auto context{builder.getContext()};
1352 static std::map<std::pair<ParamTypeId, unsigned>,
1353 std::pair<llvm::StringRef, mlir::FunctionType>>
1354 iuBuiltinName{
1355 {std::make_pair(ParamTypeId::IntegerVector, 8),
1356 std::make_pair(
1357 "llvm.ppc.altivec.vcmpgtsb",
1358 genFuncType<Ty::UnsignedVector<1>, Ty::IntegerVector<1>,
1359 Ty::IntegerVector<1>>(context, builder))},
1360 {std::make_pair(ParamTypeId::IntegerVector, 16),
1361 std::make_pair(
1362 "llvm.ppc.altivec.vcmpgtsh",
1363 genFuncType<Ty::UnsignedVector<2>, Ty::IntegerVector<2>,
1364 Ty::IntegerVector<2>>(context, builder))},
1365 {std::make_pair(ParamTypeId::IntegerVector, 32),
1366 std::make_pair(
1367 "llvm.ppc.altivec.vcmpgtsw",
1368 genFuncType<Ty::UnsignedVector<4>, Ty::IntegerVector<4>,
1369 Ty::IntegerVector<4>>(context, builder))},
1370 {std::make_pair(ParamTypeId::IntegerVector, 64),
1371 std::make_pair(
1372 "llvm.ppc.altivec.vcmpgtsd",
1373 genFuncType<Ty::UnsignedVector<8>, Ty::IntegerVector<8>,
1374 Ty::IntegerVector<8>>(context, builder))},
1375 {std::make_pair(ParamTypeId::UnsignedVector, 8),
1376 std::make_pair(
1377 "llvm.ppc.altivec.vcmpgtub",
1378 genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
1379 Ty::UnsignedVector<1>>(context, builder))},
1380 {std::make_pair(ParamTypeId::UnsignedVector, 16),
1381 std::make_pair(
1382 "llvm.ppc.altivec.vcmpgtuh",
1383 genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
1384 Ty::UnsignedVector<2>>(context, builder))},
1385 {std::make_pair(ParamTypeId::UnsignedVector, 32),
1386 std::make_pair(
1387 "llvm.ppc.altivec.vcmpgtuw",
1388 genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
1389 Ty::UnsignedVector<4>>(context, builder))},
1390 {std::make_pair(ParamTypeId::UnsignedVector, 64),
1391 std::make_pair(
1392 "llvm.ppc.altivec.vcmpgtud",
1393 genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
1394 Ty::UnsignedVector<8>>(context, builder))}};
1395
1396 // VSX only defines GE and GT builtins. Cmple and Cmplt use GE and GT with
1397 // arguments revsered.
1398 enum class Cmp { gtOrLt, geOrLe };
1399 static std::map<std::pair<Cmp, int>,
1400 std::pair<llvm::StringRef, mlir::FunctionType>>
1401 rGBI{{std::make_pair(Cmp::geOrLe, 32),
1402 std::make_pair("llvm.ppc.vsx.xvcmpgesp",
1403 genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>,
1404 Ty::RealVector<4>>(context, builder))},
1405 {std::make_pair(Cmp::geOrLe, 64),
1406 std::make_pair("llvm.ppc.vsx.xvcmpgedp",
1407 genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>,
1408 Ty::RealVector<8>>(context, builder))},
1409 {std::make_pair(Cmp::gtOrLt, 32),
1410 std::make_pair("llvm.ppc.vsx.xvcmpgtsp",
1411 genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>,
1412 Ty::RealVector<4>>(context, builder))},
1413 {std::make_pair(Cmp::gtOrLt, 64),
1414 std::make_pair("llvm.ppc.vsx.xvcmpgtdp",
1415 genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>,
1416 Ty::RealVector<8>>(context, builder))}};
1417
1418 const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};
1419 std::pair<llvm::StringRef, mlir::FunctionType> specFunc;
1420 if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy))
1421 specFunc =
1422 (elementTy.isUnsignedInteger())
1423 ? iuBuiltinName[std::make_pair(ParamTypeId::UnsignedVector, width)]
1424 : iuBuiltinName[std::make_pair(ParamTypeId::IntegerVector, width)];
1425 else if (vTypeInfo.isFloat())
1426 specFunc = (vop == VecOp::Cmpge || vop == VecOp::Cmple)
1427 ? rGBI[std::make_pair(Cmp::geOrLe, width)]
1428 : rGBI[std::make_pair(Cmp::gtOrLt, width)];
1429
1430 assert(!std::get<0>(specFunc).empty() && "unknown builtin name");
1431 assert(std::get<1>(specFunc) && "unknown function type");
1432 return specFunc;
1433}

// VEC_CMPGE, VEC_CMPGT, VEC_CMPLE, VEC_CMPLT
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecCmp(mlir::Type resultType,
                               llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  VecTypeInfo vecTyInfo{getVecTypeFromFir(argBases[0])};
  auto varg{convertVecArgs(builder, loc, vecTyInfo, argBases)};

  std::pair<llvm::StringRef, mlir::FunctionType> funcTyNam{
      getVecCmpFuncTypeAndName(vecTyInfo, vop, builder)};

  mlir::func::FuncOp funcOp = builder.createFunction(
      loc, std::get<0>(funcTyNam), std::get<1>(funcTyNam));

  mlir::Value res{nullptr};

  if (auto eTy = mlir::dyn_cast<mlir::IntegerType>(vecTyInfo.eleTy)) {
    constexpr int firstArg{0};
    constexpr int secondArg{1};
    std::map<VecOp, std::array<int, 2>> argOrder{
        {VecOp::Cmpge, {secondArg, firstArg}},
        {VecOp::Cmple, {firstArg, secondArg}},
        {VecOp::Cmpgt, {firstArg, secondArg}},
        {VecOp::Cmplt, {secondArg, firstArg}}};

    // Construct the function return type, unsigned vector, for conversion.
    auto itype = mlir::IntegerType::get(context, eTy.getWidth(),
                                        mlir::IntegerType::Unsigned);
    auto returnType = fir::VectorType::get(vecTyInfo.len, itype);

    switch (vop) {
    case VecOp::Cmpgt:
    case VecOp::Cmplt: {
      // arg1 > arg2 --> vcmpgt(arg1, arg2)
      // arg1 < arg2 --> vcmpgt(arg2, arg1)
      mlir::Value vargs[]{argBases[argOrder[vop][0]],
                          argBases[argOrder[vop][1]]};
      auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
      res = callOp.getResult(0);
      break;
    }
    case VecOp::Cmpge:
    case VecOp::Cmple: {
      // arg1 >= arg2 --> vcmpgt(arg2, arg1) xor vector(-1)
      // arg1 <= arg2 --> vcmpgt(arg1, arg2) xor vector(-1)
      mlir::Value vargs[]{argBases[argOrder[vop][0]],
                          argBases[argOrder[vop][1]]};

      // Construct a constant vector(-1)
      auto negOneVal{builder.createIntegerConstant(
          loc, getConvertedElementType(context, eTy), -1)};
      auto vNegOne{builder.create<mlir::vector::BroadcastOp>(
          loc, vecTyInfo.toMlirVectorType(context), negOneVal)};

      auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
      mlir::Value callRes{callOp.getResult(0)};
      auto vargs2{
          convertVecArgs(builder, loc, vecTyInfo, mlir::ValueRange{callRes})};
      auto xorRes{builder.create<mlir::arith::XOrIOp>(loc, vargs2[0], vNegOne)};

      res = builder.createConvert(loc, returnType, xorRes);
      break;
    }
    default:
      llvm_unreachable("Invalid vector operation for generator");
    }
  } else if (vecTyInfo.isFloat()) {
    mlir::Value vargs[2];
    switch (vop) {
    case VecOp::Cmpge:
    case VecOp::Cmpgt:
      vargs[0] = argBases[0];
      vargs[1] = argBases[1];
      break;
    case VecOp::Cmple:
    case VecOp::Cmplt:
      // Swap the arguments as xvcmpg[et] is used
      vargs[0] = argBases[1];
      vargs[1] = argBases[0];
      break;
    default:
      llvm_unreachable("Invalid vector operation for generator");
    }
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
    res = callOp.getResult(0);
  } else
    llvm_unreachable("invalid vector type");

  return res;
}

static inline mlir::Value swapVectorWordPairs(fir::FirOpBuilder &builder,
                                              mlir::Location loc,
                                              mlir::Value arg) {
  auto ty = arg.getType();
  auto context{builder.getContext()};
  auto vtype{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))};

  if (ty != vtype)
    arg = builder.create<mlir::LLVM::BitcastOp>(loc, vtype, arg).getResult();

  llvm::SmallVector<int64_t, 16> mask{4,  5,  6,  7,  0, 1, 2,  3,
                                      12, 13, 14, 15, 8, 9, 10, 11};
  arg = builder.create<mlir::vector::ShuffleOp>(loc, arg, arg, mask);
  if (ty != vtype)
    arg = builder.create<mlir::LLVM::BitcastOp>(loc, ty, arg);
  return arg;
}
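
// Illustrative effect of the shuffle above (byte-level sketch): viewing the
// vector as 16 bytes, bytes {0..3, 4..7, 8..11, 12..15} become
// {4..7, 0..3, 12..15, 8..11}, i.e. each pair of adjacent 32-bit words is
// swapped, exchanging f32 elements 0<->1 and 2<->3.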

// VEC_CONVERT, VEC_CTF, VEC_CVF
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType,
                                   llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto vecTyInfo{getVecTypeFromFir(argBases[0])};
  auto mlirTy{vecTyInfo.toMlirVectorType(context)};
  auto vArg1{builder.createConvert(loc, mlirTy, argBases[0])};
  const auto i32Ty{mlir::IntegerType::get(context, 32)};

  switch (vop) {
  case VecOp::Ctf: {
    assert(args.size() == 2);
    auto convArg{builder.createConvert(loc, i32Ty, argBases[1])};
    auto eTy{mlir::dyn_cast<mlir::IntegerType>(vecTyInfo.eleTy)};
    assert(eTy && "Unsupported vector type");
    const auto isUnsigned{eTy.isUnsignedInteger()};
    const auto width{eTy.getWidth()};

    if (width == 32) {
      auto ftype{(isUnsigned)
                     ? genFuncType<Ty::RealVector<4>, Ty::UnsignedVector<4>,
                                   Ty::Integer<4>>(context, builder)
                     : genFuncType<Ty::RealVector<4>, Ty::IntegerVector<4>,
                                   Ty::Integer<4>>(context, builder)};
      const llvm::StringRef fname{(isUnsigned) ? "llvm.ppc.altivec.vcfux"
                                               : "llvm.ppc.altivec.vcfsx"};
      auto funcOp{builder.createFunction(loc, fname, ftype)};
      mlir::Value newArgs[] = {argBases[0], convArg};
      auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};

      return callOp.getResult(0);
    } else if (width == 64) {
      auto fTy{mlir::Float64Type::get(context)};
      auto ty{mlir::VectorType::get(2, fTy)};

      // vec_ctf(arg1, arg2) = fmul(1.0 / (1 << arg2), llvm.sitofp(arg1))
      auto convOp{(isUnsigned)
                      ? builder.create<mlir::LLVM::UIToFPOp>(loc, ty, vArg1)
                      : builder.create<mlir::LLVM::SIToFPOp>(loc, ty, vArg1)};

      // Construct the vector <1.0/(1<<arg2), 1.0/(1<<arg2)>
      auto constInt{mlir::dyn_cast_or_null<mlir::IntegerAttr>(
          mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[1].getDefiningOp())
              .getValue())};
      assert(constInt && "expected integer constant argument");
      double f{1.0 / (1 << constInt.getInt())};
      llvm::SmallVector<double> vals{f, f};
      auto constOp{builder.create<mlir::arith::ConstantOp>(
          loc, ty, builder.getF64VectorAttr(vals))};

      auto mulOp{builder.create<mlir::LLVM::FMulOp>(
          loc, ty, convOp->getResult(0), constOp)};

      return builder.createConvert(loc, fir::VectorType::get(2, fTy), mulOp);
    }
    llvm_unreachable("invalid element integer kind");
  }
  case VecOp::Convert: {
    assert(args.size() == 2);
    // resultType has the mold type (if scalar) or element type (if array)
    auto resTyInfo{getVecTypeFromFirType(resultType)};
    auto moldTy{resTyInfo.toMlirVectorType(context)};
    auto firTy{resTyInfo.toFirVectorType()};

    // vec_convert(v, mold) = bitcast v to "type of mold"
    auto conv{builder.create<mlir::LLVM::BitcastOp>(loc, moldTy, vArg1)};

    return builder.createConvert(loc, firTy, conv);
  }
  case VecOp::Cvf: {
    assert(args.size() == 1);

    mlir::Value newArgs[]{vArg1};
    if (vecTyInfo.isFloat32()) {
      if (changeVecElemOrder())
        newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);

      const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp"};
      auto ftype{
          genFuncType<Ty::RealVector<8>, Ty::RealVector<4>>(context, builder)};
      auto funcOp{builder.createFunction(loc, fname, ftype)};
      auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};

      return callOp.getResult(0);
    } else if (vecTyInfo.isFloat64()) {
      const llvm::StringRef fname{"llvm.ppc.vsx.xvcvdpsp"};
      auto ftype{
          genFuncType<Ty::RealVector<4>, Ty::RealVector<8>>(context, builder)};
      auto funcOp{builder.createFunction(loc, fname, ftype)};
      newArgs[0] =
          builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0);
      auto fvf32Ty{newArgs[0].getType()};
      auto f32type{mlir::Float32Type::get(context)};
      auto mvf32Ty{mlir::VectorType::get(4, f32type)};
      newArgs[0] = builder.createConvert(loc, mvf32Ty, newArgs[0]);

      if (changeVecElemOrder())
        newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);

      return builder.createConvert(loc, fvf32Ty, newArgs[0]);
    }
    llvm_unreachable("invalid element kind");
  }
  default:
    llvm_unreachable("Invalid vector operation for generator");
  }
}
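
// Example (illustrative only): for v of type vector(integer(8)),
//   vec_ctf(v, 3)
// is lowered above to sitofp(v) * <0.125, 0.125>, i.e. each element is
// converted to real(8) and scaled by 1.0 / (1 << 3).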

static mlir::Value convertVectorElementOrder(fir::FirOpBuilder &builder,
                                             mlir::Location loc,
                                             VecTypeInfo vecInfo,
                                             mlir::Value idx) {
  mlir::Value numSub1{
      builder.createIntegerConstant(loc, idx.getType(), vecInfo.len - 1)};
  return builder.create<mlir::LLVM::SubOp>(loc, idx.getType(), numSub1, idx);
}
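
// For example, with a 4-element vector this maps index i to (3 - i), so
// index 1 becomes 2.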

// VEC_EXTRACT
fir::ExtendedValue
PPCIntrinsicLibrary::genVecExtract(mlir::Type resultType,
                                   llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto argBases{getBasesForArgs(args)};
  auto argTypes{getTypesForArgs(argBases)};
  auto vecTyInfo{getVecTypeFromFir(argBases[0])};

  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
  auto varg0{builder.createConvert(loc, mlirTy, argBases[0])};

  // arg2 modulo the number of elements in arg1 to determine the element
  // position
  auto numEle{builder.createIntegerConstant(loc, argTypes[1], vecTyInfo.len)};
  mlir::Value uremOp{
      builder.create<mlir::LLVM::URemOp>(loc, argBases[1], numEle)};

  if (!isNativeVecElemOrderOnLE())
    uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp);

  return builder.create<mlir::vector::ExtractElementOp>(loc, varg0, uremOp);
}
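
// Example (illustrative): for v of type vector(real(4)) (4 elements),
//   vec_extract(v, 5)
// reads element 5 mod 4 == 1 in native element order, or element 2 after
// the element-order conversion above.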

// VEC_INSERT
fir::ExtendedValue
PPCIntrinsicLibrary::genVecInsert(mlir::Type resultType,
                                  llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto argBases{getBasesForArgs(args)};
  auto argTypes{getTypesForArgs(argBases)};
  auto vecTyInfo{getVecTypeFromFir(argBases[1])};
  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
  auto varg1{builder.createConvert(loc, mlirTy, argBases[1])};

  auto numEle{builder.createIntegerConstant(loc, argTypes[2], vecTyInfo.len)};
  mlir::Value uremOp{
      builder.create<mlir::LLVM::URemOp>(loc, argBases[2], numEle)};

  if (!isNativeVecElemOrderOnLE())
    uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp);

  auto res{builder.create<mlir::vector::InsertElementOp>(loc, argBases[0],
                                                         varg1, uremOp)};
  return builder.create<fir::ConvertOp>(loc, vecTyInfo.toFirVectorType(), res);
}

// VEC_MERGEH, VEC_MERGEL
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecMerge(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto argBases{getBasesForArgs(args)};
  auto vecTyInfo{getVecTypeFromFir(argBases[0])};
  llvm::SmallVector<int64_t, 16> mMask; // native vector element order mask
  llvm::SmallVector<int64_t, 16> rMask; // non-native vector element order mask

  switch (vop) {
  case VecOp::Mergeh: {
    switch (vecTyInfo.len) {
    case 2: {
      enum { V1 = 0, V2 = 2 };
      mMask = {V1 + 0, V2 + 0};
      rMask = {V2 + 1, V1 + 1};
      break;
    }
    case 4: {
      enum { V1 = 0, V2 = 4 };
      mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1};
      rMask = {V2 + 2, V1 + 2, V2 + 3, V1 + 3};
      break;
    }
    case 8: {
      enum { V1 = 0, V2 = 8 };
      mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1, V1 + 2, V2 + 2, V1 + 3, V2 + 3};
      rMask = {V2 + 4, V1 + 4, V2 + 5, V1 + 5, V2 + 6, V1 + 6, V2 + 7, V1 + 7};
      break;
    }
    case 16:
      mMask = {0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13,
               0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17};
      rMask = {0x18, 0x08, 0x19, 0x09, 0x1A, 0x0A, 0x1B, 0x0B,
               0x1C, 0x0C, 0x1D, 0x0D, 0x1E, 0x0E, 0x1F, 0x0F};
      break;
    default:
      llvm_unreachable("unexpected vector length");
    }
    break;
  }
  case VecOp::Mergel: {
    switch (vecTyInfo.len) {
    case 2: {
      enum { V1 = 0, V2 = 2 };
      mMask = {V1 + 1, V2 + 1};
      rMask = {V2 + 0, V1 + 0};
      break;
    }
    case 4: {
      enum { V1 = 0, V2 = 4 };
      mMask = {V1 + 2, V2 + 2, V1 + 3, V2 + 3};
      rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1};
      break;
    }
    case 8: {
      enum { V1 = 0, V2 = 8 };
      mMask = {V1 + 4, V2 + 4, V1 + 5, V2 + 5, V1 + 6, V2 + 6, V1 + 7, V2 + 7};
      rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1, V2 + 2, V1 + 2, V2 + 3, V1 + 3};
      break;
    }
    case 16:
      mMask = {0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B,
               0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F};
      rMask = {0x10, 0x00, 0x11, 0x01, 0x12, 0x02, 0x13, 0x03,
               0x14, 0x04, 0x15, 0x05, 0x16, 0x06, 0x17, 0x07};
      break;
    default:
      llvm_unreachable("unexpected vector length");
    }
    break;
  }
  default:
    llvm_unreachable("invalid vector operation for generator");
  }

  auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)};

  llvm::SmallVector<int64_t, 16> &mergeMask =
      (isBEVecElemOrderOnLE()) ? rMask : mMask;

  auto callOp{builder.create<mlir::vector::ShuffleOp>(loc, vargs[0], vargs[1],
                                                      mergeMask)};
  return builder.createConvert(loc, resultType, callOp);
}
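
// Example (illustrative): for 4-element vectors a and b, vec_mergeh yields
//   {a[0], b[0], a[1], b[1]}
// in native element order, matching the mMask table built above.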

static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder,
                                      mlir::Location loc, mlir::Value baseAddr,
                                      mlir::Value offset) {
  auto typeExtent{fir::SequenceType::getUnknownExtent()};
  // Construct an !fir.ref<!fir.array<?xi8>> type
  auto arrRefTy{builder.getRefType(fir::SequenceType::get(
      {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))};
  // Convert arg to !fir.ref<!fir.array<?xi8>>
  auto resAddr{builder.create<fir::ConvertOp>(loc, arrRefTy, baseAddr)};

  return builder.create<fir::CoordinateOp>(loc, arrRefTy, resAddr, offset);
}
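
// In effect this computes baseAddr + offset in bytes: the base address is
// viewed as a ?xi8 array and fir.coordinate_of indexes into it (a sketch of
// the intent, not a statement about the exact IR produced downstream).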

static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder,
                                         mlir::Location loc, mlir::Value v,
                                         int64_t len) {
  assert(mlir::isa<mlir::VectorType>(v.getType()));
  assert(len > 0);
  llvm::SmallVector<int64_t, 16> mask;
  for (int64_t i = 0; i < len; ++i) {
    mask.push_back(len - 1 - i);
  }
  auto undefVec{builder.create<fir::UndefOp>(loc, v.getType())};
  return builder.create<mlir::vector::ShuffleOp>(loc, v, undefVec, mask);
}
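
// For len == 4 the shuffle mask built above is {3, 2, 1, 0}, so the result
// holds the elements of v in reverse order.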

static mlir::NamedAttribute getAlignmentAttr(fir::FirOpBuilder &builder,
                                             const int val) {
  auto i64ty{mlir::IntegerType::get(builder.getContext(), 64)};
  auto alignAttr{mlir::IntegerAttr::get(i64ty, val)};
  return builder.getNamedAttr("alignment", alignAttr);
}

fir::ExtendedValue
PPCIntrinsicLibrary::genVecXlGrp(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  VecTypeInfo vecTyInfo{getVecTypeFromFirType(resultType)};
  switch (vecTyInfo.eleTy.getIntOrFloatBitWidth()) {
  case 8:
    // vec_xlb1
    return genVecLdNoCallGrp<VecOp::Xl>(resultType, args);
  case 16:
    // vec_xlh8
    return genVecLdNoCallGrp<VecOp::Xl>(resultType, args);
  case 32:
    // vec_xlw4
    return genVecLdCallGrp<VecOp::Xlw4>(resultType, args);
  case 64:
    // vec_xld2
    return genVecLdCallGrp<VecOp::Xld2>(resultType, args);
  default:
    llvm_unreachable("invalid kind");
  }
  llvm_unreachable("invalid vector operation for generator");
}

template <VecOp vop>
fir::ExtendedValue PPCIntrinsicLibrary::genVecLdNoCallGrp(
    mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto arg0{getBase(args[0])};
  auto arg1{getBase(args[1])};

  auto vecTyInfo{getVecTypeFromFirType(resultType)};
  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
  auto firTy{vecTyInfo.toFirVectorType()};

  // Add the %val of arg0 to %addr of arg1
  auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};

  const auto triple{fir::getTargetTriple(builder.getModule())};
  // The load must be generated with alignment 1 (an unaligned load).
  auto result{builder.create<fir::LoadOp>(loc, mlirTy, addr,
                                          getAlignmentAttr(builder, 1))};
  if ((vop == VecOp::Xl && isBEVecElemOrderOnLE()) ||
      (vop == VecOp::Xlbe && triple.isLittleEndian()))
    return builder.createConvert(
        loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len));

  return builder.createConvert(loc, firTy, result);
}

// VEC_LD, VEC_LDE, VEC_LDL, VEC_LXVP, VEC_XLD2, VEC_XLW4
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecLdCallGrp(mlir::Type resultType,
                                     llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto context{builder.getContext()};
  auto arg0{getBase(args[0])};
  auto arg1{getBase(args[1])};

  // Prepare the return type in FIR.
  auto vecResTyInfo{getVecTypeFromFirType(resultType)};
  auto mlirTy{vecResTyInfo.toMlirVectorType(context)};
  auto firTy{vecResTyInfo.toFirVectorType()};

  // llvm.ppc.altivec.lvx and llvm.ppc.altivec.lvxl return <4 x i32>, and so
  // does llvm.ppc.altivec.lvewx when the result element type is not integer.
  const auto i32Ty{mlir::IntegerType::get(builder.getContext(), 32)};
  const auto mVecI32Ty{mlir::VectorType::get(4, i32Ty)};

  // For vec_ld, arg0 must be converted from i64 to i32.
  if (vop == VecOp::Ld && arg0.getType().getIntOrFloatBitWidth() == 64)
    arg0 = builder.createConvert(loc, i32Ty, arg0);

  // Add the %val of arg0 to %addr of arg1
  auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};
  llvm::SmallVector<mlir::Value, 4> parsedArgs{addr};

  mlir::Type intrinResTy{nullptr};
  llvm::StringRef fname{};
  switch (vop) {
  case VecOp::Ld:
    fname = "llvm.ppc.altivec.lvx";
    intrinResTy = mVecI32Ty;
    break;
  case VecOp::Lde:
    switch (vecResTyInfo.eleTy.getIntOrFloatBitWidth()) {
    case 8:
      fname = "llvm.ppc.altivec.lvebx";
      intrinResTy = mlirTy;
      break;
    case 16:
      fname = "llvm.ppc.altivec.lvehx";
      intrinResTy = mlirTy;
      break;
    case 32:
      fname = "llvm.ppc.altivec.lvewx";
      if (mlir::isa<mlir::IntegerType>(vecResTyInfo.eleTy))
        intrinResTy = mlirTy;
      else
        intrinResTy = mVecI32Ty;
      break;
    default:
      llvm_unreachable("invalid vector for vec_lde");
    }
    break;
  case VecOp::Ldl:
    fname = "llvm.ppc.altivec.lvxl";
    intrinResTy = mVecI32Ty;
    break;
  case VecOp::Lxvp:
    fname = "llvm.ppc.vsx.lxvp";
    intrinResTy = fir::VectorType::get(256, mlir::IntegerType::get(context, 1));
    break;
  case VecOp::Xld2: {
    fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvd2x.be"
                                   : "llvm.ppc.vsx.lxvd2x";
    // llvm.ppc.vsx.lxvd2x(.be) returns <2 x double>
    intrinResTy = mlir::VectorType::get(2, mlir::Float64Type::get(context));
  } break;
  case VecOp::Xlw4:
    fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvw4x.be"
                                   : "llvm.ppc.vsx.lxvw4x";
    // llvm.ppc.vsx.lxvw4x(.be) returns <4 x i32>
    intrinResTy = mVecI32Ty;
    break;
  default:
    llvm_unreachable("invalid vector operation for generator");
  }

  auto funcType{
      mlir::FunctionType::get(context, {addr.getType()}, {intrinResTy})};
  auto funcOp{builder.createFunction(loc, fname, funcType)};
  auto result{
      builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)};

  if (vop == VecOp::Lxvp)
    return result;

  if (intrinResTy != mlirTy)
    result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, result);

  if (vop != VecOp::Xld2 && vop != VecOp::Xlw4 && isBEVecElemOrderOnLE())
    return builder.createConvert(
        loc, firTy,
        reverseVectorElements(builder, loc, result, vecResTyInfo.len));

  return builder.createConvert(loc, firTy, result);
}
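
// Example (illustrative, not verified output): vec_ld(0, addr) with a
// vector(real(4)) result calls @llvm.ppc.altivec.lvx, receives <4 x i32>,
// bitcasts it to <4 x f32>, and converts back to the FIR vector type.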

// VEC_LVSL, VEC_LVSR
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecLvsGrp(mlir::Type resultType,
                                  llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto context{builder.getContext()};
  auto arg0{getBase(args[0])};
  auto arg1{getBase(args[1])};

  auto vecTyInfo{getVecTypeFromFirType(resultType)};
  auto mlirTy{vecTyInfo.toMlirVectorType(context)};
  auto firTy{vecTyInfo.toFirVectorType()};

  // Convert arg0 to i64 type if needed
  auto i64ty{mlir::IntegerType::get(context, 64)};
  if (arg0.getType() != i64ty)
    arg0 = builder.create<fir::ConvertOp>(loc, i64ty, arg0);

  // The result only depends on the effective address modulo 16. Shift left
  // 56 bits and then arithmetic-shift right 56 bits to clear the upper 56
  // bits of the offset while preserving its sign.
  auto shiftVal{builder.createIntegerConstant(loc, i64ty, 56)};
  auto offset{builder.create<mlir::arith::ShLIOp>(loc, arg0, shiftVal)};
  auto offset2{builder.create<mlir::arith::ShRSIOp>(loc, offset, shiftVal)};

  // Add the offset to %addr of arg1
  auto addr{addOffsetToAddress(builder, loc, arg1, offset2)};
  llvm::SmallVector<mlir::Value, 4> parsedArgs{addr};

  llvm::StringRef fname{};
  switch (vop) {
  case VecOp::Lvsl:
    fname = "llvm.ppc.altivec.lvsl";
    break;
  case VecOp::Lvsr:
    fname = "llvm.ppc.altivec.lvsr";
    break;
  default:
    llvm_unreachable("invalid vector operation for generator");
  }
  auto funcType{mlir::FunctionType::get(context, {addr.getType()}, {mlirTy})};
  auto funcOp{builder.createFunction(loc, fname, funcType)};
  auto result{
      builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)};

  if (isNativeVecElemOrderOnLE())
    return builder.createConvert(
        loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len));

  return builder.createConvert(loc, firTy, result);
}
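
// Note on the masking above: (offset << 56) >> 56 (arithmetic) keeps only
// the low-order byte of the offset, sign-extended; e.g. an offset of 0x113
// becomes 0x13.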

// VEC_NMADD, VEC_MSUB
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecNmaddMsub(mlir::Type resultType,
                                     llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto vTypeInfo{getVecTypeFromFir(argBases[0])};
  auto newArgs{convertVecArgs(builder, loc, vTypeInfo, argBases)};
  const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};

  // llvm.fma takes three vector arguments of the result type.
  static std::map<int, std::pair<llvm::StringRef, mlir::FunctionType>> fmaMap{
      {32,
       std::make_pair(
           "llvm.fma.v4f32",
           genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>,
                       Ty::RealVector<4>>(context, builder))},
      {64,
       std::make_pair(
           "llvm.fma.v2f64",
           genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>,
                       Ty::RealVector<8>>(context, builder))}};

  auto funcOp{builder.createFunction(loc, std::get<0>(fmaMap[width]),
                                     std::get<1>(fmaMap[width]))};
  if (vop == VecOp::Nmadd) {
    // vec_nmadd(arg1, arg2, arg3) = -fma(arg1, arg2, arg3)
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};

    // We need to convert the fir.vector to an MLIR vector to use fneg, and
    // then back to a fir.vector for the result.
    auto vCall{builder.createConvert(loc, vTypeInfo.toMlirVectorType(context),
                                     callOp.getResult(0))};
    auto neg{builder.create<mlir::arith::NegFOp>(loc, vCall)};
    return builder.createConvert(loc, vTypeInfo.toFirVectorType(), neg);
  } else if (vop == VecOp::Msub) {
    // vec_msub(arg1, arg2, arg3) = fma(arg1, arg2, -arg3)
    newArgs[2] = builder.create<mlir::arith::NegFOp>(loc, newArgs[2]);

    auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};
    return callOp.getResult(0);
  }
  llvm_unreachable("Invalid vector operation for generator");
}

// VEC_PERM, VEC_PERMI
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecPerm(mlir::Type resultType,
                                llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto argTypes{getTypesForArgs(argBases)};
  auto vecTyInfo{getVecTypeFromFir(argBases[0])};
  auto mlirTy{vecTyInfo.toMlirVectorType(context)};

  auto vi32Ty{mlir::VectorType::get(4, mlir::IntegerType::get(context, 32))};
  auto vf64Ty{mlir::VectorType::get(2, mlir::Float64Type::get(context))};

  auto mArg0{builder.createConvert(loc, mlirTy, argBases[0])};
  auto mArg1{builder.createConvert(loc, mlirTy, argBases[1])};

  switch (vop) {
  case VecOp::Perm: {
    VecTypeInfo maskVecTyInfo{getVecTypeFromFir(argBases[2])};
    auto mlirMaskTy{maskVecTyInfo.toMlirVectorType(context)};
    auto mMask{builder.createConvert(loc, mlirMaskTy, argBases[2])};

    if (mlirTy != vi32Ty) {
      mArg0 =
          builder.create<mlir::LLVM::BitcastOp>(loc, vi32Ty, mArg0).getResult();
      mArg1 =
          builder.create<mlir::LLVM::BitcastOp>(loc, vi32Ty, mArg1).getResult();
    }

    auto funcOp{builder.createFunction(
        loc, "llvm.ppc.altivec.vperm",
        genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                    Ty::IntegerVector<4>, Ty::IntegerVector<1>>(context,
                                                                builder))};

    llvm::SmallVector<mlir::Value> newArgs;
    if (isNativeVecElemOrderOnLE()) {
      auto i8Ty{mlir::IntegerType::get(context, 8)};
      auto v8Ty{mlir::VectorType::get(16, i8Ty)};
      auto negOne{builder.createMinusOneInteger(loc, i8Ty)};
      auto vNegOne{
          builder.create<mlir::vector::BroadcastOp>(loc, v8Ty, negOne)};

      mMask = builder.create<mlir::arith::XOrIOp>(loc, mMask, vNegOne);
      newArgs = {mArg1, mArg0, mMask};
    } else {
      newArgs = {mArg0, mArg1, mMask};
    }

    auto res{builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0)};

    if (res.getType() != argTypes[0]) {
      // the fir.call to llvm.ppc.altivec.vperm returns !fir.vector<4:i32>;
      // convert the result back to the original type
      res = builder.createConvert(loc, vi32Ty, res);
      if (mlirTy != vi32Ty)
        res =
            builder.create<mlir::LLVM::BitcastOp>(loc, mlirTy, res).getResult();
    }
    return builder.createConvert(loc, resultType, res);
  }
  case VecOp::Permi: {
    // arg3 is a constant
    auto constIntOp{mlir::dyn_cast_or_null<mlir::IntegerAttr>(
        mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp())
            .getValue())};
    assert(constIntOp && "expected integer constant argument");
    auto constInt{constIntOp.getInt()};
    // arg1, arg2, and the result type share the same VecTypeInfo
    if (vecTyInfo.isFloat()) {
      mArg0 =
          builder.create<mlir::LLVM::BitcastOp>(loc, vf64Ty, mArg0).getResult();
      mArg1 =
          builder.create<mlir::LLVM::BitcastOp>(loc, vf64Ty, mArg1).getResult();
    }

    llvm::SmallVector<int64_t, 2> nMask; // native vector element order mask
    llvm::SmallVector<int64_t, 2> rMask; // non-native vector element order mask
    enum { V1 = 0, V2 = 2 };
    switch (constInt) {
    case 0:
      nMask = {V1 + 0, V2 + 0};
      rMask = {V2 + 1, V1 + 1};
      break;
    case 1:
      nMask = {V1 + 0, V2 + 1};
      rMask = {V2 + 0, V1 + 1};
      break;
    case 2:
      nMask = {V1 + 1, V2 + 0};
      rMask = {V2 + 1, V1 + 0};
      break;
    case 3:
      nMask = {V1 + 1, V2 + 1};
      rMask = {V2 + 0, V1 + 0};
      break;
    default:
      llvm_unreachable("unexpected arg3 value for vec_permi");
    }

    llvm::SmallVector<int64_t, 2> mask =
        (isBEVecElemOrderOnLE()) ? rMask : nMask;
    auto res{builder.create<mlir::vector::ShuffleOp>(loc, mArg0, mArg1, mask)};
    if (res.getType() != mlirTy) {
      auto cast{builder.create<mlir::LLVM::BitcastOp>(loc, mlirTy, res)};
      return builder.createConvert(loc, resultType, cast);
    }
    return builder.createConvert(loc, resultType, res);
  }
  default:
    llvm_unreachable("invalid vector operation for generator");
  }
}
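
// Example (illustrative): vec_permi(a, b, 2) on vector(real(8)) selects
//   {a[1], b[0]}
// in native element order, per the nMask table above.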

// VEC_SEL
fir::ExtendedValue
PPCIntrinsicLibrary::genVecSel(mlir::Type resultType,
                               llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto argBases{getBasesForArgs(args)};
  llvm::SmallVector<VecTypeInfo, 4> vecTyInfos;
  for (size_t i = 0; i < argBases.size(); i++) {
    vecTyInfos.push_back(getVecTypeFromFir(argBases[i]));
  }
  auto vargs{convertVecArgs(builder, loc, vecTyInfos, argBases)};

  auto i8Ty{mlir::IntegerType::get(builder.getContext(), 8)};
  auto negOne{builder.createMinusOneInteger(loc, i8Ty)};

  // construct a constant <16 x i8> vector with value -1 for bitcast
  auto bcVecTy{mlir::VectorType::get(16, i8Ty)};
  auto vNegOne{builder.create<mlir::vector::BroadcastOp>(loc, bcVecTy, negOne)};

  // bitcast the arguments to bcVecTy
  auto arg1{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0])};
  auto arg2{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1])};
  auto arg3{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[2])};

  // vec_sel(arg1, arg2, arg3) =
  //   (arg2 and arg3) or (arg1 and (arg3 xor vector(-1,...)))
  auto comp{builder.create<mlir::arith::XOrIOp>(loc, arg3, vNegOne)};
  auto a1AndComp{builder.create<mlir::arith::AndIOp>(loc, arg1, comp)};
  auto a2AndA3{builder.create<mlir::arith::AndIOp>(loc, arg2, arg3)};
  auto res{builder.create<mlir::arith::OrIOp>(loc, a1AndComp, a2AndA3)};

  auto bcRes{
      builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), res)};

  return builder.createConvert(loc, vecTyInfos[0].toFirVectorType(), bcRes);
}

// VEC_SL, VEC_SLD, VEC_SLDW, VEC_SLL, VEC_SLO, VEC_SR, VEC_SRL, VEC_SRO
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecShift(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto argTypes{getTypesForArgs(argBases)};

  llvm::SmallVector<VecTypeInfo, 2> vecTyInfoArgs;
  vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[0]));
  vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[1]));

  // Convert the first two arguments to MLIR vectors
  llvm::SmallVector<mlir::Type, 2> mlirTyArgs;
  mlirTyArgs.push_back(vecTyInfoArgs[0].toMlirVectorType(context));
  mlirTyArgs.push_back(vecTyInfoArgs[1].toMlirVectorType(context));

  llvm::SmallVector<mlir::Value, 2> mlirVecArgs;
  mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[0], argBases[0]));
  mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[1], argBases[1]));

  mlir::Value shftRes{nullptr};

  if (vop == VecOp::Sl || vop == VecOp::Sr) {
    assert(args.size() == 2);
    // Construct the mask
    auto width{
        mlir::dyn_cast<mlir::IntegerType>(vecTyInfoArgs[1].eleTy).getWidth()};
    auto vecVal{builder.createIntegerConstant(
        loc, getConvertedElementType(context, vecTyInfoArgs[0].eleTy), width)};
    auto mask{
        builder.create<mlir::vector::BroadcastOp>(loc, mlirTyArgs[1], vecVal)};
    auto shft{builder.create<mlir::arith::RemUIOp>(loc, mlirVecArgs[1], mask)};

    mlir::Value res{nullptr};
    if (vop == VecOp::Sr)
      res = builder.create<mlir::arith::ShRUIOp>(loc, mlirVecArgs[0], shft);
    else if (vop == VecOp::Sl)
      res = builder.create<mlir::arith::ShLIOp>(loc, mlirVecArgs[0], shft);

    shftRes = builder.createConvert(loc, argTypes[0], res);
  } else if (vop == VecOp::Sll || vop == VecOp::Slo || vop == VecOp::Srl ||
             vop == VecOp::Sro) {
    assert(args.size() == 2);

    // Bitcast to vector<4xi32>
    auto bcVecTy{mlir::VectorType::get(4, builder.getIntegerType(32))};
    if (mlirTyArgs[0] != bcVecTy)
      mlirVecArgs[0] =
          builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[0]);
    if (mlirTyArgs[1] != bcVecTy)
      mlirVecArgs[1] =
          builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[1]);

    llvm::StringRef funcName;
    switch (vop) {
    case VecOp::Srl:
      funcName = "llvm.ppc.altivec.vsr";
      break;
    case VecOp::Sro:
      funcName = "llvm.ppc.altivec.vsro";
      break;
    case VecOp::Sll:
      funcName = "llvm.ppc.altivec.vsl";
      break;
    case VecOp::Slo:
      funcName = "llvm.ppc.altivec.vslo";
      break;
    default:
      llvm_unreachable("unknown vector shift operation");
    }
    auto funcTy{genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                            Ty::IntegerVector<4>>(context, builder)};
    mlir::func::FuncOp funcOp{builder.createFunction(loc, funcName, funcTy)};
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, mlirVecArgs)};

    // If the result vector type differs from the original type, convert to
    // an MLIR vector, bitcast, and then convert back to a FIR vector.
    if (callOp.getResult(0).getType() != argTypes[0]) {
      auto res = builder.createConvert(loc, bcVecTy, callOp.getResult(0));
      res = builder.create<mlir::vector::BitCastOp>(loc, mlirTyArgs[0], res);
      shftRes = builder.createConvert(loc, argTypes[0], res);
    } else {
      shftRes = callOp.getResult(0);
    }
  } else if (vop == VecOp::Sld || vop == VecOp::Sldw) {
    assert(args.size() == 3);
    auto constIntOp = mlir::dyn_cast_or_null<mlir::IntegerAttr>(
        mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp())
            .getValue());
    assert(constIntOp && "expected integer constant argument");

    // Bitcast to vector<16xi8>
    auto vi8Ty{mlir::VectorType::get(16, builder.getIntegerType(8))};
    if (mlirTyArgs[0] != vi8Ty) {
      mlirVecArgs[0] =
          builder.create<mlir::LLVM::BitcastOp>(loc, vi8Ty, mlirVecArgs[0])
              .getResult();
      mlirVecArgs[1] =
          builder.create<mlir::LLVM::BitcastOp>(loc, vi8Ty, mlirVecArgs[1])
              .getResult();
    }

    // Construct the mask for shuffling
    auto shiftVal{constIntOp.getInt()};
    if (vop == VecOp::Sldw)
      shiftVal = shiftVal << 2;
    shiftVal &= 0xF;
    llvm::SmallVector<int64_t, 16> mask;
    // Shuffle with a mask based on the endianness
    const auto triple{fir::getTargetTriple(builder.getModule())};
    if (triple.isLittleEndian()) {
      for (int i = 16; i < 32; ++i)
        mask.push_back(i - shiftVal);
      shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[1],
                                                        mlirVecArgs[0], mask);
    } else {
      for (int i = 0; i < 16; ++i)
        mask.push_back(i + shiftVal);
      shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[0],
                                                        mlirVecArgs[1], mask);
    }

    // Bitcast to the original type
    if (shftRes.getType() != mlirTyArgs[0])
      shftRes =
          builder.create<mlir::LLVM::BitcastOp>(loc, mlirTyArgs[0], shftRes);

    return builder.createConvert(loc, resultType, shftRes);
  } else
    llvm_unreachable("Invalid vector operation for generator");

  return shftRes;
}
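
// Example (illustrative): vec_sld(a, b, 1) concatenates a and b as 16-byte
// vectors and extracts 16 bytes starting one byte into the concatenation;
// the endian-dependent shuffle masks above implement that byte selection.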

// VEC_SPLAT, VEC_SPLATS, VEC_SPLAT_S32
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecSplat(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};

  mlir::vector::SplatOp splatOp{nullptr};
  mlir::Type retTy{nullptr};
  switch (vop) {
  case VecOp::Splat: {
    assert(args.size() == 2);
    auto vecTyInfo{getVecTypeFromFir(argBases[0])};

    auto extractOp{genVecExtract(resultType, args)};
    splatOp = builder.create<mlir::vector::SplatOp>(
        loc, *(extractOp.getUnboxed()), vecTyInfo.toMlirVectorType(context));
    retTy = vecTyInfo.toFirVectorType();
    break;
  }
  case VecOp::Splats: {
    assert(args.size() == 1);
    auto vecTyInfo{getVecTypeFromEle(argBases[0])};

    splatOp = builder.create<mlir::vector::SplatOp>(
        loc, argBases[0], vecTyInfo.toMlirVectorType(context));
    retTy = vecTyInfo.toFirVectorType();
    break;
  }
  case VecOp::Splat_s32: {
    assert(args.size() == 1);
    auto eleTy{builder.getIntegerType(32)};
    auto intOp{builder.createConvert(loc, eleTy, argBases[0])};

    // the intrinsic always returns vector(integer(4))
    splatOp = builder.create<mlir::vector::SplatOp>(
        loc, intOp, mlir::VectorType::get(4, eleTy));
    retTy = fir::VectorType::get(4, eleTy);
    break;
  }
  default:
    llvm_unreachable("invalid vector operation for generator");
  }
  return builder.createConvert(loc, retTy, splatOp);
}
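
// Example (illustrative): vec_splats(7_4) broadcasts the scalar 7 to all
// four elements of a vector(integer(4)) result via vector.splat.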

fir::ExtendedValue
PPCIntrinsicLibrary::genVecXlds(mlir::Type resultType,
                                llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto arg0{getBase(args[0])};
  auto arg1{getBase(args[1])};

  // Prepare the return type in FIR.
  auto vecTyInfo{getVecTypeFromFirType(resultType)};
  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
  auto firTy{vecTyInfo.toFirVectorType()};

  // Add the %val of arg0 to %addr of arg1
  auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};

  auto i64Ty{mlir::IntegerType::get(builder.getContext(), 64)};
  auto i64VecTy{mlir::VectorType::get(2, i64Ty)};
  auto i64RefTy{builder.getRefType(i64Ty)};
  auto addrConv{builder.create<fir::ConvertOp>(loc, i64RefTy, addr)};

  auto addrVal{builder.create<fir::LoadOp>(loc, addrConv)};
  auto splatRes{builder.create<mlir::vector::SplatOp>(loc, addrVal, i64VecTy)};

  mlir::Value result{nullptr};
  if (mlirTy != splatRes.getType()) {
    result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, splatRes);
  } else
    result = splatRes;

  return builder.createConvert(loc, firTy, result);
}

const char *getMmaIrIntrName(MMAOp mmaOp) {
  switch (mmaOp) {
  case MMAOp::AssembleAcc:
    return "llvm.ppc.mma.assemble.acc";
  case MMAOp::AssemblePair:
    return "llvm.ppc.vsx.assemble.pair";
  case MMAOp::DisassembleAcc:
    return "llvm.ppc.mma.disassemble.acc";
  case MMAOp::DisassemblePair:
    return "llvm.ppc.vsx.disassemble.pair";
  case MMAOp::Xxmfacc:
    return "llvm.ppc.mma.xxmfacc";
  case MMAOp::Xxmtacc:
    return "llvm.ppc.mma.xxmtacc";
  case MMAOp::Xxsetaccz:
    return "llvm.ppc.mma.xxsetaccz";
  case MMAOp::Pmxvbf16ger2:
    return "llvm.ppc.mma.pmxvbf16ger2";
  case MMAOp::Pmxvbf16ger2nn:
    return "llvm.ppc.mma.pmxvbf16ger2nn";
  case MMAOp::Pmxvbf16ger2np:
    return "llvm.ppc.mma.pmxvbf16ger2np";
  case MMAOp::Pmxvbf16ger2pn:
    return "llvm.ppc.mma.pmxvbf16ger2pn";
  case MMAOp::Pmxvbf16ger2pp:
    return "llvm.ppc.mma.pmxvbf16ger2pp";
  case MMAOp::Pmxvf16ger2:
    return "llvm.ppc.mma.pmxvf16ger2";
  case MMAOp::Pmxvf16ger2nn:
    return "llvm.ppc.mma.pmxvf16ger2nn";
  case MMAOp::Pmxvf16ger2np:
    return "llvm.ppc.mma.pmxvf16ger2np";
  case MMAOp::Pmxvf16ger2pn:
    return "llvm.ppc.mma.pmxvf16ger2pn";
  case MMAOp::Pmxvf16ger2pp:
    return "llvm.ppc.mma.pmxvf16ger2pp";
  case MMAOp::Pmxvf32ger:
    return "llvm.ppc.mma.pmxvf32ger";
  case MMAOp::Pmxvf32gernn:
    return "llvm.ppc.mma.pmxvf32gernn";
  case MMAOp::Pmxvf32gernp:
    return "llvm.ppc.mma.pmxvf32gernp";
  case MMAOp::Pmxvf32gerpn:
    return "llvm.ppc.mma.pmxvf32gerpn";
  case MMAOp::Pmxvf32gerpp:
    return "llvm.ppc.mma.pmxvf32gerpp";
  case MMAOp::Pmxvf64ger:
    return "llvm.ppc.mma.pmxvf64ger";
  case MMAOp::Pmxvf64gernn:
    return "llvm.ppc.mma.pmxvf64gernn";
  case MMAOp::Pmxvf64gernp:
    return "llvm.ppc.mma.pmxvf64gernp";
  case MMAOp::Pmxvf64gerpn:
    return "llvm.ppc.mma.pmxvf64gerpn";
  case MMAOp::Pmxvf64gerpp:
    return "llvm.ppc.mma.pmxvf64gerpp";
  case MMAOp::Pmxvi16ger2:
    return "llvm.ppc.mma.pmxvi16ger2";
  case MMAOp::Pmxvi16ger2pp:
    return "llvm.ppc.mma.pmxvi16ger2pp";
  case MMAOp::Pmxvi16ger2s:
    return "llvm.ppc.mma.pmxvi16ger2s";
  case MMAOp::Pmxvi16ger2spp:
    return "llvm.ppc.mma.pmxvi16ger2spp";
  case MMAOp::Pmxvi4ger8:
    return "llvm.ppc.mma.pmxvi4ger8";
  case MMAOp::Pmxvi4ger8pp:
    return "llvm.ppc.mma.pmxvi4ger8pp";
  case MMAOp::Pmxvi8ger4:
    return "llvm.ppc.mma.pmxvi8ger4";
  case MMAOp::Pmxvi8ger4pp:
    return "llvm.ppc.mma.pmxvi8ger4pp";
  case MMAOp::Pmxvi8ger4spp:
    return "llvm.ppc.mma.pmxvi8ger4spp";
  case MMAOp::Xvbf16ger2:
    return "llvm.ppc.mma.xvbf16ger2";
  case MMAOp::Xvbf16ger2nn:
    return "llvm.ppc.mma.xvbf16ger2nn";
  case MMAOp::Xvbf16ger2np:
    return "llvm.ppc.mma.xvbf16ger2np";
  case MMAOp::Xvbf16ger2pn:
    return "llvm.ppc.mma.xvbf16ger2pn";
  case MMAOp::Xvbf16ger2pp:
    return "llvm.ppc.mma.xvbf16ger2pp";
  case MMAOp::Xvf16ger2:
    return "llvm.ppc.mma.xvf16ger2";
  case MMAOp::Xvf16ger2nn:
    return "llvm.ppc.mma.xvf16ger2nn";
  case MMAOp::Xvf16ger2np:
    return "llvm.ppc.mma.xvf16ger2np";
  case MMAOp::Xvf16ger2pn:
    return "llvm.ppc.mma.xvf16ger2pn";
  case MMAOp::Xvf16ger2pp:
    return "llvm.ppc.mma.xvf16ger2pp";
  case MMAOp::Xvf32ger:
    return "llvm.ppc.mma.xvf32ger";
  case MMAOp::Xvf32gernn:
    return "llvm.ppc.mma.xvf32gernn";
  case MMAOp::Xvf32gernp:
    return "llvm.ppc.mma.xvf32gernp";
  case MMAOp::Xvf32gerpn:
    return "llvm.ppc.mma.xvf32gerpn";
  case MMAOp::Xvf32gerpp:
    return "llvm.ppc.mma.xvf32gerpp";
  case MMAOp::Xvf64ger:
    return "llvm.ppc.mma.xvf64ger";
  case MMAOp::Xvf64gernn:
    return "llvm.ppc.mma.xvf64gernn";
  case MMAOp::Xvf64gernp:
    return "llvm.ppc.mma.xvf64gernp";
  case MMAOp::Xvf64gerpn:
    return "llvm.ppc.mma.xvf64gerpn";
  case MMAOp::Xvf64gerpp:
    return "llvm.ppc.mma.xvf64gerpp";
  case MMAOp::Xvi16ger2:
    return "llvm.ppc.mma.xvi16ger2";
  case MMAOp::Xvi16ger2pp:
    return "llvm.ppc.mma.xvi16ger2pp";
  case MMAOp::Xvi16ger2s:
    return "llvm.ppc.mma.xvi16ger2s";
  case MMAOp::Xvi16ger2spp:
    return "llvm.ppc.mma.xvi16ger2spp";
  case MMAOp::Xvi4ger8:
    return "llvm.ppc.mma.xvi4ger8";
  case MMAOp::Xvi4ger8pp:
    return "llvm.ppc.mma.xvi4ger8pp";
  case MMAOp::Xvi8ger4:
    return "llvm.ppc.mma.xvi8ger4";
  case MMAOp::Xvi8ger4pp:
    return "llvm.ppc.mma.xvi8ger4pp";
  case MMAOp::Xvi8ger4spp:
    return "llvm.ppc.mma.xvi8ger4spp";
  }
  llvm_unreachable("getMmaIrIntrName");
}

mlir::FunctionType getMmaIrFuncType(mlir::MLIRContext *context, MMAOp mmaOp) {
  switch (mmaOp) {
  case MMAOp::AssembleAcc:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 4);
  case MMAOp::AssemblePair:
    return genMmaVpFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::DisassembleAcc:
    return genMmaDisassembleFuncType(context, mmaOp);
  case MMAOp::DisassemblePair:
    return genMmaDisassembleFuncType(context, mmaOp);
  case MMAOp::Xxmfacc:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0);
  case MMAOp::Xxmtacc:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0);
  case MMAOp::Xxsetaccz:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 0);
  case MMAOp::Pmxvbf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf32ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvi16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi16ger2s:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi16ger2spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi4ger8:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi4ger8pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi8ger4:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi8ger4pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi8ger4spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Xvbf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf64ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvi16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2s:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi4ger8:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi4ger8pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  }
  llvm_unreachable("getMmaIrFuncType");
}
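
// A hedged reading of the counts above (not normative): the /*Quad*/,
// /*Pair*/, /*Vector*/, and /*Integer*/ arguments encode how many 512-bit
// accumulator, 256-bit vector-pair, 128-bit vector, and integer mask
// operands the intrinsic takes; e.g. AssembleAcc consumes four 128-bit
// vectors and produces an accumulator.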

template <MMAOp IntrId, MMAHandlerOp HandlerOp>
void PPCIntrinsicLibrary::genMmaIntr(llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  mlir::FunctionType intrFuncType{getMmaIrFuncType(context, IntrId)};
  mlir::func::FuncOp funcOp{
      builder.createFunction(loc, getMmaIrIntrName(IntrId), intrFuncType)};
  llvm::SmallVector<mlir::Value> intrArgs;

  // Depending on the handler op, the subroutine call is rewritten as a
  // function call: the first argument holds the result, and the remaining
  // arguments are shifted one position to form the actual argument list.
  size_t argStart{0};
  size_t argStep{1};
  size_t e{args.size()};
  if (HandlerOp == MMAHandlerOp::SubToFunc) {
    // The first argument becomes the function result. Start from the second
    // argument.
    argStart = 1;
  } else if (HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE) {
    // Reverse the argument order on little-endian targets only.
    // The reversal does not depend on the setting of the non-native-order
    // option.
    const auto triple{fir::getTargetTriple(builder.getModule())};
    if (triple.isLittleEndian()) {
      // Load the arguments in reverse order.
      argStart = args.size() - 1;
      // The first argument becomes the function result. Stop at the second
      // argument.
      e = 0;
      argStep = -1;
    } else {
      // Load the arguments in natural order.
      // The first argument becomes the function result. Start from the second
      // argument.
      argStart = 1;
    }
  }

  for (size_t i = argStart, j = 0; i != e; i += argStep, ++j) {
    auto v{fir::getBase(args[i])};
    if (i == 0 && HandlerOp == MMAHandlerOp::FirstArgIsResult) {
      // The first argument is passed in as an address. Load its content to
      // match the LLVM interface.
      v = builder.create<fir::LoadOp>(loc, v);
    }
    auto vType{v.getType()};
    mlir::Type targetType{intrFuncType.getInput(j)};
    if (vType != targetType) {
      if (mlir::isa<mlir::VectorType>(targetType)) {
        // Perform a vector type conversion for arguments passed by value.
        auto eleTy{mlir::dyn_cast<fir::VectorType>(vType).getElementType()};
        auto len{mlir::dyn_cast<fir::VectorType>(vType).getLen()};
        mlir::VectorType mlirType = mlir::VectorType::get(len, eleTy);
        auto v0{builder.createConvert(loc, mlirType, v)};
        auto v1{builder.create<mlir::vector::BitCastOp>(loc, targetType, v0)};
        intrArgs.push_back(v1);
      } else if (mlir::isa<mlir::IntegerType>(targetType) &&
                 mlir::isa<mlir::IntegerType>(vType)) {
        auto v0{builder.createConvert(loc, targetType, v)};
        intrArgs.push_back(v0);
      } else {
        llvm::errs() << "\nUnexpected type conversion requested:"
                     << " from " << vType << " to " << targetType << "\n";
        llvm_unreachable("Unsupported type conversion for argument to PowerPC "
                         "MMA intrinsic");
      }
    } else {
      intrArgs.push_back(v);
    }
  }
  auto callSt{builder.create<fir::CallOp>(loc, funcOp, intrArgs)};
  if (HandlerOp == MMAHandlerOp::SubToFunc ||
      HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE ||
      HandlerOp == MMAHandlerOp::FirstArgIsResult) {
    // Convert the pointer type if needed.
    mlir::Value callResult{callSt.getResult(0)};
    mlir::Value destPtr{fir::getBase(args[0])};
    mlir::Type callResultPtrType{builder.getRefType(callResult.getType())};
    if (destPtr.getType() != callResultPtrType) {
      destPtr = builder.create<fir::ConvertOp>(loc, callResultPtrType, destPtr);
    }
    // Copy the result.
    builder.create<fir::StoreOp>(loc, callResult, destPtr);
  }
}
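
// Example (illustrative sketch of the SubToFunc rewrite, not verified IR):
//   call mma_assemble_pair(pair, v1, v2)
// becomes roughly
//   %r = fir.call @llvm.ppc.vsx.assemble.pair(%v1, %v2)
//   fir.store %r to %pair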

// VEC_ST, VEC_STE, VEC_STXVP
template <VecOp vop>
void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);

  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto arg1TyInfo{getVecTypeFromFir(argBases[0])};

  auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])};

  llvm::StringRef fname{};
  mlir::VectorType stTy{nullptr};
  auto i32ty{mlir::IntegerType::get(context, 32)};
  switch (vop) {
  case VecOp::St:
    stTy = mlir::VectorType::get(4, i32ty);
    fname = "llvm.ppc.altivec.stvx";
    break;
  case VecOp::Ste: {
    const auto width{arg1TyInfo.eleTy.getIntOrFloatBitWidth()};
    const auto len{arg1TyInfo.len};

    if (arg1TyInfo.isFloat32()) {
      stTy = mlir::VectorType::get(len, i32ty);
      fname = "llvm.ppc.altivec.stvewx";
    } else if (mlir::isa<mlir::IntegerType>(arg1TyInfo.eleTy)) {
      stTy = mlir::VectorType::get(len, mlir::IntegerType::get(context, width));

      switch (width) {
      case 8:
        fname = "llvm.ppc.altivec.stvebx";
        break;
      case 16:
        fname = "llvm.ppc.altivec.stvehx";
        break;
      case 32:
        fname = "llvm.ppc.altivec.stvewx";
        break;
      default:
        assert(false && "invalid element size");
      }
    } else
      assert(false && "unknown type");
    break;
  }
  case VecOp::Stxvp:
    // __vector_pair type
    stTy = mlir::VectorType::get(256, mlir::IntegerType::get(context, 1));
    fname = "llvm.ppc.vsx.stxvp";
    break;
  default:
    llvm_unreachable("invalid vector operation for generator");
  }

  auto funcType{
      mlir::FunctionType::get(context, {stTy, addr.getType()}, std::nullopt)};
  mlir::func::FuncOp funcOp = builder.createFunction(loc, fname, funcType);

  llvm::SmallVector<mlir::Value, 4> biArgs;

  if (vop == VecOp::Stxvp) {
    biArgs.push_back(argBases[0]);
    biArgs.push_back(addr);
    builder.create<fir::CallOp>(loc, funcOp, biArgs);
    return;
  }

  auto vecTyInfo{getVecTypeFromFirType(argBases[0].getType())};
  auto cnv{builder.createConvert(loc, vecTyInfo.toMlirVectorType(context),
                                 argBases[0])};

  mlir::Value newArg1{nullptr};
  if (stTy != arg1TyInfo.toMlirVectorType(context))
    newArg1 = builder.create<mlir::vector::BitCastOp>(loc, stTy, cnv);
  else
    newArg1 = cnv;

  if (isBEVecElemOrderOnLE())
    newArg1 = builder.createConvert(
        loc, stTy, reverseVectorElements(builder, loc, newArg1, 4));

  biArgs.push_back(newArg1);
  biArgs.push_back(addr);

  builder.create<fir::CallOp>(loc, funcOp, biArgs);
}

// VEC_XST, VEC_XST_BE, VEC_STXV, VEC_XSTD2, VEC_XSTW4
template <VecOp vop>
void PPCIntrinsicLibrary::genVecXStore(
    llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  VecTypeInfo arg1TyInfo{getVecTypeFromFir(argBases[0])};

  auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])};

  mlir::Value trg{nullptr};
  mlir::Value src{nullptr};

  switch (vop) {
  case VecOp::Xst:
  case VecOp::Xst_be: {
    src = argBases[0];
    trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()),
                                addr);

    if (vop == VecOp::Xst_be || isBEVecElemOrderOnLE()) {
      auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context),
                                     argBases[0])};
      auto shf{reverseVectorElements(builder, loc, cnv, arg1TyInfo.len)};

      src = builder.createConvert(loc, arg1TyInfo.toFirVectorType(), shf);
    }
    break;
  }
  case VecOp::Xstd2:
  case VecOp::Xstw4: {
    // A 16-byte vector arg1 is treated as two 8-byte elements or
    // four 4-byte elements.
    mlir::IntegerType elemTy;
    uint64_t numElem = (vop == VecOp::Xstd2) ? 2 : 4;
    elemTy = builder.getIntegerType(128 / numElem);

    mlir::VectorType mlirVecTy{mlir::VectorType::get(numElem, elemTy)};
    fir::VectorType firVecTy{fir::VectorType::get(numElem, elemTy)};

    auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context),
                                   argBases[0])};

    mlir::Type srcTy{nullptr};
    if (numElem != arg1TyInfo.len) {
      cnv = builder.create<mlir::vector::BitCastOp>(loc, mlirVecTy, cnv);
      srcTy = firVecTy;
    } else {
      srcTy = arg1TyInfo.toFirVectorType();
    }

    trg = builder.createConvert(loc, builder.getRefType(srcTy), addr);

    if (isBEVecElemOrderOnLE()) {
      cnv = reverseVectorElements(builder, loc, cnv, numElem);
    }

    src = builder.createConvert(loc, srcTy, cnv);
    break;
  }
  case VecOp::Stxv:
    src = argBases[0];
    trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()),
                                addr);
    break;
  default:
    assert(false && "Invalid vector operation for generator");
  }
  builder.create<fir::StoreOp>(loc, mlir::TypeRange{},
                               mlir::ValueRange{src, trg},
                               getAlignmentAttr(builder, 1));
}

} // namespace fir
