//===-- PPCIntrinsicCall.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Helper routines for constructing the FIR dialect of MLIR for PowerPC
// intrinsics. This module makes extensive use of MLIR interfaces and follows
// MLIR's coding style (https://mlir.llvm.org/getting_started/DeveloperGuide/).
//
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Builder/PPCIntrinsicCall.h"
#include "flang/Evaluate/common.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/MutableBox.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"

namespace fir {

using PI = PPCIntrinsicLibrary;

// PPC specific intrinsic handlers.
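// The table is searched with llvm::lower_bound in findPPCIntrinsicHandler
// below, so entries must be kept sorted alphabetically by name.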
static constexpr IntrinsicHandler ppcHandlers[]{
    {"__ppc_mma_assemble_acc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::AssembleAcc, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"arg1", asValue},
       {"arg2", asValue},
       {"arg3", asValue},
       {"arg4", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_assemble_pair",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::AssemblePair, MMAHandlerOp::SubToFunc>),
     {{{"pair", asAddr}, {"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_build_acc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::AssembleAcc,
                         MMAHandlerOp::SubToFuncReverseArgOnLE>),
     {{{"acc", asAddr},
       {"arg1", asValue},
       {"arg2", asValue},
       {"arg3", asValue},
       {"arg4", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_disassemble_acc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::DisassembleAcc, MMAHandlerOp::SubToFunc>),
     {{{"data", asAddr}, {"acc", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_disassemble_pair",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::DisassemblePair, MMAHandlerOp::SubToFunc>),
     {{{"data", asAddr}, {"pair", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2nn,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2np,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2pn,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2pp,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2np, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2s",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2s, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2spp,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi4ger8_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi4ger8, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi4ger8pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi4ger8pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi8ger4_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi8ger4, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi8ger4pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi8ger4pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi8ger4spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi8ger4spp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2np, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2np, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2s",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2s, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2spp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi4ger8_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi4ger8, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi4ger8pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi4ger8pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi8ger4_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi8ger4, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi8ger4pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi8ger4pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi8ger4spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi8ger4spp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xxmfacc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xxmfacc, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}}},
     /*isElemental=*/true},
    {"__ppc_mma_xxmtacc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xxmtacc, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}}},
     /*isElemental=*/true},
    {"__ppc_mma_xxsetaccz",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xxsetaccz, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}}},
     /*isElemental=*/true},
    {"__ppc_mtfsf",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<false>),
     {{{"mask", asValue}, {"r", asValue}}},
     /*isElemental=*/false},
    {"__ppc_mtfsfi",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<true>),
     {{{"bf", asValue}, {"i", asValue}}},
     /*isElemental=*/false},
    {"__ppc_vec_abs",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecAbs),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_add",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Add>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_and",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::And>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_any_ge",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAnyCompare<VecOp::Anyge>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmpge",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmpge>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmpgt",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmpgt>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmple",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmple>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmplt",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmplt>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_convert",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecConvert<VecOp::Convert>),
     {{{"v", asValue}, {"mold", asValue}}},
     /*isElemental=*/false},
    {"__ppc_vec_ctf",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecConvert<VecOp::Ctf>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cvf",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecConvert<VecOp::Cvf>),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_extract",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecExtract),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_insert",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecInsert),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_ld",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Ld>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lde",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Lde>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_ldl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Ldl>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lvsl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLvsGrp<VecOp::Lvsl>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lvsr",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLvsGrp<VecOp::Lvsr>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lxv",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdNoCallGrp<VecOp::Lxv>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lxvp",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Lxvp>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_mergeh",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecMerge<VecOp::Mergeh>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_mergel",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecMerge<VecOp::Mergel>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_msub",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecNmaddMsub<VecOp::Msub>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_mul",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Mul>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_nmadd",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecNmaddMsub<VecOp::Nmadd>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_perm",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecPerm<VecOp::Perm>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_permi",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecPerm<VecOp::Permi>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sel",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecSel),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sl>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sld",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sld>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sldw",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sldw>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sll",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sll>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_slo",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Slo>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_splat",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecSplat<VecOp::Splat>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_splat_s32_",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecSplat<VecOp::Splat_s32>),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_splats",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecSplat<VecOp::Splats>),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sr",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sr>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_srl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Srl>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sro",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sro>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_st",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecStore<VecOp::St>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_ste",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecStore<VecOp::Ste>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_stxv",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Stxv>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_stxvp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecStore<VecOp::Stxvp>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_sub",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Sub>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_xl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlGrp),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xl_be",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdNoCallGrp<VecOp::Xlbe>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xld2_",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Xld2>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xlds",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlds),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xlw4_",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Xlw4>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xor",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Xor>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_xst",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xst>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xst_be",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xst_be>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xstd2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xstd2>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xstw4_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xstw4>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
};

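// PPC-specific math operations. Entries that share a name are overloads
// distinguished by their function type. The table must be kept sorted by
// name; this is verified by the static_assert on ppcMathOps below.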
static constexpr MathOperation ppcMathOperations[] = {
    // fcfi is just another name for fcfid, there is no llvm.ppc.fcfi.
    {"__ppc_fcfi", "llvm.ppc.fcfid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fcfid", "llvm.ppc.fcfid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fcfud", "llvm.ppc.fcfud", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctid", "llvm.ppc.fctid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctidz", "llvm.ppc.fctidz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctiw", "llvm.ppc.fctiw", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctiwz", "llvm.ppc.fctiwz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctudz", "llvm.ppc.fctudz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctuwz", "llvm.ppc.fctuwz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fmadd", "llvm.fma.f32",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genMathOp<mlir::math::FmaOp>},
    {"__ppc_fmadd", "llvm.fma.f64",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genMathOp<mlir::math::FmaOp>},
    {"__ppc_fmsub", "llvm.ppc.fmsubs",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fmsub", "llvm.ppc.fmsub",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fnabs", "llvm.ppc.fnabss", genFuncType<Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fnabs", "llvm.ppc.fnabs", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fnmadd", "llvm.ppc.fnmadds",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fnmadd", "llvm.ppc.fnmadd",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fnmsub", "llvm.ppc.fnmsub.f32",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fnmsub", "llvm.ppc.fnmsub.f64",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fre", "llvm.ppc.fre", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fres", "llvm.ppc.fres", genFuncType<Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_frsqrte", "llvm.ppc.frsqrte", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_frsqrtes", "llvm.ppc.frsqrtes",
     genFuncType<Ty::Real<4>, Ty::Real<4>>, genLibCall},
    {"__ppc_vec_cvbf16spn", "llvm.ppc.vsx.xvcvbf16spn",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall},
    {"__ppc_vec_cvspbf16_", "llvm.ppc.vsx.xvcvspbf16",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall},
    {"__ppc_vec_madd", "llvm.fma.v4f32",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>,
                 Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_madd", "llvm.fma.v2f64",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>,
                 Ty::RealVector<8>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsb",
     genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
                 Ty::IntegerVector<1>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsh",
     genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
                 Ty::IntegerVector<2>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsw",
     genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                 Ty::IntegerVector<4>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsd",
     genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
                 Ty::IntegerVector<8>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxub",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
                 Ty::UnsignedVector<1>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxuh",
     genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
                 Ty::UnsignedVector<2>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxuw",
     genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
                 Ty::UnsignedVector<4>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxud",
     genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
                 Ty::UnsignedVector<8>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.vsx.xvmaxsp",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.vsx.xvmaxdp",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsb",
     genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
                 Ty::IntegerVector<1>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsh",
     genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
                 Ty::IntegerVector<2>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsw",
     genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                 Ty::IntegerVector<4>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsd",
     genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
                 Ty::IntegerVector<8>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminub",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
                 Ty::UnsignedVector<1>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminuh",
     genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
                 Ty::UnsignedVector<2>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminuw",
     genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
                 Ty::UnsignedVector<4>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminud",
     genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
                 Ty::UnsignedVector<8>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.vsx.xvminsp",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.vsx.xvmindp",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>,
     genLibCall},
    {"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v4f32",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>,
                 Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v2f64",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>,
                 Ty::RealVector<8>>,
     genLibCall},
};

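// Look up the handler for a PPC intrinsic by name with a binary search over
// the sorted ppcHandlers table; returns nullptr if the name is not found.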
const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name) {
  auto compare = [](const IntrinsicHandler &ppcHandler, llvm::StringRef name) {
    return name.compare(ppcHandler.name) > 0;
  };
  auto result = llvm::lower_bound(ppcHandlers, name, compare);
  return result != std::end(ppcHandlers) && result->name == name ? result
                                                                 : nullptr;
}

using RtMap = Fortran::common::StaticMultimapView<MathOperation>;
static constexpr RtMap ppcMathOps(ppcMathOperations);
static_assert(ppcMathOps.Verify() && "map must be sorted");

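// Return the range of ppcMathOperations entries matching the given name so
// that the caller can select the overload with the matching function type.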
std::pair<const MathOperation *, const MathOperation *>
checkPPCMathOperationsRange(llvm::StringRef name) {
  return ppcMathOps.equal_range(name);
}

// Helper functions for vector element ordering.
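// "BE vector element order on LE" describes lowering for a little-endian
// target when the NoPPCNativeVecElemOrder lowering option requests
// big-endian (non-native) vector element ordering.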
bool PPCIntrinsicLibrary::isBEVecElemOrderOnLE() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() &&
          converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}
bool PPCIntrinsicLibrary::isNativeVecElemOrderOnLE() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() &&
          !converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}
bool PPCIntrinsicLibrary::changeVecElemOrder() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() !=
          converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}

static mlir::FunctionType genMmaVpFuncType(mlir::MLIRContext *context,
                                           int quadCnt, int pairCnt, int vecCnt,
                                           int intCnt = 0,
                                           int vecElemBitSize = 8,
                                           int intBitSize = 32) {
  // Constructs a function type with the following signature:
  // Result type: __vector_pair
  // Arguments:
  // quadCnt: number of arguments that have __vector_quad type, followed by
  // pairCnt: number of arguments that have __vector_pair type, followed by
  // vecCnt: number of arguments that have vector(integer) type, followed by
  // intCnt: number of arguments that have integer type
  // vecElemBitSize: specifies the size of vector elements in bits
  // intBitSize: specifies the size of integer arguments in bits
  auto vType{mlir::VectorType::get(
      128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))};
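  // __vector_pair and __vector_quad are modeled as 256-bit and 512-bit
  // vectors of i1, matching their register sizes.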
  auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
  auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
  auto iType{mlir::IntegerType::get(context, intBitSize)};
  llvm::SmallVector<mlir::Type> argTypes;
  for (int i = 0; i < quadCnt; ++i) {
    argTypes.push_back(vqType);
  }
  for (int i = 0; i < pairCnt; ++i) {
    argTypes.push_back(vpType);
  }
  for (int i = 0; i < vecCnt; ++i) {
    argTypes.push_back(vType);
  }
  for (int i = 0; i < intCnt; ++i) {
    argTypes.push_back(iType);
  }

  return mlir::FunctionType::get(context, argTypes, {vpType});
}

static mlir::FunctionType genMmaVqFuncType(mlir::MLIRContext *context,
                                           int quadCnt, int pairCnt, int vecCnt,
                                           int intCnt = 0,
                                           int vecElemBitSize = 8,
                                           int intBitSize = 32) {
  // Constructs a function type with the following signature:
  // Result type: __vector_quad
  // Arguments:
  // quadCnt: number of arguments that have __vector_quad type, followed by
  // pairCnt: number of arguments that have __vector_pair type, followed by
  // vecCnt: number of arguments that have vector(integer) type, followed by
  // intCnt: number of arguments that have integer type
  // vecElemBitSize: specifies the size of vector elements in bits
  // intBitSize: specifies the size of integer arguments in bits
  auto vType{mlir::VectorType::get(
      128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))};
  auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
  auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
  auto iType{mlir::IntegerType::get(context, intBitSize)};
  llvm::SmallVector<mlir::Type> argTypes;
  for (int i = 0; i < quadCnt; ++i) {
    argTypes.push_back(vqType);
  }
  for (int i = 0; i < pairCnt; ++i) {
    argTypes.push_back(vpType);
  }
  for (int i = 0; i < vecCnt; ++i) {
    argTypes.push_back(vType);
  }
  for (int i = 0; i < intCnt; ++i) {
    argTypes.push_back(iType);
  }

  return mlir::FunctionType::get(context, argTypes, {vqType});
}

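// Builds the signature of an MMA disassemble intrinsic: it takes an
// accumulator (or a pair) and returns an LLVM struct of four (or two)
// 16-byte vectors.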
mlir::FunctionType genMmaDisassembleFuncType(mlir::MLIRContext *context,
                                             MMAOp mmaOp) {
  auto vType{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))};
  llvm::SmallVector<mlir::Type> members;

  if (mmaOp == MMAOp::DisassembleAcc) {
    auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
    members.push_back(vType);
    members.push_back(vType);
    members.push_back(vType);
    members.push_back(vType);
    auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)};
    return mlir::FunctionType::get(context, {vqType}, {resType});
  } else if (mmaOp == MMAOp::DisassemblePair) {
    auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
    members.push_back(vType);
    members.push_back(vType);
    auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)};
    return mlir::FunctionType::get(context, {vpType}, {resType});
  } else {
    llvm_unreachable(
        "Unsupported intrinsic code for function signature generator");
  }
}

//===----------------------------------------------------------------------===//
// PowerPC specific intrinsic handlers.
//===----------------------------------------------------------------------===//

// MTFSF, MTFSFI
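// The isImm template parameter selects the immediate form, mtfsfi, over the
// register form, mtfsf.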
template <bool isImm>
void PPCIntrinsicLibrary::genMtfsf(llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  llvm::SmallVector<mlir::Value> scalarArgs;
  for (const fir::ExtendedValue &arg : args)
    if (arg.getUnboxed())
      scalarArgs.emplace_back(fir::getBase(arg));
    else
      mlir::emitError(loc, "nonscalar intrinsic argument");

  mlir::FunctionType libFuncType;
  mlir::func::FuncOp funcOp;
  if (isImm) {
    libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Integer<4>>(
        builder.getContext(), builder);
    funcOp = builder.createFunction(loc, "llvm.ppc.mtfsfi", libFuncType);
  } else {
    libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Real<8>>(
        builder.getContext(), builder);
    funcOp = builder.createFunction(loc, "llvm.ppc.mtfsf", libFuncType);
  }
  builder.create<fir::CallOp>(loc, funcOp, scalarArgs);
}

// VEC_ABS
fir::ExtendedValue
PPCIntrinsicLibrary::genVecAbs(mlir::Type resultType,
                               llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 1);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto vTypeInfo{getVecTypeFromFir(argBases[0])};

  mlir::func::FuncOp funcOp{nullptr};
  mlir::FunctionType ftype;
  llvm::StringRef fname{};
  if (vTypeInfo.isFloat()) {
    if (vTypeInfo.isFloat32()) {
      fname = "llvm.fabs.v4f32";
      ftype =
          genFuncType<Ty::RealVector<4>, Ty::RealVector<4>>(context, builder);
    } else if (vTypeInfo.isFloat64()) {
      fname = "llvm.fabs.v2f64";
      ftype =
          genFuncType<Ty::RealVector<8>, Ty::RealVector<8>>(context, builder);
    }

    funcOp = builder.createFunction(loc, fname, ftype);
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, argBases[0])};
    return callOp.getResult(0);
  } else if (auto eleTy = vTypeInfo.eleTy.dyn_cast<mlir::IntegerType>()) {
    // vec_abs(arg1) = max(0 - arg1, arg1)

    auto newVecTy{mlir::VectorType::get(vTypeInfo.len, eleTy)};
    auto varg1{builder.createConvert(loc, newVecTy, argBases[0])};
    // construct vector(0,..)
    auto zeroVal{builder.createIntegerConstant(loc, eleTy, 0)};
    auto vZero{
        builder.create<mlir::vector::BroadcastOp>(loc, newVecTy, zeroVal)};
    auto zeroSubVarg1{builder.create<mlir::arith::SubIOp>(loc, vZero, varg1)};

    mlir::func::FuncOp funcOp{nullptr};
    switch (eleTy.getWidth()) {
    case 8:
      fname = "llvm.ppc.altivec.vmaxsb";
      ftype = genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
                          Ty::IntegerVector<1>>(context, builder);
      break;
    case 16:
      fname = "llvm.ppc.altivec.vmaxsh";
      ftype = genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
                          Ty::IntegerVector<2>>(context, builder);
      break;
    case 32:
      fname = "llvm.ppc.altivec.vmaxsw";
      ftype = genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                          Ty::IntegerVector<4>>(context, builder);
      break;
    case 64:
      fname = "llvm.ppc.altivec.vmaxsd";
      ftype = genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
                          Ty::IntegerVector<8>>(context, builder);
      break;
    default:
      llvm_unreachable("invalid integer size");
    }
    funcOp = builder.createFunction(loc, fname, ftype);

    mlir::Value args[] = {zeroSubVarg1, varg1};
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, args)};
    return builder.createConvert(loc, argBases[0].getType(),
                                 callOp.getResult(0));
  }

  llvm_unreachable("unknown vector type");
}

// VEC_ADD, VEC_AND, VEC_SUB, VEC_MUL, VEC_XOR
template <VecOp vop>
fir::ExtendedValue PPCIntrinsicLibrary::genVecAddAndMulSubXor(
    mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto argBases{getBasesForArgs(args)};
  auto argsTy{getTypesForArgs(argBases)};
  assert(argsTy[0].isa<fir::VectorType>() && argsTy[1].isa<fir::VectorType>());

  auto vecTyInfo{getVecTypeFromFir(argBases[0])};

  const auto isInteger{vecTyInfo.eleTy.isa<mlir::IntegerType>()};
  const auto isFloat{vecTyInfo.eleTy.isa<mlir::FloatType>()};
  assert((isInteger || isFloat) && "unknown vector type");

  auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)};

  mlir::Value r{nullptr};
  switch (vop) {
  case VecOp::Add:
    if (isInteger)
      r = builder.create<mlir::arith::AddIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::AddFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::Mul:
    if (isInteger)
      r = builder.create<mlir::arith::MulIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::MulFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::Sub:
    if (isInteger)
      r = builder.create<mlir::arith::SubIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::SubFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::And:
  case VecOp::Xor: {
    mlir::Value arg1{nullptr};
    mlir::Value arg2{nullptr};
    if (isInteger) {
      arg1 = vargs[0];
      arg2 = vargs[1];
    } else if (isFloat) {
      // bitcast the arguments to integer
      auto wd{vecTyInfo.eleTy.dyn_cast<mlir::FloatType>().getWidth()};
      auto ftype{builder.getIntegerType(wd)};
      auto bcVecTy{mlir::VectorType::get(vecTyInfo.len, ftype)};
      arg1 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0]);
      arg2 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1]);
    }
    if (vop == VecOp::And)
      r = builder.create<mlir::arith::AndIOp>(loc, arg1, arg2);
    else if (vop == VecOp::Xor)
      r = builder.create<mlir::arith::XOrIOp>(loc, arg1, arg2);

    if (isFloat)
      r = builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), r);

    break;
  }
  }

  return builder.createConvert(loc, argsTy[0], r);
}

// VEC_ANY_GE
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecAnyCompare(mlir::Type resultType,
                                      llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  assert(vop == VecOp::Anyge && "unknown vector compare operation");
  auto argBases{getBasesForArgs(args)};
  VecTypeInfo vTypeInfo{getVecTypeFromFir(argBases[0])};
  [[maybe_unused]] const auto isSupportedTy{
      mlir::isa<mlir::Float32Type, mlir::Float64Type, mlir::IntegerType>(
          vTypeInfo.eleTy)};
  assert(isSupportedTy && "unsupported vector type");

  // Constants for mapping CR6 bits to predicate result
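  // The altivec predicate (.p) builtins take a CR6 selector as their first
  // argument; the *_REV values test the complemented condition. For example,
  // any(arg1 >= arg2) is computed as !all(arg2 > arg1), i.e. vcmpgt with
  // reversed arguments and CR6_LT_REV.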
  enum { CR6_EQ_REV = 1, CR6_LT_REV = 3 };

  auto context{builder.getContext()};

  static std::map<std::pair<ParamTypeId, unsigned>,
                  std::pair<llvm::StringRef, mlir::FunctionType>>
      uiBuiltin{
          {std::make_pair(ParamTypeId::IntegerVector, 8),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsb.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<1>,
                           Ty::IntegerVector<1>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 16),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsh.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<2>,
                           Ty::IntegerVector<2>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 32),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsw.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<4>,
                           Ty::IntegerVector<4>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 64),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsd.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<8>,
                           Ty::IntegerVector<8>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 8),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtub.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                           Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>(
                   context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 16),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtuh.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                           Ty::UnsignedVector<2>, Ty::UnsignedVector<2>>(
                   context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 32),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtuw.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                           Ty::UnsignedVector<4>, Ty::UnsignedVector<4>>(
                   context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 64),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtud.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                           Ty::UnsignedVector<8>, Ty::UnsignedVector<8>>(
                   context, builder))},
      };

  mlir::FunctionType ftype{nullptr};
  llvm::StringRef fname;
  const auto i32Ty{mlir::IntegerType::get(context, 32)};
  llvm::SmallVector<mlir::Value> cmpArgs;
  mlir::Value op{nullptr};
  const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};

  if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) {
    std::pair<llvm::StringRef, mlir::FunctionType> bi;
    bi = (elementTy.isUnsignedInteger())
             ? uiBuiltin[std::pair(ParamTypeId::UnsignedVector, width)]
             : uiBuiltin[std::pair(ParamTypeId::IntegerVector, width)];

    fname = std::get<0>(bi);
    ftype = std::get<1>(bi);

    op = builder.createIntegerConstant(loc, i32Ty, CR6_LT_REV);
    cmpArgs.emplace_back(op);
    // reverse the argument order
    cmpArgs.emplace_back(argBases[1]);
    cmpArgs.emplace_back(argBases[0]);
  } else if (vTypeInfo.isFloat()) {
    if (vTypeInfo.isFloat32()) {
      fname = "llvm.ppc.vsx.xvcmpgesp.p";
      ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<4>,
                          Ty::RealVector<4>>(context, builder);
    } else {
      fname = "llvm.ppc.vsx.xvcmpgedp.p";
      ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<8>,
                          Ty::RealVector<8>>(context, builder);
    }
    op = builder.createIntegerConstant(loc, i32Ty, CR6_EQ_REV);
    cmpArgs.emplace_back(op);
    cmpArgs.emplace_back(argBases[0]);
    cmpArgs.emplace_back(argBases[1]);
  }
  assert((!fname.empty() && ftype) && "invalid type");

  mlir::func::FuncOp funcOp{builder.createFunction(loc, fname, ftype)};
  auto callOp{builder.create<fir::CallOp>(loc, funcOp, cmpArgs)};
  return callOp.getResult(0);
}

static std::pair<llvm::StringRef, mlir::FunctionType>
getVecCmpFuncTypeAndName(VecTypeInfo &vTypeInfo, VecOp vop,
                         fir::FirOpBuilder &builder) {
  auto context{builder.getContext()};
  static std::map<std::pair<ParamTypeId, unsigned>,
                  std::pair<llvm::StringRef, mlir::FunctionType>>
      iuBuiltinName{
          {std::make_pair(ParamTypeId::IntegerVector, 8),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsb",
               genFuncType<Ty::UnsignedVector<1>, Ty::IntegerVector<1>,
                           Ty::IntegerVector<1>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 16),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsh",
               genFuncType<Ty::UnsignedVector<2>, Ty::IntegerVector<2>,
                           Ty::IntegerVector<2>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 32),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsw",
               genFuncType<Ty::UnsignedVector<4>, Ty::IntegerVector<4>,
                           Ty::IntegerVector<4>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 64),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsd",
               genFuncType<Ty::UnsignedVector<8>, Ty::IntegerVector<8>,
                           Ty::IntegerVector<8>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 8),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtub",
               genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
                           Ty::UnsignedVector<1>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 16),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtuh",
               genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
                           Ty::UnsignedVector<2>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 32),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtuw",
               genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
                           Ty::UnsignedVector<4>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 64),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtud",
               genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
                           Ty::UnsignedVector<8>>(context, builder))}};

  // VSX only defines GE and GT builtins. Cmple and Cmplt use GE and GT with
  // the arguments reversed.
  enum class Cmp { gtOrLt, geOrLe };
  static std::map<std::pair<Cmp, int>,
                  std::pair<llvm::StringRef, mlir::FunctionType>>
      rGBI{{std::make_pair(Cmp::geOrLe, 32),
            std::make_pair("llvm.ppc.vsx.xvcmpgesp",
                           genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>,
                                       Ty::RealVector<4>>(context, builder))},
           {std::make_pair(Cmp::geOrLe, 64),
            std::make_pair("llvm.ppc.vsx.xvcmpgedp",
                           genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>,
                                       Ty::RealVector<8>>(context, builder))},
           {std::make_pair(Cmp::gtOrLt, 32),
            std::make_pair("llvm.ppc.vsx.xvcmpgtsp",
                           genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>,
                                       Ty::RealVector<4>>(context, builder))},
           {std::make_pair(Cmp::gtOrLt, 64),
            std::make_pair("llvm.ppc.vsx.xvcmpgtdp",
                           genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>,
                                       Ty::RealVector<8>>(context, builder))}};

  const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};
  std::pair<llvm::StringRef, mlir::FunctionType> specFunc;
  if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy))
    specFunc =
        (elementTy.isUnsignedInteger())
            ? iuBuiltinName[std::make_pair(ParamTypeId::UnsignedVector, width)]
            : iuBuiltinName[std::make_pair(ParamTypeId::IntegerVector, width)];
  else if (vTypeInfo.isFloat())
    specFunc = (vop == VecOp::Cmpge || vop == VecOp::Cmple)
                   ? rGBI[std::make_pair(Cmp::geOrLe, width)]
                   : rGBI[std::make_pair(Cmp::gtOrLt, width)];

  assert(!std::get<0>(specFunc).empty() && "unknown builtin name");
  assert(std::get<1>(specFunc) && "unknown function type");
  return specFunc;
}

// VEC_CMPGE, VEC_CMPGT, VEC_CMPLE, VEC_CMPLT
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecCmp(mlir::Type resultType,
                               llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  VecTypeInfo vecTyInfo{getVecTypeFromFir(argBases[0])};
  auto varg{convertVecArgs(builder, loc, vecTyInfo, argBases)};

  std::pair<llvm::StringRef, mlir::FunctionType> funcTyNam{
      getVecCmpFuncTypeAndName(vecTyInfo, vop, builder)};

  mlir::func::FuncOp funcOp = builder.createFunction(
      loc, std::get<0>(funcTyNam), std::get<1>(funcTyNam));

  mlir::Value res{nullptr};

  if (auto eTy = vecTyInfo.eleTy.dyn_cast<mlir::IntegerType>()) {
    constexpr int firstArg{0};
    constexpr int secondArg{1};
    std::map<VecOp, std::array<int, 2>> argOrder{
        {VecOp::Cmpge, {secondArg, firstArg}},
        {VecOp::Cmple, {firstArg, secondArg}},
        {VecOp::Cmpgt, {firstArg, secondArg}},
        {VecOp::Cmplt, {secondArg, firstArg}}};

    // Construct the function return type, unsigned vector, for conversion.
    auto itype = mlir::IntegerType::get(context, eTy.getWidth(),
                                        mlir::IntegerType::Unsigned);
    auto returnType = fir::VectorType::get(vecTyInfo.len, itype);

    switch (vop) {
    case VecOp::Cmpgt:
    case VecOp::Cmplt: {
      // arg1 > arg2 --> vcmpgt(arg1, arg2)
      // arg1 < arg2 --> vcmpgt(arg2, arg1)
      mlir::Value vargs[]{argBases[argOrder[vop][0]],
                          argBases[argOrder[vop][1]]};
      auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
      res = callOp.getResult(0);
      break;
    }
    case VecOp::Cmpge:
    case VecOp::Cmple: {
      // arg1 >= arg2 --> vcmpge(arg2, arg1) xor vector(-1)
      // arg1 <= arg2 --> vcmpge(arg1, arg2) xor vector(-1)
      mlir::Value vargs[]{argBases[argOrder[vop][0]],
                          argBases[argOrder[vop][1]]};

      // Construct a constant vector(-1)
      auto negOneVal{builder.createIntegerConstant(
          loc, getConvertedElementType(context, eTy), -1)};
      auto vNegOne{builder.create<mlir::vector::BroadcastOp>(
          loc, vecTyInfo.toMlirVectorType(context), negOneVal)};

      auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
      mlir::Value callRes{callOp.getResult(0)};
      auto vargs2{
          convertVecArgs(builder, loc, vecTyInfo, mlir::ValueRange{callRes})};
      auto xorRes{builder.create<mlir::arith::XOrIOp>(loc, vargs2[0], vNegOne)};

      res = builder.createConvert(loc, returnType, xorRes);
      break;
    }
    default:
      llvm_unreachable("Invalid vector operation for generator");
    }
  } else if (vecTyInfo.isFloat()) {
    mlir::Value vargs[2];
    switch (vop) {
    case VecOp::Cmpge:
    case VecOp::Cmpgt:
      vargs[0] = argBases[0];
      vargs[1] = argBases[1];
      break;
    case VecOp::Cmple:
    case VecOp::Cmplt:
      // Swap the arguments as xvcmpg[et] is used
      vargs[0] = argBases[1];
      vargs[1] = argBases[0];
      break;
    default:
      llvm_unreachable("Invalid vector operation for generator");
    }
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
    res = callOp.getResult(0);
  } else
    llvm_unreachable("invalid vector type");

  return res;
}

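// Swap the two 4-byte words within each 8-byte doubleword of a 16-byte
// vector: the value is bitcast to <16 x i8>, shuffled bytewise, and cast
// back to its original type if needed.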
static inline mlir::Value swapVectorWordPairs(fir::FirOpBuilder &builder,
                                              mlir::Location loc,
                                              mlir::Value arg) {
  auto ty = arg.getType();
  auto context{builder.getContext()};
  auto vtype{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))};

  if (ty != vtype)
    arg = builder.create<mlir::LLVM::BitcastOp>(loc, vtype, arg).getResult();

  llvm::SmallVector<int64_t, 16> mask{4,  5,  6,  7,  0, 1, 2,  3,
                                      12, 13, 14, 15, 8, 9, 10, 11};
  arg = builder.create<mlir::vector::ShuffleOp>(loc, arg, arg, mask);
  if (ty != vtype)
    arg = builder.create<mlir::LLVM::BitcastOp>(loc, ty, arg);
  return arg;
}

// VEC_CONVERT, VEC_CTF, VEC_CVF
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType,
                                   llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto vecTyInfo{getVecTypeFromFir(argBases[0])};
  auto mlirTy{vecTyInfo.toMlirVectorType(context)};
  auto vArg1{builder.createConvert(loc, mlirTy, argBases[0])};
  const auto i32Ty{mlir::IntegerType::get(context, 32)};

  switch (vop) {
  case VecOp::Ctf: {
    assert(args.size() == 2);
    auto convArg{builder.createConvert(loc, i32Ty, argBases[1])};
    auto eTy{vecTyInfo.eleTy.dyn_cast<mlir::IntegerType>()};
    assert(eTy && "Unsupported vector type");
    const auto isUnsigned{eTy.isUnsignedInteger()};
    const auto width{eTy.getWidth()};

    if (width == 32) {
      auto ftype{(isUnsigned)
                     ? genFuncType<Ty::RealVector<4>, Ty::UnsignedVector<4>,
                                   Ty::Integer<4>>(context, builder)
                     : genFuncType<Ty::RealVector<4>, Ty::IntegerVector<4>,
                                   Ty::Integer<4>>(context, builder)};
      const llvm::StringRef fname{(isUnsigned) ? "llvm.ppc.altivec.vcfux"
                                               : "llvm.ppc.altivec.vcfsx"};
      auto funcOp{builder.createFunction(loc, fname, ftype)};
      mlir::Value newArgs[] = {argBases[0], convArg};
      auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};

      return callOp.getResult(0);
    } else if (width == 64) {
      auto fTy{mlir::FloatType::getF64(context)};
      auto ty{mlir::VectorType::get(2, fTy)};

      // vec_ctf(arg1, arg2) = fmul(1.0 / (1 << arg2), llvm.sitofp(arg1))
      auto convOp{(isUnsigned)
                      ? builder.create<mlir::LLVM::UIToFPOp>(loc, ty, vArg1)
                      : builder.create<mlir::LLVM::SIToFPOp>(loc, ty, vArg1)};

      // Construct the vector <1.0/(1<<arg2), 1.0/(1<<arg2)>
      auto constInt{
          mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[1].getDefiningOp())
              .getValue()
              .dyn_cast_or_null<mlir::IntegerAttr>()};
      assert(constInt && "expected integer constant argument");
      double f{1.0 / (1 << constInt.getInt())};
      llvm::SmallVector<double> vals{f, f};
      auto constOp{builder.create<mlir::arith::ConstantOp>(
          loc, ty, builder.getF64VectorAttr(vals))};

      auto mulOp{builder.create<mlir::LLVM::FMulOp>(
          loc, ty, convOp->getResult(0), constOp)};

      return builder.createConvert(loc, fir::VectorType::get(2, fTy), mulOp);
    }
    llvm_unreachable("invalid element integer kind");
  }
  case VecOp::Convert: {
    assert(args.size() == 2);
    // resultType has the mold type (if scalar) or element type (if array)
    auto resTyInfo{getVecTypeFromFirType(resultType)};
    auto moldTy{resTyInfo.toMlirVectorType(context)};
    auto firTy{resTyInfo.toFirVectorType()};

    // vec_convert(v, mold) = bitcast v to "type of mold"
    auto conv{builder.create<mlir::LLVM::BitcastOp>(loc, moldTy, vArg1)};

    return builder.createConvert(loc, firTy, conv);
  }
  case VecOp::Cvf: {
    assert(args.size() == 1);

    mlir::Value newArgs[]{vArg1};
    if (vecTyInfo.isFloat32()) {
      if (changeVecElemOrder())
        newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);

      const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp"};
      auto ftype{
          genFuncType<Ty::RealVector<8>, Ty::RealVector<4>>(context, builder)};
      auto funcOp{builder.createFunction(loc, fname, ftype)};
      auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};

      return callOp.getResult(0);
    } else if (vecTyInfo.isFloat64()) {
      const llvm::StringRef fname{"llvm.ppc.vsx.xvcvdpsp"};
      auto ftype{
          genFuncType<Ty::RealVector<4>, Ty::RealVector<8>>(context, builder)};
      auto funcOp{builder.createFunction(loc, fname, ftype)};
      newArgs[0] =
          builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0);
      auto fvf32Ty{newArgs[0].getType()};
      auto f32type{mlir::FloatType::getF32(context)};
      auto mvf32Ty{mlir::VectorType::get(4, f32type)};
      newArgs[0] = builder.createConvert(loc, mvf32Ty, newArgs[0]);

      if (changeVecElemOrder())
        newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);

      return builder.createConvert(loc, fvf32Ty, newArgs[0]);
    }
    llvm_unreachable("invalid element real kind");
  }
  default:
    llvm_unreachable("Invalid vector operation for generator");
  }
}

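// Convert an element index between the native and the reversed (BE on LE)
// vector element orders: idx -> (len - 1) - idx.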
static mlir::Value convertVectorElementOrder(fir::FirOpBuilder &builder,
                                             mlir::Location loc,
                                             VecTypeInfo vecInfo,
                                             mlir::Value idx) {
  mlir::Value numSub1{
      builder.createIntegerConstant(loc, idx.getType(), vecInfo.len - 1)};
  return builder.create<mlir::LLVM::SubOp>(loc, idx.getType(), numSub1, idx);
}

// VEC_EXTRACT
fir::ExtendedValue
PPCIntrinsicLibrary::genVecExtract(mlir::Type resultType,
                                   llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto argBases{getBasesForArgs(args)};
  auto argTypes{getTypesForArgs(argBases)};
  auto vecTyInfo{getVecTypeFromFir(argBases[0])};

  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
  auto varg0{builder.createConvert(loc, mlirTy, argBases[0])};

  // arg2 modulo the number of elements in arg1 to determine the element
  // position
  auto numEle{builder.createIntegerConstant(loc, argTypes[1], vecTyInfo.len)};
  mlir::Value uremOp{
      builder.create<mlir::LLVM::URemOp>(loc, argBases[1], numEle)};

  if (!isNativeVecElemOrderOnLE())
    uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp);

  return builder.create<mlir::vector::ExtractElementOp>(loc, varg0, uremOp);
}

// VEC_INSERT
fir::ExtendedValue
PPCIntrinsicLibrary::genVecInsert(mlir::Type resultType,
                                  llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto argBases{getBasesForArgs(args)};
  auto argTypes{getTypesForArgs(argBases)};
  auto vecTyInfo{getVecTypeFromFir(argBases[1])};
  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
  auto varg1{builder.createConvert(loc, mlirTy, argBases[1])};

  auto numEle{builder.createIntegerConstant(loc, argTypes[2], vecTyInfo.len)};
  mlir::Value uremOp{
      builder.create<mlir::LLVM::URemOp>(loc, argBases[2], numEle)};

  if (!isNativeVecElemOrderOnLE())
    uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp);

  auto res{builder.create<mlir::vector::InsertElementOp>(loc, argBases[0],
                                                         varg1, uremOp)};
  return builder.create<fir::ConvertOp>(loc, vecTyInfo.toFirVectorType(), res);
}

// VEC_MERGEH, VEC_MERGEL
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecMerge(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto argBases{getBasesForArgs(args)};
  auto vecTyInfo{getVecTypeFromFir(argBases[0])};
  llvm::SmallVector<int64_t, 16> mMask; // native vector element order mask
  llvm::SmallVector<int64_t, 16> rMask; // non-native vector element order mask

  switch (vop) {
  case VecOp::Mergeh: {
    switch (vecTyInfo.len) {
    case 2: {
      enum { V1 = 0, V2 = 2 };
      mMask = {V1 + 0, V2 + 0};
      rMask = {V2 + 1, V1 + 1};
      break;
    }
    case 4: {
      enum { V1 = 0, V2 = 4 };
      mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1};
      rMask = {V2 + 2, V1 + 2, V2 + 3, V1 + 3};
      break;
    }
    case 8: {
      enum { V1 = 0, V2 = 8 };
      mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1, V1 + 2, V2 + 2, V1 + 3, V2 + 3};
      rMask = {V2 + 4, V1 + 4, V2 + 5, V1 + 5, V2 + 6, V1 + 6, V2 + 7, V1 + 7};
      break;
    }
    case 16:
      mMask = {0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13,
               0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17};
      rMask = {0x18, 0x08, 0x19, 0x09, 0x1A, 0x0A, 0x1B, 0x0B,
               0x1C, 0x0C, 0x1D, 0x0D, 0x1E, 0x0E, 0x1F, 0x0F};
      break;
    default:
      llvm_unreachable("unexpected vector length");
    }
    break;
  }
  case VecOp::Mergel: {
    switch (vecTyInfo.len) {
    case 2: {
      enum { V1 = 0, V2 = 2 };
      mMask = {V1 + 1, V2 + 1};
      rMask = {V2 + 0, V1 + 0};
      break;
    }
    case 4: {
      enum { V1 = 0, V2 = 4 };
      mMask = {V1 + 2, V2 + 2, V1 + 3, V2 + 3};
      rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1};
      break;
    }
    case 8: {
      enum { V1 = 0, V2 = 8 };
      mMask = {V1 + 4, V2 + 4, V1 + 5, V2 + 5, V1 + 6, V2 + 6, V1 + 7, V2 + 7};
      rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1, V2 + 2, V1 + 2, V2 + 3, V1 + 3};
      break;
    }
    case 16:
      mMask = {0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B,
               0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F};
      rMask = {0x10, 0x00, 0x11, 0x01, 0x12, 0x02, 0x13, 0x03,
               0x14, 0x04, 0x15, 0x05, 0x16, 0x06, 0x17, 0x07};
      break;
    default:
      llvm_unreachable("unexpected vector length");
    }
    break;
  }
  default:
    llvm_unreachable("invalid vector operation for generator");
  }

  auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)};

  llvm::SmallVector<int64_t, 16> &mergeMask =
      (isBEVecElemOrderOnLE()) ? rMask : mMask;

  auto callOp{builder.create<mlir::vector::ShuffleOp>(loc, vargs[0], vargs[1],
                                                      mergeMask)};
  return builder.createConvert(loc, resultType, callOp);
}

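// Compute the byte address <baseAddr> + <offset> by viewing baseAddr as a
// !fir.ref<!fir.array<?xi8>> and indexing into it with fir.coordinate_of.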
static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder,
                                      mlir::Location loc, mlir::Value baseAddr,
                                      mlir::Value offset) {
  auto typeExtent{fir::SequenceType::getUnknownExtent()};
  // Construct an !fir.ref<!fir.array<?xi8>> type
  auto arrRefTy{builder.getRefType(fir::SequenceType::get(
      {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))};
  // Convert arg to !fir.ref<!fir.array<?xi8>>
  auto resAddr{builder.create<fir::ConvertOp>(loc, arrRefTy, baseAddr)};

  return builder.create<fir::CoordinateOp>(loc, arrRefTy, resAddr, offset);
}

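// Reverse the elements of a vector value of length len with a single
// vector.shuffle against an undefined vector.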
static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder,
                                         mlir::Location loc, mlir::Value v,
                                         int64_t len) {
  assert(v.getType().isa<mlir::VectorType>());
  assert(len > 0);
  llvm::SmallVector<int64_t, 16> mask;
  for (int64_t i = 0; i < len; ++i) {
    mask.push_back(len - 1 - i);
  }
  auto undefVec{builder.create<fir::UndefOp>(loc, v.getType())};
  return builder.create<mlir::vector::ShuffleOp>(loc, v, undefVec, mask);
}

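// Build an i64 "alignment" attribute with the given value, suitable for
// attaching to load and store operations.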
static mlir::NamedAttribute getAlignmentAttr(fir::FirOpBuilder &builder,
                                             const int val) {
  auto i64ty{mlir::IntegerType::get(builder.getContext(), 64)};
  auto alignAttr{mlir::IntegerAttr::get(i64ty, val)};
  return builder.getNamedAttr("alignment", alignAttr);
}

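// VEC_XL
// Dispatch on the element width of the result type: 1- and 2-byte elements
// lower to a plain load, 4- and 8-byte elements to a VSX intrinsic call.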
fir::ExtendedValue
PPCIntrinsicLibrary::genVecXlGrp(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  VecTypeInfo vecTyInfo{getVecTypeFromFirType(resultType)};
  switch (vecTyInfo.eleTy.getIntOrFloatBitWidth()) {
  case 8:
    // vec_xlb1
    return genVecLdNoCallGrp<VecOp::Xl>(resultType, args);
  case 16:
    // vec_xlh8
    return genVecLdNoCallGrp<VecOp::Xl>(resultType, args);
  case 32:
    // vec_xlw4
    return genVecLdCallGrp<VecOp::Xlw4>(resultType, args);
  case 64:
    // vec_xld2
    return genVecLdCallGrp<VecOp::Xld2>(resultType, args);
  default:
    llvm_unreachable("invalid kind");
  }
  llvm_unreachable("invalid vector operation for generator");
}

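// Vector loads that lower to a plain unaligned fir.load rather than a
// PowerPC intrinsic call (VEC_XL with 1- and 2-byte elements, VEC_XL_BE).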
template <VecOp vop>
fir::ExtendedValue PPCIntrinsicLibrary::genVecLdNoCallGrp(
    mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto arg0{getBase(args[0])};
  auto arg1{getBase(args[1])};

  auto vecTyInfo{getVecTypeFromFirType(resultType)};
  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
  auto firTy{vecTyInfo.toFirVectorType()};

  // Add the %val of arg0 to %addr of arg1
  auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};

  const auto triple{fir::getTargetTriple(builder.getModule())};
  // The load must be done with an alignment of 1.
  auto result{builder.create<fir::LoadOp>(loc, mlirTy, addr,
                                          getAlignmentAttr(builder, 1))};
  if ((vop == VecOp::Xl && isBEVecElemOrderOnLE()) ||
      (vop == VecOp::Xlbe && triple.isLittleEndian()))
    return builder.createConvert(
        loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len));

  return builder.createConvert(loc, firTy, result);
}

// VEC_LD, VEC_LDE, VEC_LDL, VEC_LXVP, VEC_XLD2, VEC_XLW4
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecLdCallGrp(mlir::Type resultType,
                                     llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto context{builder.getContext()};
  auto arg0{getBase(args[0])};
  auto arg1{getBase(args[1])};

  // Prepare the return type in FIR.
  auto vecResTyInfo{getVecTypeFromFirType(resultType)};
  auto mlirTy{vecResTyInfo.toMlirVectorType(context)};
  auto firTy{vecResTyInfo.toFirVectorType()};

  // llvm.ppc.altivec.lvx* return <4 x i32>; so does llvm.ppc.altivec.lvewx
  // when arg2 is not of an integer type.
  const auto i32Ty{mlir::IntegerType::get(builder.getContext(), 32)};
  const auto mVecI32Ty{mlir::VectorType::get(4, i32Ty)};

  // For vec_ld, arg0 needs to be converted from i64 to i32
  if (vop == VecOp::Ld && arg0.getType().getIntOrFloatBitWidth() == 64)
    arg0 = builder.createConvert(loc, i32Ty, arg0);

  // Add the %val of arg0 to %addr of arg1
  auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};
  llvm::SmallVector<mlir::Value, 4> parsedArgs{addr};

  mlir::Type intrinResTy{nullptr};
  llvm::StringRef fname{};
  switch (vop) {
  case VecOp::Ld:
    fname = "llvm.ppc.altivec.lvx";
    intrinResTy = mVecI32Ty;
    break;
  case VecOp::Lde:
    switch (vecResTyInfo.eleTy.getIntOrFloatBitWidth()) {
    case 8:
      fname = "llvm.ppc.altivec.lvebx";
      intrinResTy = mlirTy;
      break;
    case 16:
      fname = "llvm.ppc.altivec.lvehx";
      intrinResTy = mlirTy;
      break;
    case 32:
      fname = "llvm.ppc.altivec.lvewx";
      if (mlir::isa<mlir::IntegerType>(vecResTyInfo.eleTy))
        intrinResTy = mlirTy;
      else
        intrinResTy = mVecI32Ty;
      break;
    default:
      llvm_unreachable("invalid vector for vec_lde");
    }
    break;
  case VecOp::Ldl:
    fname = "llvm.ppc.altivec.lvxl";
    intrinResTy = mVecI32Ty;
    break;
  case VecOp::Lxvp:
    fname = "llvm.ppc.vsx.lxvp";
    intrinResTy = fir::VectorType::get(256, mlir::IntegerType::get(context, 1));
    break;
  case VecOp::Xld2: {
    fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvd2x.be"
                                   : "llvm.ppc.vsx.lxvd2x";
    // llvm.ppc.vsx.lxvd2x* returns <2 x double>
    intrinResTy = mlir::VectorType::get(2, mlir::FloatType::getF64(context));
  } break;
  case VecOp::Xlw4:
    fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvw4x.be"
                                   : "llvm.ppc.vsx.lxvw4x";
    // llvm.ppc.vsx.lxvw4x* returns <4 x i32>
    intrinResTy = mVecI32Ty;
    break;
  default:
    llvm_unreachable("invalid vector operation for generator");
  }

  auto funcType{
      mlir::FunctionType::get(context, {addr.getType()}, {intrinResTy})};
  auto funcOp{builder.createFunction(loc, fname, funcType)};
  auto result{
      builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)};

  if (vop == VecOp::Lxvp)
    return result;

  if (intrinResTy != mlirTy)
    result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, result);

  if (vop != VecOp::Xld2 && vop != VecOp::Xlw4 && isBEVecElemOrderOnLE())
    return builder.createConvert(
        loc, firTy,
        reverseVectorElements(builder, loc, result, vecResTyInfo.len));

  return builder.createConvert(loc, firTy, result);
}

// VEC_LVSL, VEC_LVSR
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecLvsGrp(mlir::Type resultType,
                                  llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto context{builder.getContext()};
  auto arg0{getBase(args[0])};
  auto arg1{getBase(args[1])};

  auto vecTyInfo{getVecTypeFromFirType(resultType)};
  auto mlirTy{vecTyInfo.toMlirVectorType(context)};
  auto firTy{vecTyInfo.toFirVectorType()};

  // Convert arg0 to i64 type if needed
  auto i64ty{mlir::IntegerType::get(context, 64)};
  if (arg0.getType() != i64ty)
    arg0 = builder.create<fir::ConvertOp>(loc, i64ty, arg0);

  // The offset is taken modulo 16, so shift left 56 bits and then right 56
  // bits to clear the upper 56 bits while preserving the sign.
  auto shiftVal{builder.createIntegerConstant(loc, i64ty, 56)};
  auto offset{builder.create<mlir::arith::ShLIOp>(loc, arg0, shiftVal)};
  auto offset2{builder.create<mlir::arith::ShRSIOp>(loc, offset, shiftVal)};

  // Add the offset to the %addr of arg1
  auto addr{addOffsetToAddress(builder, loc, arg1, offset2)};
  llvm::SmallVector<mlir::Value, 4> parsedArgs{addr};

  llvm::StringRef fname{};
  switch (vop) {
  case VecOp::Lvsl:
    fname = "llvm.ppc.altivec.lvsl";
    break;
  case VecOp::Lvsr:
    fname = "llvm.ppc.altivec.lvsr";
    break;
  default:
    llvm_unreachable("invalid vector operation for generator");
  }
  auto funcType{mlir::FunctionType::get(context, {addr.getType()}, {mlirTy})};
  auto funcOp{builder.createFunction(loc, fname, funcType)};
  auto result{
      builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)};

  if (isNativeVecElemOrderOnLE())
    return builder.createConvert(
        loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len));

  return builder.createConvert(loc, firTy, result);
}

// VEC_NMADD, VEC_MSUB
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecNmaddMsub(mlir::Type resultType,
                                     llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto vTypeInfo{getVecTypeFromFir(argBases[0])};
  auto newArgs{convertVecArgs(builder, loc, vTypeInfo, argBases)};
  const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};

  // llvm.fma.* takes three vector operands of the result type.
  static std::map<int, std::pair<llvm::StringRef, mlir::FunctionType>> fmaMap{
      {32,
       std::make_pair(
           "llvm.fma.v4f32",
           genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>,
                       Ty::RealVector<4>>(context, builder))},
      {64,
       std::make_pair(
           "llvm.fma.v2f64",
           genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>,
                       Ty::RealVector<8>>(context, builder))}};

  auto funcOp{builder.createFunction(loc, std::get<0>(fmaMap[width]),
                                     std::get<1>(fmaMap[width]))};
  if (vop == VecOp::Nmadd) {
    // vec_nmadd(arg1, arg2, arg3) = -fma(arg1, arg2, arg3)
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};

    // We need to convert fir.vector to MLIR vector to use fneg and then back
    // to fir.vector to store.
    auto vCall{builder.createConvert(loc, vTypeInfo.toMlirVectorType(context),
                                     callOp.getResult(0))};
    auto neg{builder.create<mlir::arith::NegFOp>(loc, vCall)};
    return builder.createConvert(loc, vTypeInfo.toFirVectorType(), neg);
  } else if (vop == VecOp::Msub) {
    // vec_msub(arg1, arg2, arg3) = fma(arg1, arg2, -arg3)
    newArgs[2] = builder.create<mlir::arith::NegFOp>(loc, newArgs[2]);

    auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};
    return callOp.getResult(0);
  }
  llvm_unreachable("Invalid vector operation for generator");
}

// VEC_PERM, VEC_PERMI
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecPerm(mlir::Type resultType,
                                llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto argTypes{getTypesForArgs(argBases)};
  auto vecTyInfo{getVecTypeFromFir(argBases[0])};
  auto mlirTy{vecTyInfo.toMlirVectorType(context)};

  auto vi32Ty{mlir::VectorType::get(4, mlir::IntegerType::get(context, 32))};
  auto vf64Ty{mlir::VectorType::get(2, mlir::FloatType::getF64(context))};

  auto mArg0{builder.createConvert(loc, mlirTy, argBases[0])};
  auto mArg1{builder.createConvert(loc, mlirTy, argBases[1])};

  switch (vop) {
  case VecOp::Perm: {
    VecTypeInfo maskVecTyInfo{getVecTypeFromFir(argBases[2])};
    auto mlirMaskTy{maskVecTyInfo.toMlirVectorType(context)};
    auto mMask{builder.createConvert(loc, mlirMaskTy, argBases[2])};

    if (mlirTy != vi32Ty) {
      mArg0 =
          builder.create<mlir::LLVM::BitcastOp>(loc, vi32Ty, mArg0).getResult();
      mArg1 =
          builder.create<mlir::LLVM::BitcastOp>(loc, vi32Ty, mArg1).getResult();
    }

    auto funcOp{builder.createFunction(
        loc, "llvm.ppc.altivec.vperm",
        genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                    Ty::IntegerVector<4>, Ty::IntegerVector<1>>(context,
                                                                builder))};

    llvm::SmallVector<mlir::Value> newArgs;
    if (isNativeVecElemOrderOnLE()) {
      auto i8Ty{mlir::IntegerType::get(context, 8)};
      auto v8Ty{mlir::VectorType::get(16, i8Ty)};
      auto negOne{builder.createMinusOneInteger(loc, i8Ty)};
      auto vNegOne{
          builder.create<mlir::vector::BroadcastOp>(loc, v8Ty, negOne)};

      mMask = builder.create<mlir::arith::XOrIOp>(loc, mMask, vNegOne);
      newArgs = {mArg1, mArg0, mMask};
    } else {
      newArgs = {mArg0, mArg1, mMask};
    }

    auto res{builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0)};

    if (res.getType() != argTypes[0]) {
      // fir.call llvm.ppc.altivec.vperm returns !fir.vector<4:i32>;
      // convert the result back to the original type.
      res = builder.createConvert(loc, vi32Ty, res);
      if (mlirTy != vi32Ty)
        res =
            builder.create<mlir::LLVM::BitcastOp>(loc, mlirTy, res).getResult();
    }
    return builder.createConvert(loc, resultType, res);
  }
  case VecOp::Permi: {
    // arg3 is a constant
    auto constIntOp{
        mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp())
            .getValue()
            .dyn_cast_or_null<mlir::IntegerAttr>()};
    assert(constIntOp && "expected integer constant argument");
    auto constInt{constIntOp.getInt()};
    // arg1, arg2, and the result type share the same VecTypeInfo
    if (vecTyInfo.isFloat()) {
      mArg0 =
          builder.create<mlir::LLVM::BitcastOp>(loc, vf64Ty, mArg0).getResult();
      mArg1 =
          builder.create<mlir::LLVM::BitcastOp>(loc, vf64Ty, mArg1).getResult();
    }

    llvm::SmallVector<int64_t, 2> nMask; // native vector element order mask
    llvm::SmallVector<int64_t, 2> rMask; // non-native vector element order mask
    enum { V1 = 0, V2 = 2 };
    switch (constInt) {
    case 0:
      nMask = {V1 + 0, V2 + 0};
      rMask = {V2 + 1, V1 + 1};
      break;
    case 1:
      nMask = {V1 + 0, V2 + 1};
      rMask = {V2 + 0, V1 + 1};
      break;
    case 2:
      nMask = {V1 + 1, V2 + 0};
      rMask = {V2 + 1, V1 + 0};
      break;
    case 3:
      nMask = {V1 + 1, V2 + 1};
      rMask = {V2 + 0, V1 + 0};
      break;
    default:
      llvm_unreachable("unexpected arg3 value for vec_permi");
    }

    llvm::SmallVector<int64_t, 2> mask =
        (isBEVecElemOrderOnLE()) ? rMask : nMask;
    auto res{builder.create<mlir::vector::ShuffleOp>(loc, mArg0, mArg1, mask)};
    if (res.getType() != mlirTy) {
      auto cast{builder.create<mlir::LLVM::BitcastOp>(loc, mlirTy, res)};
      return builder.createConvert(loc, resultType, cast);
    }
    return builder.createConvert(loc, resultType, res);
  }
  default:
    llvm_unreachable("invalid vector operation for generator");
  }
}

// VEC_SEL
fir::ExtendedValue
PPCIntrinsicLibrary::genVecSel(mlir::Type resultType,
                               llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto argBases{getBasesForArgs(args)};
  llvm::SmallVector<VecTypeInfo, 4> vecTyInfos;
  for (size_t i = 0; i < argBases.size(); i++) {
    vecTyInfos.push_back(getVecTypeFromFir(argBases[i]));
  }
  auto vargs{convertVecArgs(builder, loc, vecTyInfos, argBases)};

  auto i8Ty{mlir::IntegerType::get(builder.getContext(), 8)};
  auto negOne{builder.createMinusOneInteger(loc, i8Ty)};

  // Construct a constant <16 x i8> vector with value -1 for the bitcast
  auto bcVecTy{mlir::VectorType::get(16, i8Ty)};
  auto vNegOne{builder.create<mlir::vector::BroadcastOp>(loc, bcVecTy, negOne)};

  // Bitcast the arguments to bcVecTy
  auto arg1{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0])};
  auto arg2{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1])};
  auto arg3{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[2])};

  // vec_sel(arg1, arg2, arg3) =
  //   (arg2 and arg3) or (arg1 and (arg3 xor vector(-1,...)))
  auto comp{builder.create<mlir::arith::XOrIOp>(loc, arg3, vNegOne)};
  auto a1AndComp{builder.create<mlir::arith::AndIOp>(loc, arg1, comp)};
  auto a2AndA3{builder.create<mlir::arith::AndIOp>(loc, arg2, arg3)};
  auto res{builder.create<mlir::arith::OrIOp>(loc, a1AndComp, a2AndA3)};

  auto bcRes{
      builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), res)};

  return builder.createConvert(loc, vecTyInfos[0].toFirVectorType(), bcRes);
}

// VEC_SL, VEC_SLD, VEC_SLDW, VEC_SLL, VEC_SLO, VEC_SR, VEC_SRL, VEC_SRO
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecShift(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto argTypes{getTypesForArgs(argBases)};

  llvm::SmallVector<VecTypeInfo, 2> vecTyInfoArgs;
  vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[0]));
  vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[1]));

  // Convert the first two arguments to MLIR vectors
  llvm::SmallVector<mlir::Type, 2> mlirTyArgs;
  mlirTyArgs.push_back(vecTyInfoArgs[0].toMlirVectorType(context));
  mlirTyArgs.push_back(vecTyInfoArgs[1].toMlirVectorType(context));

  llvm::SmallVector<mlir::Value, 2> mlirVecArgs;
  mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[0], argBases[0]));
  mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[1], argBases[1]));

  mlir::Value shftRes{nullptr};

  if (vop == VecOp::Sl || vop == VecOp::Sr) {
    assert(args.size() == 2);
    // Construct the mask
    auto width{
        mlir::dyn_cast<mlir::IntegerType>(vecTyInfoArgs[1].eleTy).getWidth()};
    auto vecVal{builder.createIntegerConstant(
        loc, getConvertedElementType(context, vecTyInfoArgs[0].eleTy), width)};
    auto mask{
        builder.create<mlir::vector::BroadcastOp>(loc, mlirTyArgs[1], vecVal)};
    auto shft{builder.create<mlir::arith::RemUIOp>(loc, mlirVecArgs[1], mask)};

    mlir::Value res{nullptr};
    if (vop == VecOp::Sr)
      res = builder.create<mlir::arith::ShRUIOp>(loc, mlirVecArgs[0], shft);
    else if (vop == VecOp::Sl)
      res = builder.create<mlir::arith::ShLIOp>(loc, mlirVecArgs[0], shft);

    shftRes = builder.createConvert(loc, argTypes[0], res);
  } else if (vop == VecOp::Sll || vop == VecOp::Slo || vop == VecOp::Srl ||
             vop == VecOp::Sro) {
    assert(args.size() == 2);

    // Bitcast to vector<4xi32>
    auto bcVecTy{mlir::VectorType::get(4, builder.getIntegerType(32))};
    if (mlirTyArgs[0] != bcVecTy)
      mlirVecArgs[0] =
          builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[0]);
    if (mlirTyArgs[1] != bcVecTy)
      mlirVecArgs[1] =
          builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[1]);

    llvm::StringRef funcName;
    switch (vop) {
    case VecOp::Srl:
      funcName = "llvm.ppc.altivec.vsr";
      break;
    case VecOp::Sro:
      funcName = "llvm.ppc.altivec.vsro";
      break;
    case VecOp::Sll:
      funcName = "llvm.ppc.altivec.vsl";
      break;
    case VecOp::Slo:
      funcName = "llvm.ppc.altivec.vslo";
      break;
    default:
      llvm_unreachable("unknown vector shift operation");
    }
    auto funcTy{genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                            Ty::IntegerVector<4>>(context, builder)};
    mlir::func::FuncOp funcOp{builder.createFunction(loc, funcName, funcTy)};
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, mlirVecArgs)};

    // If the result vector type is different from the original type, need
    // to convert to mlir vector, bitcast and then convert back to fir vector.
    if (callOp.getResult(0).getType() != argTypes[0]) {
      auto res = builder.createConvert(loc, bcVecTy, callOp.getResult(0));
      res = builder.create<mlir::vector::BitCastOp>(loc, mlirTyArgs[0], res);
      shftRes = builder.createConvert(loc, argTypes[0], res);
    } else {
      shftRes = callOp.getResult(0);
    }
  } else if (vop == VecOp::Sld || vop == VecOp::Sldw) {
    assert(args.size() == 3);
    auto constIntOp =
        mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp())
            .getValue()
            .dyn_cast_or_null<mlir::IntegerAttr>();
    assert(constIntOp && "expected integer constant argument");

    // Bitcast to vector<16xi8>
    auto vi8Ty{mlir::VectorType::get(16, builder.getIntegerType(8))};
    if (mlirTyArgs[0] != vi8Ty) {
      mlirVecArgs[0] =
          builder.create<mlir::LLVM::BitcastOp>(loc, vi8Ty, mlirVecArgs[0])
              .getResult();
      mlirVecArgs[1] =
          builder.create<mlir::LLVM::BitcastOp>(loc, vi8Ty, mlirVecArgs[1])
              .getResult();
    }

    // Construct the mask for shuffling
    auto shiftVal{constIntOp.getInt()};
    if (vop == VecOp::Sldw)
      shiftVal = shiftVal << 2;
    shiftVal &= 0xF;
    llvm::SmallVector<int64_t, 16> mask;
    // Shuffle with mask based on the endianness
    const auto triple{fir::getTargetTriple(builder.getModule())};
    if (triple.isLittleEndian()) {
      for (int i = 16; i < 32; ++i)
        mask.push_back(i - shiftVal);
      shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[1],
                                                        mlirVecArgs[0], mask);
    } else {
      for (int i = 0; i < 16; ++i)
        mask.push_back(i + shiftVal);
      shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[0],
                                                        mlirVecArgs[1], mask);
    }

    // Bitcast to the original type
    if (shftRes.getType() != mlirTyArgs[0])
      shftRes =
          builder.create<mlir::LLVM::BitcastOp>(loc, mlirTyArgs[0], shftRes);

    return builder.createConvert(loc, resultType, shftRes);
  } else
    llvm_unreachable("Invalid vector operation for generator");

  return shftRes;
}

// VEC_SPLAT, VEC_SPLATS, VEC_SPLAT_S32
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecSplat(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};

  mlir::vector::SplatOp splatOp{nullptr};
  mlir::Type retTy{nullptr};
  switch (vop) {
  case VecOp::Splat: {
    assert(args.size() == 2);
    auto vecTyInfo{getVecTypeFromFir(argBases[0])};

    auto extractOp{genVecExtract(resultType, args)};
    splatOp = builder.create<mlir::vector::SplatOp>(
        loc, *(extractOp.getUnboxed()), vecTyInfo.toMlirVectorType(context));
    retTy = vecTyInfo.toFirVectorType();
    break;
  }
  case VecOp::Splats: {
    assert(args.size() == 1);
    auto vecTyInfo{getVecTypeFromEle(argBases[0])};

    splatOp = builder.create<mlir::vector::SplatOp>(
        loc, argBases[0], vecTyInfo.toMlirVectorType(context));
    retTy = vecTyInfo.toFirVectorType();
    break;
  }
  case VecOp::Splat_s32: {
    assert(args.size() == 1);
    auto eleTy{builder.getIntegerType(32)};
    auto intOp{builder.createConvert(loc, eleTy, argBases[0])};

    // the intrinsic always returns vector(integer(4))
    splatOp = builder.create<mlir::vector::SplatOp>(
        loc, intOp, mlir::VectorType::get(4, eleTy));
    retTy = fir::VectorType::get(4, eleTy);
    break;
  }
  default:
    llvm_unreachable("invalid vector operation for generator");
  }
  return builder.createConvert(loc, retTy, splatOp);
}

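// VEC_XLDS
// Load a doubleword from memory and splat it into both elements of the
// result vector.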
fir::ExtendedValue
PPCIntrinsicLibrary::genVecXlds(mlir::Type resultType,
                                llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto arg0{getBase(args[0])};
  auto arg1{getBase(args[1])};

  // Prepare the return type in FIR.
  auto vecTyInfo{getVecTypeFromFirType(resultType)};
  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
  auto firTy{vecTyInfo.toFirVectorType()};

  // Add the %val of arg0 to %addr of arg1
  auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};

  auto i64Ty{mlir::IntegerType::get(builder.getContext(), 64)};
  auto i64VecTy{mlir::VectorType::get(2, i64Ty)};
  auto i64RefTy{builder.getRefType(i64Ty)};
  auto addrConv{builder.create<fir::ConvertOp>(loc, i64RefTy, addr)};

  auto addrVal{builder.create<fir::LoadOp>(loc, addrConv)};
  auto splatRes{builder.create<mlir::vector::SplatOp>(loc, addrVal, i64VecTy)};

  mlir::Value result{nullptr};
  if (mlirTy != splatRes.getType()) {
    result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, splatRes);
  } else
    result = splatRes;

  return builder.createConvert(loc, firTy, result);
}

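// Map an MMA operation to the name of the LLVM IR intrinsic that
// implements it.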
const char *getMmaIrIntrName(MMAOp mmaOp) {
  switch (mmaOp) {
  case MMAOp::AssembleAcc:
    return "llvm.ppc.mma.assemble.acc";
  case MMAOp::AssemblePair:
    return "llvm.ppc.vsx.assemble.pair";
  case MMAOp::DisassembleAcc:
    return "llvm.ppc.mma.disassemble.acc";
  case MMAOp::DisassemblePair:
    return "llvm.ppc.vsx.disassemble.pair";
  case MMAOp::Xxmfacc:
    return "llvm.ppc.mma.xxmfacc";
  case MMAOp::Xxmtacc:
    return "llvm.ppc.mma.xxmtacc";
  case MMAOp::Xxsetaccz:
    return "llvm.ppc.mma.xxsetaccz";
  case MMAOp::Pmxvbf16ger2:
    return "llvm.ppc.mma.pmxvbf16ger2";
  case MMAOp::Pmxvbf16ger2nn:
    return "llvm.ppc.mma.pmxvbf16ger2nn";
  case MMAOp::Pmxvbf16ger2np:
    return "llvm.ppc.mma.pmxvbf16ger2np";
  case MMAOp::Pmxvbf16ger2pn:
    return "llvm.ppc.mma.pmxvbf16ger2pn";
  case MMAOp::Pmxvbf16ger2pp:
    return "llvm.ppc.mma.pmxvbf16ger2pp";
  case MMAOp::Pmxvf16ger2:
    return "llvm.ppc.mma.pmxvf16ger2";
  case MMAOp::Pmxvf16ger2nn:
    return "llvm.ppc.mma.pmxvf16ger2nn";
  case MMAOp::Pmxvf16ger2np:
    return "llvm.ppc.mma.pmxvf16ger2np";
  case MMAOp::Pmxvf16ger2pn:
    return "llvm.ppc.mma.pmxvf16ger2pn";
  case MMAOp::Pmxvf16ger2pp:
    return "llvm.ppc.mma.pmxvf16ger2pp";
  case MMAOp::Pmxvf32ger:
    return "llvm.ppc.mma.pmxvf32ger";
  case MMAOp::Pmxvf32gernn:
    return "llvm.ppc.mma.pmxvf32gernn";
  case MMAOp::Pmxvf32gernp:
    return "llvm.ppc.mma.pmxvf32gernp";
  case MMAOp::Pmxvf32gerpn:
    return "llvm.ppc.mma.pmxvf32gerpn";
  case MMAOp::Pmxvf32gerpp:
    return "llvm.ppc.mma.pmxvf32gerpp";
  case MMAOp::Pmxvf64ger:
    return "llvm.ppc.mma.pmxvf64ger";
  case MMAOp::Pmxvf64gernn:
    return "llvm.ppc.mma.pmxvf64gernn";
  case MMAOp::Pmxvf64gernp:
    return "llvm.ppc.mma.pmxvf64gernp";
  case MMAOp::Pmxvf64gerpn:
    return "llvm.ppc.mma.pmxvf64gerpn";
  case MMAOp::Pmxvf64gerpp:
    return "llvm.ppc.mma.pmxvf64gerpp";
  case MMAOp::Pmxvi16ger2:
    return "llvm.ppc.mma.pmxvi16ger2";
  case MMAOp::Pmxvi16ger2pp:
    return "llvm.ppc.mma.pmxvi16ger2pp";
  case MMAOp::Pmxvi16ger2s:
    return "llvm.ppc.mma.pmxvi16ger2s";
  case MMAOp::Pmxvi16ger2spp:
    return "llvm.ppc.mma.pmxvi16ger2spp";
  case MMAOp::Pmxvi4ger8:
    return "llvm.ppc.mma.pmxvi4ger8";
  case MMAOp::Pmxvi4ger8pp:
    return "llvm.ppc.mma.pmxvi4ger8pp";
  case MMAOp::Pmxvi8ger4:
    return "llvm.ppc.mma.pmxvi8ger4";
  case MMAOp::Pmxvi8ger4pp:
    return "llvm.ppc.mma.pmxvi8ger4pp";
  case MMAOp::Pmxvi8ger4spp:
    return "llvm.ppc.mma.pmxvi8ger4spp";
  case MMAOp::Xvbf16ger2:
    return "llvm.ppc.mma.xvbf16ger2";
  case MMAOp::Xvbf16ger2nn:
    return "llvm.ppc.mma.xvbf16ger2nn";
  case MMAOp::Xvbf16ger2np:
    return "llvm.ppc.mma.xvbf16ger2np";
  case MMAOp::Xvbf16ger2pn:
    return "llvm.ppc.mma.xvbf16ger2pn";
  case MMAOp::Xvbf16ger2pp:
    return "llvm.ppc.mma.xvbf16ger2pp";
  case MMAOp::Xvf16ger2:
    return "llvm.ppc.mma.xvf16ger2";
  case MMAOp::Xvf16ger2nn:
    return "llvm.ppc.mma.xvf16ger2nn";
  case MMAOp::Xvf16ger2np:
    return "llvm.ppc.mma.xvf16ger2np";
  case MMAOp::Xvf16ger2pn:
    return "llvm.ppc.mma.xvf16ger2pn";
  case MMAOp::Xvf16ger2pp:
    return "llvm.ppc.mma.xvf16ger2pp";
  case MMAOp::Xvf32ger:
    return "llvm.ppc.mma.xvf32ger";
  case MMAOp::Xvf32gernn:
    return "llvm.ppc.mma.xvf32gernn";
  case MMAOp::Xvf32gernp:
    return "llvm.ppc.mma.xvf32gernp";
  case MMAOp::Xvf32gerpn:
    return "llvm.ppc.mma.xvf32gerpn";
  case MMAOp::Xvf32gerpp:
    return "llvm.ppc.mma.xvf32gerpp";
  case MMAOp::Xvf64ger:
    return "llvm.ppc.mma.xvf64ger";
  case MMAOp::Xvf64gernn:
    return "llvm.ppc.mma.xvf64gernn";
  case MMAOp::Xvf64gernp:
    return "llvm.ppc.mma.xvf64gernp";
  case MMAOp::Xvf64gerpn:
    return "llvm.ppc.mma.xvf64gerpn";
  case MMAOp::Xvf64gerpp:
    return "llvm.ppc.mma.xvf64gerpp";
  case MMAOp::Xvi16ger2:
    return "llvm.ppc.mma.xvi16ger2";
  case MMAOp::Xvi16ger2pp:
    return "llvm.ppc.mma.xvi16ger2pp";
  case MMAOp::Xvi16ger2s:
    return "llvm.ppc.mma.xvi16ger2s";
  case MMAOp::Xvi16ger2spp:
    return "llvm.ppc.mma.xvi16ger2spp";
  case MMAOp::Xvi4ger8:
    return "llvm.ppc.mma.xvi4ger8";
  case MMAOp::Xvi4ger8pp:
    return "llvm.ppc.mma.xvi4ger8pp";
  case MMAOp::Xvi8ger4:
    return "llvm.ppc.mma.xvi8ger4";
  case MMAOp::Xvi8ger4pp:
    return "llvm.ppc.mma.xvi8ger4pp";
  case MMAOp::Xvi8ger4spp:
    return "llvm.ppc.mma.xvi8ger4spp";
  }
  llvm_unreachable("getMmaIrIntrName");
}

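// Build the MLIR function type of the LLVM IR intrinsic for the given MMA
// operation. The /*Quad*/, /*Pair*/, /*Vector*/, and /*Integer*/ arguments
// give the number of operands of each kind.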
mlir::FunctionType getMmaIrFuncType(mlir::MLIRContext *context, MMAOp mmaOp) {
  switch (mmaOp) {
  case MMAOp::AssembleAcc:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 4);
  case MMAOp::AssemblePair:
    return genMmaVpFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::DisassembleAcc:
    return genMmaDisassembleFuncType(context, mmaOp);
  case MMAOp::DisassemblePair:
    return genMmaDisassembleFuncType(context, mmaOp);
  case MMAOp::Xxmfacc:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0);
  case MMAOp::Xxmtacc:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0);
  case MMAOp::Xxsetaccz:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 0);
  case MMAOp::Pmxvbf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf32ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvi16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi16ger2s:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi16ger2spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi4ger8:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi4ger8pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi8ger4:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi8ger4pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi8ger4spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Xvbf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf64ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvi16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2s:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi4ger8:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi4ger8pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  }
  llvm_unreachable("getMmaIrFuncType");
}

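// __ppc_mma_* subroutines
// Generate a call to the LLVM IR intrinsic implementing an MMA operation,
// rewriting the Fortran subroutine call into a function call whose result
// is stored through the first argument when the handler requires it.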
template <MMAOp IntrId, MMAHandlerOp HandlerOp>
void PPCIntrinsicLibrary::genMmaIntr(llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  mlir::FunctionType intrFuncType{getMmaIrFuncType(context, IntrId)};
  mlir::func::FuncOp funcOp{
      builder.createFunction(loc, getMmaIrIntrName(IntrId), intrFuncType)};
  llvm::SmallVector<mlir::Value> intrArgs;

  // Depending on SubToFunc, change the subroutine call to a function call.
  // The first argument represents the result; the rest of the arguments
  // are shifted one position to form the actual argument list.
  size_t argStart{0};
  size_t argStep{1};
  size_t e{args.size()};
  if (HandlerOp == MMAHandlerOp::SubToFunc) {
    // The first argument becomes the function result. Start from the second
    // argument.
    argStart = 1;
  } else if (HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE) {
    // Reverse the argument order on little-endian targets only.
    // The reversal does not depend on the setting of the non-native-order
    // option.
    const auto triple{fir::getTargetTriple(builder.getModule())};
    if (triple.isLittleEndian()) {
      // Load the arguments in reverse order.
      argStart = args.size() - 1;
      // The first argument becomes the function result. Stop at the second
      // argument.
      e = 0;
      argStep = -1;
    } else {
      // Load the arguments in natural order.
      // The first argument becomes the function result. Start from the second
      // argument.
      argStart = 1;
    }
  }

  for (size_t i = argStart, j = 0; i != e; i += argStep, ++j) {
    auto v{fir::getBase(args[i])};
    if (i == 0 && HandlerOp == MMAHandlerOp::FirstArgIsResult) {
      // The first argument is passed in as an address. We need to load
      // its content to match the LLVM interface.
      v = builder.create<fir::LoadOp>(loc, v);
    }
    auto vType{v.getType()};
    mlir::Type targetType{intrFuncType.getInput(j)};
    if (vType != targetType) {
      if (targetType.isa<mlir::VectorType>()) {
        // Perform vector type conversion for arguments passed by value.
        auto eleTy{vType.dyn_cast<fir::VectorType>().getEleTy()};
        auto len{vType.dyn_cast<fir::VectorType>().getLen()};
        mlir::VectorType mlirType = mlir::VectorType::get(len, eleTy);
        auto v0{builder.createConvert(loc, mlirType, v)};
        auto v1{builder.create<mlir::vector::BitCastOp>(loc, targetType, v0)};
        intrArgs.push_back(v1);
      } else if (targetType.isa<mlir::IntegerType>() &&
                 vType.isa<mlir::IntegerType>()) {
        auto v0{builder.createConvert(loc, targetType, v)};
        intrArgs.push_back(v0);
      } else {
        llvm::errs() << "\nUnexpected type conversion requested: from "
                     << vType << " to " << targetType << "\n";
        llvm_unreachable("Unsupported type conversion for argument to PowerPC "
                         "MMA intrinsic");
      }
    } else {
      intrArgs.push_back(v);
    }
  }
  auto callSt{builder.create<fir::CallOp>(loc, funcOp, intrArgs)};
  if (HandlerOp == MMAHandlerOp::SubToFunc ||
      HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE ||
      HandlerOp == MMAHandlerOp::FirstArgIsResult) {
    // Convert the pointer type if needed.
    mlir::Value callResult{callSt.getResult(0)};
    mlir::Value destPtr{fir::getBase(args[0])};
    mlir::Type callResultPtrType{builder.getRefType(callResult.getType())};
    if (destPtr.getType() != callResultPtrType) {
      destPtr = builder.create<fir::ConvertOp>(loc, callResultPtrType, destPtr);
    }
    // Copy the result.
    builder.create<fir::StoreOp>(loc, callResult, destPtr);
  }
}

// VEC_ST, VEC_STE, VEC_STXVP
template <VecOp vop>
void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);

  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto arg1TyInfo{getVecTypeFromFir(argBases[0])};

  auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])};

  llvm::StringRef fname{};
  mlir::VectorType stTy{nullptr};
  auto i32ty{mlir::IntegerType::get(context, 32)};
  switch (vop) {
  case VecOp::St:
    stTy = mlir::VectorType::get(4, i32ty);
    fname = "llvm.ppc.altivec.stvx";
    break;
  case VecOp::Ste: {
    const auto width{arg1TyInfo.eleTy.getIntOrFloatBitWidth()};
    const auto len{arg1TyInfo.len};

    if (arg1TyInfo.isFloat32()) {
      stTy = mlir::VectorType::get(len, i32ty);
      fname = "llvm.ppc.altivec.stvewx";
    } else if (arg1TyInfo.eleTy.isa<mlir::IntegerType>()) {
      stTy = mlir::VectorType::get(len, mlir::IntegerType::get(context, width));

      switch (width) {
      case 8:
        fname = "llvm.ppc.altivec.stvebx";
        break;
      case 16:
        fname = "llvm.ppc.altivec.stvehx";
        break;
      case 32:
        fname = "llvm.ppc.altivec.stvewx";
        break;
      default:
        assert(false && "invalid element size");
      }
    } else
      assert(false && "unknown type");
    break;
  }
  case VecOp::Stxvp:
    // __vector_pair type
    stTy = mlir::VectorType::get(256, mlir::IntegerType::get(context, 1));
    fname = "llvm.ppc.vsx.stxvp";
    break;
  default:
    llvm_unreachable("invalid vector operation for generator");
  }

  auto funcType{
      mlir::FunctionType::get(context, {stTy, addr.getType()}, std::nullopt)};
  mlir::func::FuncOp funcOp = builder.createFunction(loc, fname, funcType);

  llvm::SmallVector<mlir::Value, 4> biArgs;

  if (vop == VecOp::Stxvp) {
    biArgs.push_back(argBases[0]);
    biArgs.push_back(addr);
    builder.create<fir::CallOp>(loc, funcOp, biArgs);
    return;
  }

  auto vecTyInfo{getVecTypeFromFirType(argBases[0].getType())};
  auto cnv{builder.createConvert(loc, vecTyInfo.toMlirVectorType(context),
                                 argBases[0])};

  mlir::Value newArg1{nullptr};
  if (stTy != arg1TyInfo.toMlirVectorType(context))
    newArg1 = builder.create<mlir::vector::BitCastOp>(loc, stTy, cnv);
  else
    newArg1 = cnv;

  if (isBEVecElemOrderOnLE())
    newArg1 = builder.createConvert(
        loc, stTy, reverseVectorElements(builder, loc, newArg1, 4));

  biArgs.push_back(newArg1);
  biArgs.push_back(addr);

  builder.create<fir::CallOp>(loc, funcOp, biArgs);
}

// VEC_XST, VEC_XST_BE, VEC_STXV, VEC_XSTD2, VEC_XSTW4
template <VecOp vop>
void PPCIntrinsicLibrary::genVecXStore(
    llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  VecTypeInfo arg1TyInfo{getVecTypeFromFir(argBases[0])};

  auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])};

  mlir::Value trg{nullptr};
  mlir::Value src{nullptr};

  switch (vop) {
  case VecOp::Xst:
  case VecOp::Xst_be: {
    src = argBases[0];
    trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()),
                                addr);

    if (vop == VecOp::Xst_be || isBEVecElemOrderOnLE()) {
      auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context),
                                     argBases[0])};
      auto shf{reverseVectorElements(builder, loc, cnv, arg1TyInfo.len)};

      src = builder.createConvert(loc, arg1TyInfo.toFirVectorType(), shf);
    }
    break;
  }
  case VecOp::Xstd2:
  case VecOp::Xstw4: {
    // A 16-byte vector arg1 is treated as two 8-byte elements or
    // four 4-byte elements.
    mlir::IntegerType elemTy;
    uint64_t numElem = (vop == VecOp::Xstd2) ? 2 : 4;
    elemTy = builder.getIntegerType(128 / numElem);

    mlir::VectorType mlirVecTy{mlir::VectorType::get(numElem, elemTy)};
    fir::VectorType firVecTy{fir::VectorType::get(numElem, elemTy)};

    auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context),
                                   argBases[0])};

    mlir::Type srcTy{nullptr};
    if (numElem != arg1TyInfo.len) {
      cnv = builder.create<mlir::vector::BitCastOp>(loc, mlirVecTy, cnv);
      srcTy = firVecTy;
    } else {
      srcTy = arg1TyInfo.toFirVectorType();
    }

    trg = builder.createConvert(loc, builder.getRefType(srcTy), addr);

    if (isBEVecElemOrderOnLE()) {
      cnv = reverseVectorElements(builder, loc, cnv, numElem);
    }

    src = builder.createConvert(loc, srcTy, cnv);
    break;
  }
  case VecOp::Stxv:
    src = argBases[0];
    trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()),
                                addr);
    break;
  default:
    assert(false && "Invalid vector operation for generator");
  }
  builder.create<fir::StoreOp>(loc, mlir::TypeRange{},
                               mlir::ValueRange{src, trg},
                               getAlignmentAttr(builder, 1));
}

3000} // namespace fir