1//===-- Character.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
10//
11//===----------------------------------------------------------------------===//
12
13#include "flang/Optimizer/Builder/Character.h"
14#include "flang/Optimizer/Builder/DoLoopHelper.h"
15#include "flang/Optimizer/Builder/FIRBuilder.h"
16#include "flang/Optimizer/Builder/Todo.h"
17#include "flang/Optimizer/Dialect/FIROpsSupport.h"
18#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
19#include "llvm/Support/Debug.h"
20#include <optional>
21
22#define DEBUG_TYPE "flang-lower-character"
23
24//===----------------------------------------------------------------------===//
25// CharacterExprHelper implementation
26//===----------------------------------------------------------------------===//
27
28/// Unwrap all the ref and box types and return the inner element type.
29static mlir::Type unwrapBoxAndRef(mlir::Type type) {
30 if (auto boxType = mlir::dyn_cast<fir::BoxCharType>(type))
31 return boxType.getEleTy();
32 while (true) {
33 type = fir::unwrapRefType(type);
34 if (auto boxTy = mlir::dyn_cast<fir::BoxType>(type))
35 type = boxTy.getEleTy();
36 else
37 break;
38 }
39 return type;
40}
41
42/// Unwrap base fir.char<kind,len> type.
43static fir::CharacterType recoverCharacterType(mlir::Type type) {
44 type = fir::unwrapSequenceType(unwrapBoxAndRef(type));
45 if (auto charTy = mlir::dyn_cast<fir::CharacterType>(type))
46 return charTy;
47 llvm::report_fatal_error(reason: "expected a character type");
48}
49
50bool fir::factory::CharacterExprHelper::isCharacterScalar(mlir::Type type) {
51 type = unwrapBoxAndRef(type);
52 return !mlir::isa<fir::SequenceType>(type) && fir::isa_char(type);
53}
54
55bool fir::factory::CharacterExprHelper::isArray(mlir::Type type) {
56 type = unwrapBoxAndRef(type);
57 if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(type))
58 return fir::isa_char(seqTy.getEleTy());
59 return false;
60}
61
62fir::CharacterType
63fir::factory::CharacterExprHelper::getCharacterType(mlir::Type type) {
64 assert(isCharacterScalar(type) && "expected scalar character");
65 return recoverCharacterType(type);
66}
67
68fir::CharacterType
69fir::factory::CharacterExprHelper::getCharType(mlir::Type type) {
70 return recoverCharacterType(type);
71}
72
73fir::CharacterType fir::factory::CharacterExprHelper::getCharacterType(
74 const fir::CharBoxValue &box) {
75 return getCharacterType(box.getBuffer().getType());
76}
77
78fir::CharacterType
79fir::factory::CharacterExprHelper::getCharacterType(mlir::Value str) {
80 return getCharacterType(str.getType());
81}
82
83/// Determine the static size of the character. Returns the computed size, not
84/// an IR Value.
85static std::optional<fir::CharacterType::LenType>
86getCompileTimeLength(const fir::CharBoxValue &box) {
87 auto len = recoverCharacterType(box.getBuffer().getType()).getLen();
88 if (len == fir::CharacterType::unknownLen())
89 return {};
90 return len;
91}
92
93/// Detect the precondition that the value `str` does not reside in memory. Such
94/// values will have a type `!fir.array<...x!fir.char<N>>` or `!fir.char<N>`.
95LLVM_ATTRIBUTE_UNUSED static bool needToMaterialize(mlir::Value str) {
96 return mlir::isa<fir::SequenceType>(str.getType()) ||
97 fir::isa_char(str.getType());
98}
99
100/// This is called only if `str` does not reside in memory. Such a bare string
101/// value will be converted into a memory-based temporary and an extended
102/// boxchar value returned.
103fir::CharBoxValue
104fir::factory::CharacterExprHelper::materializeValue(mlir::Value str) {
105 assert(needToMaterialize(str));
106 auto ty = str.getType();
107 assert(isCharacterScalar(ty) && "expected scalar character");
108 auto charTy = mlir::dyn_cast<fir::CharacterType>(ty);
109 if (!charTy || charTy.getLen() == fir::CharacterType::unknownLen()) {
110 LLVM_DEBUG(llvm::dbgs() << "cannot materialize: " << str << '\n');
111 llvm_unreachable("must be a !fir.char<N> type");
112 }
113 auto len = builder.createIntegerConstant(
114 loc, builder.getCharacterLengthType(), charTy.getLen());
115 auto temp = builder.create<fir::AllocaOp>(loc, charTy);
116 builder.create<fir::StoreOp>(loc, str, temp);
117 LLVM_DEBUG(llvm::dbgs() << "materialized as local: " << str << " -> (" << temp
118 << ", " << len << ")\n");
119 return {temp, len};
120}
121
122fir::ExtendedValue
123fir::factory::CharacterExprHelper::toExtendedValue(mlir::Value character,
124 mlir::Value len) {
125 auto lenType = builder.getCharacterLengthType();
126 auto type = character.getType();
127 auto base = fir::isa_passbyref_type(type) ? character : mlir::Value{};
128 auto resultLen = len;
129 llvm::SmallVector<mlir::Value> extents;
130
131 if (auto eleType = fir::dyn_cast_ptrEleTy(type))
132 type = eleType;
133
134 if (auto arrayType = mlir::dyn_cast<fir::SequenceType>(type)) {
135 type = arrayType.getEleTy();
136 auto indexType = builder.getIndexType();
137 for (auto extent : arrayType.getShape()) {
138 if (extent == fir::SequenceType::getUnknownExtent())
139 break;
140 extents.emplace_back(
141 builder.createIntegerConstant(loc, indexType, extent));
142 }
143 // Last extent might be missing in case of assumed-size. If more extents
144 // could not be deduced from type, that's an error (a fir.box should
145 // have been used in the interface).
146 if (extents.size() + 1 < arrayType.getShape().size())
147 mlir::emitError(loc, "cannot retrieve array extents from type");
148 }
149
150 if (auto charTy = mlir::dyn_cast<fir::CharacterType>(type)) {
151 if (!resultLen && charTy.getLen() != fir::CharacterType::unknownLen())
152 resultLen = builder.createIntegerConstant(loc, lenType, charTy.getLen());
153 } else if (auto boxCharType = mlir::dyn_cast<fir::BoxCharType>(type)) {
154 auto refType = builder.getRefType(boxCharType.getEleTy());
155 // If the embox is accessible, use its operand to avoid filling
156 // the generated fir with embox/unbox.
157 mlir::Value boxCharLen;
158 if (auto definingOp = character.getDefiningOp()) {
159 if (auto box = mlir::dyn_cast<fir::EmboxCharOp>(definingOp)) {
160 base = box.getMemref();
161 boxCharLen = box.getLen();
162 }
163 }
164 if (!boxCharLen) {
165 auto unboxed =
166 builder.create<fir::UnboxCharOp>(loc, refType, lenType, character);
167 base = builder.createConvert(loc, refType, unboxed.getResult(0));
168 boxCharLen = unboxed.getResult(1);
169 }
170 if (!resultLen) {
171 resultLen = boxCharLen;
172 }
173 } else if (mlir::isa<fir::BoxType>(type)) {
174 mlir::emitError(loc, "descriptor or derived type not yet handled");
175 } else {
176 llvm_unreachable("Cannot translate mlir::Value to character ExtendedValue");
177 }
178
179 if (!base) {
180 if (auto load =
181 mlir::dyn_cast_or_null<fir::LoadOp>(character.getDefiningOp())) {
182 base = load.getOperand();
183 } else {
184 return materializeValue(fir::getBase(character));
185 }
186 }
187 if (!resultLen)
188 llvm::report_fatal_error("no dynamic length found for character");
189 if (!extents.empty())
190 return fir::CharArrayBoxValue{base, resultLen, extents};
191 return fir::CharBoxValue{base, resultLen};
192}
193
194static mlir::Type getSingletonCharType(mlir::MLIRContext *ctxt, int kind) {
195 return fir::CharacterType::getSingleton(ctxt, kind);
196}
197
198mlir::Value
199fir::factory::CharacterExprHelper::createEmbox(const fir::CharBoxValue &box) {
200 // Base CharBoxValue of CharArrayBoxValue are ok here (do not require a scalar
201 // type)
202 auto charTy = recoverCharacterType(box.getBuffer().getType());
203 auto boxCharType =
204 fir::BoxCharType::get(builder.getContext(), charTy.getFKind());
205 auto refType = fir::ReferenceType::get(boxCharType.getEleTy());
206 mlir::Value buff = box.getBuffer();
207 // fir.boxchar requires a memory reference. Allocate temp if the character is
208 // not in memory.
209 if (!fir::isa_ref_type(buff.getType())) {
210 auto temp = builder.createTemporary(loc, buff.getType());
211 builder.create<fir::StoreOp>(loc, buff, temp);
212 buff = temp;
213 }
214 // fir.emboxchar only accepts scalar, cast array buffer to a scalar buffer.
215 if (mlir::isa<fir::SequenceType>(fir::dyn_cast_ptrEleTy(buff.getType())))
216 buff = builder.createConvert(loc, refType, buff);
217 // Convert in case the provided length is not of the integer type that must
218 // be used in boxchar.
219 auto len = builder.createConvert(loc, builder.getCharacterLengthType(),
220 box.getLen());
221 return builder.create<fir::EmboxCharOp>(loc, boxCharType, buff, len);
222}
223
224fir::CharBoxValue fir::factory::CharacterExprHelper::toScalarCharacter(
225 const fir::CharArrayBoxValue &box) {
226 if (mlir::isa<fir::PointerType>(box.getBuffer().getType()))
227 TODO(loc, "concatenating non contiguous character array into a scalar");
228
229 // TODO: add a fast path multiplying new length at compile time if the info is
230 // in the array type.
231 auto lenType = builder.getCharacterLengthType();
232 auto len = builder.createConvert(loc, lenType, box.getLen());
233 for (auto extent : box.getExtents())
234 len = builder.create<mlir::arith::MulIOp>(
235 loc, len, builder.createConvert(loc, lenType, extent));
236
237 // TODO: typeLen can be improved in compiled constant cases
238 // TODO: allow bare fir.array<> (no ref) conversion here ?
239 auto typeLen = fir::CharacterType::unknownLen();
240 auto kind = recoverCharacterType(box.getBuffer().getType()).getFKind();
241 auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen);
242 auto type = fir::ReferenceType::get(charTy);
243 auto buffer = builder.createConvert(loc, type, box.getBuffer());
244 return {buffer, len};
245}
246
247mlir::Value fir::factory::CharacterExprHelper::createEmbox(
248 const fir::CharArrayBoxValue &box) {
249 // Use same embox as for scalar. It's losing the actual data size information
250 // (We do not multiply the length by the array size), but that is what Fortran
251 // call interfaces using boxchar expect.
252 return createEmbox(static_cast<const fir::CharBoxValue &>(box));
253}
254
255/// Get the address of the element at position \p index of the scalar character
256/// \p buffer.
257/// \p buffer must be of type !fir.ref<fir.char<k, len>>. The length may be
258/// unknown. \p index must have any integer type, and is zero based. The return
259/// value is a singleton address (!fir.ref<!fir.char<kind>>)
260mlir::Value
261fir::factory::CharacterExprHelper::createElementAddr(mlir::Value buffer,
262 mlir::Value index) {
263 // The only way to address an element of a fir.ref<char<kind, len>> is to cast
264 // it to a fir.array<len x fir.char<kind>> and use fir.coordinate_of.
265 auto bufferType = buffer.getType();
266 assert(fir::isa_ref_type(bufferType));
267 assert(isCharacterScalar(bufferType));
268 auto charTy = recoverCharacterType(bufferType);
269 auto singleTy = getSingletonCharType(builder.getContext(), charTy.getFKind());
270 auto singleRefTy = builder.getRefType(singleTy);
271 auto extent = fir::SequenceType::getUnknownExtent();
272 if (charTy.getLen() != fir::CharacterType::unknownLen())
273 extent = charTy.getLen();
274 const bool isVolatile = fir::isa_volatile_type(buffer.getType());
275 auto sequenceType = fir::SequenceType::get({extent}, singleTy);
276 auto coorTy = builder.getRefType(sequenceType, isVolatile);
277
278 auto coor = builder.createConvert(loc, coorTy, buffer);
279 auto i = builder.createConvert(loc, builder.getIndexType(), index);
280 return builder.create<fir::CoordinateOp>(loc, singleRefTy, coor, i);
281}
282
283/// Load a character out of `buff` from offset `index`.
284/// `buff` must be a reference to memory.
285mlir::Value
286fir::factory::CharacterExprHelper::createLoadCharAt(mlir::Value buff,
287 mlir::Value index) {
288 LLVM_DEBUG(llvm::dbgs() << "load a char: " << buff << " type: "
289 << buff.getType() << " at: " << index << '\n');
290 return builder.create<fir::LoadOp>(loc, createElementAddr(buff, index));
291}
292
293/// Store the singleton character `c` to `str` at offset `index`.
294/// `str` must be a reference to memory.
295void fir::factory::CharacterExprHelper::createStoreCharAt(mlir::Value str,
296 mlir::Value index,
297 mlir::Value c) {
298 LLVM_DEBUG(llvm::dbgs() << "store the char: " << c << " into: " << str
299 << " type: " << str.getType() << " at: " << index
300 << '\n');
301 auto addr = createElementAddr(str, index);
302 builder.create<fir::StoreOp>(loc, c, addr);
303}
304
305// FIXME: this temp is useless... either fir.coordinate_of needs to
306// work on "loaded" characters (!fir.array<len x fir.char<kind>>) or
307// character should never be loaded.
308// If this is a fir.array<>, allocate and store the value so that
309// fir.cooridnate_of can be use on the value.
310mlir::Value fir::factory::CharacterExprHelper::getCharBoxBuffer(
311 const fir::CharBoxValue &box) {
312 auto buff = box.getBuffer();
313 if (fir::isa_char(buff.getType())) {
314 auto newBuff = builder.create<fir::AllocaOp>(loc, buff.getType());
315 builder.create<fir::StoreOp>(loc, buff, newBuff);
316 return newBuff;
317 }
318 return buff;
319}
320
321/// Create a loop to copy `count` characters from `src` to `dest`. Note that the
322/// KIND indicates the number of bits in a code point. (ASCII, UCS-2, or UCS-4.)
323void fir::factory::CharacterExprHelper::createCopy(
324 const fir::CharBoxValue &dest, const fir::CharBoxValue &src,
325 mlir::Value count) {
326 auto fromBuff = getCharBoxBuffer(src);
327 auto toBuff = getCharBoxBuffer(dest);
328 LLVM_DEBUG(llvm::dbgs() << "create char copy from: "; src.dump();
329 llvm::dbgs() << " to: "; dest.dump();
330 llvm::dbgs() << " count: " << count << '\n');
331 auto kind = getCharacterKind(src.getBuffer().getType());
332 // If the src and dest are the same KIND, then use memmove to move the bits.
333 // We don't have to worry about overlapping ranges with memmove.
334 if (getCharacterKind(dest.getBuffer().getType()) == kind) {
335 const bool isVolatile = fir::isa_volatile_type(fromBuff.getType()) ||
336 fir::isa_volatile_type(toBuff.getType());
337 auto bytes = builder.getKindMap().getCharacterBitsize(kind) / 8;
338 auto i64Ty = builder.getI64Type();
339 auto kindBytes = builder.createIntegerConstant(loc, i64Ty, bytes);
340 auto castCount = builder.createConvert(loc, i64Ty, count);
341 auto totalBytes =
342 builder.create<mlir::arith::MulIOp>(loc, kindBytes, castCount);
343 auto llvmPointerType =
344 mlir::LLVM::LLVMPointerType::get(builder.getContext());
345 auto toPtr = builder.createConvert(loc, llvmPointerType, toBuff);
346 auto fromPtr = builder.createConvert(loc, llvmPointerType, fromBuff);
347 builder.create<mlir::LLVM::MemmoveOp>(loc, toPtr, fromPtr, totalBytes,
348 isVolatile);
349 return;
350 }
351
352 // Convert a CHARACTER of one KIND into a CHARACTER of another KIND.
353 builder.create<fir::CharConvertOp>(loc, src.getBuffer(), count,
354 dest.getBuffer());
355}
356
357void fir::factory::CharacterExprHelper::createPadding(
358 const fir::CharBoxValue &str, mlir::Value lower, mlir::Value upper) {
359 auto blank = createBlankConstant(getCharacterType(str));
360 // Always create the loop, if upper < lower, no iteration will be
361 // executed.
362 auto toBuff = getCharBoxBuffer(str);
363 fir::factory::DoLoopHelper{builder, loc}.createLoop(
364 lower, upper, [&](fir::FirOpBuilder &, mlir::Value index) {
365 createStoreCharAt(toBuff, index, blank);
366 });
367}
368
369fir::CharBoxValue
370fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
371 mlir::Value len) {
372 auto kind = recoverCharacterType(type).getFKind();
373 auto typeLen = fir::CharacterType::unknownLen();
374 // If len is a constant, reflect the length in the type.
375 if (auto cstLen = getIntIfConstant(len))
376 typeLen = *cstLen;
377 auto *ctxt = builder.getContext();
378 auto charTy = fir::CharacterType::get(ctxt, kind, typeLen);
379 llvm::SmallVector<mlir::Value> lenParams;
380 if (typeLen == fir::CharacterType::unknownLen())
381 lenParams.push_back(len);
382 auto ref = builder.allocateLocal(loc, charTy, "", ".chrtmp",
383 /*shape=*/std::nullopt, lenParams);
384 return {ref, len};
385}
386
387fir::CharBoxValue fir::factory::CharacterExprHelper::createTempFrom(
388 const fir::ExtendedValue &source) {
389 const auto *charBox = source.getCharBox();
390 if (!charBox)
391 fir::emitFatalError(loc, "source must be a fir::CharBoxValue");
392 auto len = charBox->getLen();
393 auto sourceTy = charBox->getBuffer().getType();
394 auto temp = createCharacterTemp(sourceTy, len);
395 if (fir::isa_ref_type(sourceTy)) {
396 createCopy(temp, *charBox, len);
397 } else {
398 auto ref = builder.createConvert(loc, builder.getRefType(sourceTy),
399 temp.getBuffer());
400 builder.create<fir::StoreOp>(loc, charBox->getBuffer(), ref);
401 }
402 return temp;
403}
404
405// Simple length one character assignment without loops.
406void fir::factory::CharacterExprHelper::createLengthOneAssign(
407 const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
408 auto addr = lhs.getBuffer();
409 auto toTy = fir::unwrapRefType(addr.getType());
410 mlir::Value val = rhs.getBuffer();
411 if (fir::isa_ref_type(val.getType())) {
412 auto fromCharLen1RefTy = builder.getRefType(getSingletonCharType(
413 builder.getContext(),
414 getCharacterKind(fir::unwrapRefType(val.getType()))));
415 val = builder.create<fir::LoadOp>(
416 loc, builder.createConvert(loc, fromCharLen1RefTy, val));
417 }
418 auto toCharLen1Ty =
419 getSingletonCharType(builder.getContext(), getCharacterKind(toTy));
420 val = builder.createConvert(loc, toCharLen1Ty, val);
421 builder.create<fir::StoreOp>(
422 loc, val,
423 builder.createConvert(loc, builder.getRefType(toCharLen1Ty), addr));
424}
425
426/// Returns the minimum of integer mlir::Value \p a and \b.
427mlir::Value genMin(fir::FirOpBuilder &builder, mlir::Location loc,
428 mlir::Value a, mlir::Value b) {
429 auto cmp = builder.create<mlir::arith::CmpIOp>(
430 loc, mlir::arith::CmpIPredicate::slt, a, b);
431 return builder.create<mlir::arith::SelectOp>(loc, cmp, a, b);
432}
433
434void fir::factory::CharacterExprHelper::createAssign(
435 const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
436 auto rhsCstLen = getCompileTimeLength(rhs);
437 auto lhsCstLen = getCompileTimeLength(lhs);
438 bool compileTimeSameLength = false;
439 bool isLengthOneAssign = false;
440
441 if (lhsCstLen && rhsCstLen && *lhsCstLen == *rhsCstLen) {
442 compileTimeSameLength = true;
443 if (*lhsCstLen == 1)
444 isLengthOneAssign = true;
445 } else if (rhs.getLen() == lhs.getLen()) {
446 compileTimeSameLength = true;
447
448 // If the length values are the same for LHS and RHS,
449 // then we can rely on the constant length deduced from
450 // any of the two types.
451 if (lhsCstLen && *lhsCstLen == 1)
452 isLengthOneAssign = true;
453 if (rhsCstLen && *rhsCstLen == 1)
454 isLengthOneAssign = true;
455
456 // We could have recognized constant operations here (e.g.
457 // two different arith.constant ops may produce the same value),
458 // but for now leave it to CSE to get rid of the duplicates.
459 }
460 if (isLengthOneAssign) {
461 createLengthOneAssign(lhs, rhs);
462 return;
463 }
464
465 // Copy the minimum of the lhs and rhs lengths and pad the lhs remainder
466 // if needed.
467 auto copyCount = lhs.getLen();
468 auto idxTy = builder.getIndexType();
469 if (!compileTimeSameLength) {
470 auto lhsLen = builder.createConvert(loc, idxTy, lhs.getLen());
471 auto rhsLen = builder.createConvert(loc, idxTy, rhs.getLen());
472 copyCount = genMin(builder, loc, lhsLen, rhsLen);
473 }
474
475 // Actual copy
476 createCopy(lhs, rhs, copyCount);
477
478 // Pad if needed.
479 if (!compileTimeSameLength) {
480 auto one = builder.createIntegerConstant(loc, lhs.getLen().getType(), 1);
481 auto maxPadding =
482 builder.create<mlir::arith::SubIOp>(loc, lhs.getLen(), one);
483 createPadding(lhs, copyCount, maxPadding);
484 }
485}
486
487fir::CharBoxValue fir::factory::CharacterExprHelper::createConcatenate(
488 const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
489 auto lhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
490 lhs.getLen());
491 auto rhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
492 rhs.getLen());
493 mlir::Value len = builder.create<mlir::arith::AddIOp>(loc, lhsLen, rhsLen);
494 auto temp = createCharacterTemp(getCharacterType(rhs), len);
495 createCopy(temp, lhs, lhsLen);
496 auto one = builder.createIntegerConstant(loc, len.getType(), 1);
497 auto upperBound = builder.create<mlir::arith::SubIOp>(loc, len, one);
498 auto lhsLenIdx = builder.createConvert(loc, builder.getIndexType(), lhsLen);
499 auto fromBuff = getCharBoxBuffer(rhs);
500 auto toBuff = getCharBoxBuffer(temp);
501 fir::factory::DoLoopHelper{builder, loc}.createLoop(
502 lhsLenIdx, upperBound, one,
503 [&](fir::FirOpBuilder &bldr, mlir::Value index) {
504 auto rhsIndex = bldr.create<mlir::arith::SubIOp>(loc, index, lhsLenIdx);
505 auto charVal = createLoadCharAt(fromBuff, rhsIndex);
506 createStoreCharAt(toBuff, index, charVal);
507 });
508 return temp;
509}
510
511mlir::Value fir::factory::CharacterExprHelper::genSubstringBase(
512 mlir::Value stringRawAddr, mlir::Value lowerBound,
513 mlir::Type substringAddrType, mlir::Value one) {
514 if (!one)
515 one = builder.createIntegerConstant(loc, lowerBound.getType(), 1);
516 auto offset =
517 builder.create<mlir::arith::SubIOp>(loc, lowerBound, one).getResult();
518 auto addr = createElementAddr(stringRawAddr, offset);
519 return builder.createConvert(loc, substringAddrType, addr);
520}
521
522fir::CharBoxValue fir::factory::CharacterExprHelper::createSubstring(
523 const fir::CharBoxValue &box, llvm::ArrayRef<mlir::Value> bounds) {
524 // Constant need to be materialize in memory to use fir.coordinate_of.
525 auto nbounds = bounds.size();
526 if (nbounds < 1 || nbounds > 2) {
527 mlir::emitError(loc, "Incorrect number of bounds in substring");
528 return {mlir::Value{}, mlir::Value{}};
529 }
530 mlir::SmallVector<mlir::Value> castBounds;
531 // Convert bounds to length type to do safe arithmetic on it.
532 for (auto bound : bounds)
533 castBounds.push_back(
534 builder.createConvert(loc, builder.getCharacterLengthType(), bound));
535 auto lowerBound = castBounds[0];
536 // FIR CoordinateOp is zero based but Fortran substring are one based.
537 auto kind = getCharacterKind(box.getBuffer().getType());
538 auto charTy = fir::CharacterType::getUnknownLen(builder.getContext(), kind);
539 auto resultType = builder.getRefType(charTy);
540 auto one = builder.createIntegerConstant(loc, lowerBound.getType(), 1);
541 auto substringRef =
542 genSubstringBase(box.getBuffer(), lowerBound, resultType, one);
543
544 // Compute the length.
545 mlir::Value substringLen;
546 if (nbounds < 2) {
547 substringLen =
548 builder.create<mlir::arith::SubIOp>(loc, box.getLen(), castBounds[0]);
549 } else {
550 substringLen =
551 builder.create<mlir::arith::SubIOp>(loc, castBounds[1], castBounds[0]);
552 }
553 substringLen = builder.create<mlir::arith::AddIOp>(loc, substringLen, one);
554
555 // Set length to zero if bounds were reversed (Fortran 2018 9.4.1)
556 auto zero = builder.createIntegerConstant(loc, substringLen.getType(), 0);
557 auto cdt = builder.create<mlir::arith::CmpIOp>(
558 loc, mlir::arith::CmpIPredicate::slt, substringLen, zero);
559 substringLen =
560 builder.create<mlir::arith::SelectOp>(loc, cdt, zero, substringLen);
561
562 return {substringRef, substringLen};
563}
564
565mlir::Value
566fir::factory::CharacterExprHelper::createLenTrim(const fir::CharBoxValue &str) {
567 // Note: Runtime for LEN_TRIM should also be available at some
568 // point. For now use an inlined implementation.
569 auto indexType = builder.getIndexType();
570 auto len = builder.createConvert(loc, indexType, str.getLen());
571 auto one = builder.createIntegerConstant(loc, indexType, 1);
572 auto minusOne = builder.createIntegerConstant(loc, indexType, -1);
573 auto zero = builder.createIntegerConstant(loc, indexType, 0);
574 auto trueVal = builder.createIntegerConstant(loc, builder.getI1Type(), 1);
575 auto blank = createBlankConstantCode(getCharacterType(str));
576 mlir::Value lastChar = builder.create<mlir::arith::SubIOp>(loc, len, one);
577
578 auto iterWhile =
579 builder.create<fir::IterWhileOp>(loc, lastChar, zero, minusOne, trueVal,
580 /*returnFinalCount=*/false, lastChar);
581 auto insPt = builder.saveInsertionPoint();
582 builder.setInsertionPointToStart(iterWhile.getBody());
583 auto index = iterWhile.getInductionVar();
584 // Look for first non-blank from the right of the character.
585 auto fromBuff = getCharBoxBuffer(str);
586 auto elemAddr = createElementAddr(fromBuff, index);
587 auto codeAddr =
588 builder.createConvert(loc, builder.getRefType(blank.getType()), elemAddr);
589 auto c = builder.create<fir::LoadOp>(loc, codeAddr);
590 auto isBlank = builder.create<mlir::arith::CmpIOp>(
591 loc, mlir::arith::CmpIPredicate::eq, blank, c);
592 llvm::SmallVector<mlir::Value> results = {isBlank, index};
593 builder.create<fir::ResultOp>(loc, results);
594 builder.restoreInsertionPoint(insPt);
595 // Compute length after iteration (zero if all blanks)
596 mlir::Value newLen =
597 builder.create<mlir::arith::AddIOp>(loc, iterWhile.getResult(1), one);
598 auto result = builder.create<mlir::arith::SelectOp>(
599 loc, iterWhile.getResult(0), zero, newLen);
600 return builder.createConvert(loc, builder.getCharacterLengthType(), result);
601}
602
603fir::CharBoxValue
604fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
605 int len) {
606 assert(len >= 0 && "expected positive length");
607 auto kind = recoverCharacterType(type).getFKind();
608 auto charType = fir::CharacterType::get(builder.getContext(), kind, len);
609 auto addr = builder.create<fir::AllocaOp>(loc, charType);
610 auto mlirLen =
611 builder.createIntegerConstant(loc, builder.getCharacterLengthType(), len);
612 return {addr, mlirLen};
613}
614
615// Returns integer with code for blank. The integer has the same
616// size as the character. Blank has ascii space code for all kinds.
617mlir::Value fir::factory::CharacterExprHelper::createBlankConstantCode(
618 fir::CharacterType type) {
619 auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
620 auto intType = builder.getIntegerType(bits);
621 return builder.createIntegerConstant(loc, intType, ' ');
622}
623
624mlir::Value fir::factory::CharacterExprHelper::createBlankConstant(
625 fir::CharacterType type) {
626 return createSingletonFromCode(createBlankConstantCode(type),
627 type.getFKind());
628}
629
630void fir::factory::CharacterExprHelper::createAssign(
631 const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs) {
632 if (auto *str = rhs.getBoxOf<fir::CharBoxValue>()) {
633 if (auto *to = lhs.getBoxOf<fir::CharBoxValue>()) {
634 createAssign(*to, *str);
635 return;
636 }
637 }
638 TODO(loc, "character array assignment");
639 // Note that it is not sure the array aspect should be handled
640 // by this utility.
641}
642
643mlir::Value
644fir::factory::CharacterExprHelper::createEmboxChar(mlir::Value addr,
645 mlir::Value len) {
646 return createEmbox(fir::CharBoxValue{addr, len});
647}
648
649std::pair<mlir::Value, mlir::Value>
650fir::factory::CharacterExprHelper::createUnboxChar(mlir::Value boxChar) {
651 using T = std::pair<mlir::Value, mlir::Value>;
652 return toExtendedValue(boxChar).match(
653 [](const fir::CharBoxValue &b) -> T {
654 return {b.getBuffer(), b.getLen()};
655 },
656 [](const fir::CharArrayBoxValue &b) -> T {
657 return {b.getBuffer(), b.getLen()};
658 },
659 [](const auto &) -> T { llvm::report_fatal_error("not a character"); });
660}
661
662bool fir::factory::CharacterExprHelper::isCharacterLiteral(mlir::Type type) {
663 if (auto seqType = mlir::dyn_cast<fir::SequenceType>(type))
664 return (seqType.getShape().size() == 1) &&
665 fir::isa_char(seqType.getEleTy());
666 return false;
667}
668
669fir::KindTy
670fir::factory::CharacterExprHelper::getCharacterKind(mlir::Type type) {
671 assert(isCharacterScalar(type) && "expected scalar character");
672 return recoverCharacterType(type).getFKind();
673}
674
675fir::KindTy
676fir::factory::CharacterExprHelper::getCharacterOrSequenceKind(mlir::Type type) {
677 return recoverCharacterType(type).getFKind();
678}
679
680bool fir::factory::CharacterExprHelper::hasConstantLengthInType(
681 const fir::ExtendedValue &exv) {
682 auto charTy = recoverCharacterType(fir::getBase(exv).getType());
683 return charTy.hasConstantLen();
684}
685
686mlir::Value
687fir::factory::CharacterExprHelper::createSingletonFromCode(mlir::Value code,
688 int kind) {
689 auto charType = fir::CharacterType::get(builder.getContext(), kind, 1);
690 auto bits = builder.getKindMap().getCharacterBitsize(kind);
691 auto intType = builder.getIntegerType(bits);
692 auto cast = builder.createConvert(loc, intType, code);
693 auto undef = builder.create<fir::UndefOp>(loc, charType);
694 auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
695 return builder.create<fir::InsertValueOp>(loc, charType, undef, cast,
696 builder.getArrayAttr(zero));
697}
698
699mlir::Value fir::factory::CharacterExprHelper::extractCodeFromSingleton(
700 mlir::Value singleton) {
701 auto type = getCharacterType(singleton);
702 assert(type.getLen() == 1);
703 auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
704 auto intType = builder.getIntegerType(bits);
705 auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
706 return builder.create<fir::ExtractValueOp>(loc, intType, singleton,
707 builder.getArrayAttr(zero));
708}
709
710mlir::Value
711fir::factory::CharacterExprHelper::readLengthFromBox(mlir::Value box) {
712 auto charTy = recoverCharacterType(box.getType());
713 return readLengthFromBox(box, charTy);
714}
715
716mlir::Value fir::factory::CharacterExprHelper::readLengthFromBox(
717 mlir::Value box, fir::CharacterType charTy) {
718 auto lenTy = builder.getCharacterLengthType();
719 auto size = builder.create<fir::BoxEleSizeOp>(loc, lenTy, box);
720 auto bits = builder.getKindMap().getCharacterBitsize(charTy.getFKind());
721 auto width = bits / 8;
722 if (width > 1) {
723 auto widthVal = builder.createIntegerConstant(loc, lenTy, width);
724 return builder.create<mlir::arith::DivSIOp>(loc, size, widthVal);
725 }
726 return size;
727}
728
729mlir::Value fir::factory::CharacterExprHelper::getLength(mlir::Value memref) {
730 auto memrefType = memref.getType();
731 auto charType = recoverCharacterType(memrefType);
732 assert(charType && "must be a character type");
733 if (charType.hasConstantLen())
734 return builder.createIntegerConstant(loc, builder.getCharacterLengthType(),
735 charType.getLen());
736 if (mlir::isa<fir::BoxType>(memrefType))
737 return readLengthFromBox(memref);
738 if (mlir::isa<fir::BoxCharType>(memrefType))
739 return createUnboxChar(memref).second;
740
741 // Length cannot be deduced from memref.
742 return {};
743}
744
745std::pair<mlir::Value, mlir::Value>
746fir::factory::extractCharacterProcedureTuple(fir::FirOpBuilder &builder,
747 mlir::Location loc,
748 mlir::Value tuple,
749 bool openBoxProc) {
750 mlir::TupleType tupleType = mlir::cast<mlir::TupleType>(tuple.getType());
751 mlir::Value addr = builder.create<fir::ExtractValueOp>(
752 loc, tupleType.getType(0), tuple,
753 builder.getArrayAttr(
754 {builder.getIntegerAttr(builder.getIndexType(), 0)}));
755 mlir::Value proc = [&]() -> mlir::Value {
756 if (openBoxProc)
757 if (auto addrTy = mlir::dyn_cast<fir::BoxProcType>(addr.getType()))
758 return builder.create<fir::BoxAddrOp>(loc, addrTy.getEleTy(), addr);
759 return addr;
760 }();
761 mlir::Value len = builder.create<fir::ExtractValueOp>(
762 loc, tupleType.getType(1), tuple,
763 builder.getArrayAttr(
764 {builder.getIntegerAttr(builder.getIndexType(), 1)}));
765 return {proc, len};
766}
767
768mlir::Value fir::factory::createCharacterProcedureTuple(
769 fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argTy,
770 mlir::Value addr, mlir::Value len) {
771 mlir::TupleType tupleType = mlir::cast<mlir::TupleType>(argTy);
772 addr = builder.createConvert(loc, tupleType.getType(0), addr);
773 if (len)
774 len = builder.createConvert(loc, tupleType.getType(1), len);
775 else
776 len = builder.create<fir::UndefOp>(loc, tupleType.getType(1));
777 mlir::Value tuple = builder.create<fir::UndefOp>(loc, tupleType);
778 tuple = builder.create<fir::InsertValueOp>(
779 loc, tupleType, tuple, addr,
780 builder.getArrayAttr(
781 {builder.getIntegerAttr(builder.getIndexType(), 0)}));
782 tuple = builder.create<fir::InsertValueOp>(
783 loc, tupleType, tuple, len,
784 builder.getArrayAttr(
785 {builder.getIntegerAttr(builder.getIndexType(), 1)}));
786 return tuple;
787}
788
789mlir::Type
790fir::factory::getCharacterProcedureTupleType(mlir::Type funcPointerType) {
791 mlir::MLIRContext *context = funcPointerType.getContext();
792 mlir::Type lenType = mlir::IntegerType::get(context, 64);
793 return mlir::TupleType::get(context, {funcPointerType, lenType});
794}
795
796fir::CharBoxValue fir::factory::CharacterExprHelper::createCharExtremum(
797 bool predIsMin, llvm::ArrayRef<fir::CharBoxValue> opCBVs) {
798 // inputs: we are given a vector of all of the charboxes of the arguments
799 // passed to hlfir.char_extremum, as well as the predicate for whether we
800 // want llt or lgt
801 //
802 // note: we know that, regardless of whether we're looking at smallest or
803 // largest char, the size of the output buffer will be the same size as the
804 // largest character out of all of the operands. so, we find the biggest
805 // length first. It's okay if these char lengths are not known at compile
806 // time.
807
808 fir::CharBoxValue firstCBV = opCBVs[0];
809 mlir::Value firstBuf = getCharBoxBuffer(firstCBV);
810 auto firstLen = builder.createConvert(loc, builder.getCharacterLengthType(),
811 firstCBV.getLen());
812
813 mlir::Value resultBuf = firstBuf;
814 mlir::Value resultLen = firstLen;
815 mlir::Value biggestLen = firstLen;
816
817 // values for casting buf type and len type
818 auto typeLen = fir::CharacterType::unknownLen();
819 auto kind = recoverCharacterType(firstBuf.getType()).getFKind();
820 auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen);
821 auto type = fir::ReferenceType::get(charTy);
822
823 size_t numOperands = opCBVs.size();
824 for (size_t cbv_idx = 1; cbv_idx < numOperands; ++cbv_idx) {
825 auto currChar = opCBVs[cbv_idx];
826 auto currBuf = getCharBoxBuffer(currChar);
827 auto currLen = builder.createConvert(loc, builder.getCharacterLengthType(),
828 currChar.getLen());
829 // biggest len result
830 mlir::Value lhsBigger = builder.create<mlir::arith::CmpIOp>(
831 loc, mlir::arith::CmpIPredicate::uge, biggestLen, currLen);
832 biggestLen = builder.create<mlir::arith::SelectOp>(loc, lhsBigger,
833 biggestLen, currLen);
834
835 auto cmp = predIsMin ? mlir::arith::CmpIPredicate::slt
836 : mlir::arith::CmpIPredicate::sgt;
837
838 // lexical compare result
839 mlir::Value resultCmp = fir::runtime::genCharCompare(
840 builder, loc, cmp, currBuf, currLen, resultBuf, resultLen);
841
842 // it's casting (to unknown size) time!
843 resultBuf = builder.createConvert(loc, type, resultBuf);
844 currBuf = builder.createConvert(loc, type, currBuf);
845
846 resultBuf = builder.create<mlir::arith::SelectOp>(loc, resultCmp, currBuf,
847 resultBuf);
848 resultLen = builder.create<mlir::arith::SelectOp>(loc, resultCmp, currLen,
849 resultLen);
850 }
851
852 // now that we know the lexicographically biggest/smallest char and which char
853 // had the biggest len, we can populate a temp CBV and return it
854 fir::CharBoxValue temp = createCharacterTemp(resultBuf.getType(), biggestLen);
855 auto toBuf = temp;
856 fir::CharBoxValue fromBuf{resultBuf, resultLen};
857 createAssign(toBuf, fromBuf);
858 return temp;
859}
860
861fir::CharBoxValue
862fir::factory::convertCharacterKind(fir::FirOpBuilder &builder,
863 mlir::Location loc,
864 fir::CharBoxValue srcBoxChar, int toKind) {
865 // Use char_convert. Each code point is translated from a
866 // narrower/wider encoding to the target encoding. For example, 'A'
867 // may be translated from 0x41 : i8 to 0x0041 : i16. The symbol
868 // for euro (0x20AC : i16) may be translated from a wide character
869 // to "0xE2 0x82 0xAC" : UTF-8.
870 mlir::Value bufferSize = srcBoxChar.getLen();
871 auto kindMap = builder.getKindMap();
872 mlir::Value boxCharAddr = srcBoxChar.getAddr();
873 auto fromTy = boxCharAddr.getType();
874 if (auto charTy = mlir::dyn_cast<fir::CharacterType>(fromTy)) {
875 // boxchar is a value, not a variable. Turn it into a temporary.
876 // As a value, it ought to have a constant LEN value.
877 assert(charTy.hasConstantLen() && "must have constant length");
878 mlir::Value tmp = builder.createTemporary(loc, charTy);
879 builder.create<fir::StoreOp>(loc, boxCharAddr, tmp);
880 boxCharAddr = tmp;
881 }
882 auto fromBits = kindMap.getCharacterBitsize(
883 mlir::cast<fir::CharacterType>(fir::unwrapRefType(fromTy)).getFKind());
884 auto toBits = kindMap.getCharacterBitsize(toKind);
885 if (toBits < fromBits) {
886 // Scale by relative ratio to give a buffer of the same length.
887 auto ratio = builder.createIntegerConstant(loc, bufferSize.getType(),
888 fromBits / toBits);
889 bufferSize = builder.create<mlir::arith::MulIOp>(loc, bufferSize, ratio);
890 }
891 mlir::Type toType =
892 fir::CharacterType::getUnknownLen(builder.getContext(), toKind);
893 auto dest = builder.createTemporary(loc, toType, /*name=*/{}, /*shape=*/{},
894 mlir::ValueRange{bufferSize});
895 builder.create<fir::CharConvertOp>(loc, boxCharAddr, srcBoxChar.getLen(),
896 dest);
897 return fir::CharBoxValue{dest, srcBoxChar.getLen()};
898}
899

source code of flang/lib/Optimizer/Builder/Character.cpp