1//===-- Character.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
10//
11//===----------------------------------------------------------------------===//
12
13#include "flang/Optimizer/Builder/Character.h"
14#include "flang/Optimizer/Builder/DoLoopHelper.h"
15#include "flang/Optimizer/Builder/FIRBuilder.h"
16#include "flang/Optimizer/Builder/Todo.h"
17#include "flang/Optimizer/Dialect/FIROpsSupport.h"
18#include "llvm/Support/Debug.h"
19#include <optional>
20
21#define DEBUG_TYPE "flang-lower-character"
22
23//===----------------------------------------------------------------------===//
24// CharacterExprHelper implementation
25//===----------------------------------------------------------------------===//
26
27/// Unwrap all the ref and box types and return the inner element type.
28static mlir::Type unwrapBoxAndRef(mlir::Type type) {
29 if (auto boxType = type.dyn_cast<fir::BoxCharType>())
30 return boxType.getEleTy();
31 while (true) {
32 type = fir::unwrapRefType(type);
33 if (auto boxTy = type.dyn_cast<fir::BoxType>())
34 type = boxTy.getEleTy();
35 else
36 break;
37 }
38 return type;
39}
40
41/// Unwrap base fir.char<kind,len> type.
42static fir::CharacterType recoverCharacterType(mlir::Type type) {
43 type = fir::unwrapSequenceType(unwrapBoxAndRef(type));
44 if (auto charTy = type.dyn_cast<fir::CharacterType>())
45 return charTy;
46 llvm::report_fatal_error("expected a character type");
47}
48
49bool fir::factory::CharacterExprHelper::isCharacterScalar(mlir::Type type) {
50 type = unwrapBoxAndRef(type);
51 return !type.isa<fir::SequenceType>() && fir::isa_char(type);
52}
53
54bool fir::factory::CharacterExprHelper::isArray(mlir::Type type) {
55 type = unwrapBoxAndRef(type);
56 if (auto seqTy = type.dyn_cast<fir::SequenceType>())
57 return fir::isa_char(seqTy.getEleTy());
58 return false;
59}
60
61fir::CharacterType
62fir::factory::CharacterExprHelper::getCharacterType(mlir::Type type) {
63 assert(isCharacterScalar(type) && "expected scalar character");
64 return recoverCharacterType(type);
65}
66
67fir::CharacterType
68fir::factory::CharacterExprHelper::getCharType(mlir::Type type) {
69 return recoverCharacterType(type);
70}
71
72fir::CharacterType fir::factory::CharacterExprHelper::getCharacterType(
73 const fir::CharBoxValue &box) {
74 return getCharacterType(box.getBuffer().getType());
75}
76
77fir::CharacterType
78fir::factory::CharacterExprHelper::getCharacterType(mlir::Value str) {
79 return getCharacterType(str.getType());
80}
81
82/// Determine the static size of the character. Returns the computed size, not
83/// an IR Value.
84static std::optional<fir::CharacterType::LenType>
85getCompileTimeLength(const fir::CharBoxValue &box) {
86 auto len = recoverCharacterType(box.getBuffer().getType()).getLen();
87 if (len == fir::CharacterType::unknownLen())
88 return {};
89 return len;
90}
91
92/// Detect the precondition that the value `str` does not reside in memory. Such
93/// values will have a type `!fir.array<...x!fir.char<N>>` or `!fir.char<N>`.
94LLVM_ATTRIBUTE_UNUSED static bool needToMaterialize(mlir::Value str) {
95 return str.getType().isa<fir::SequenceType>() || fir::isa_char(str.getType());
96}
97
98/// This is called only if `str` does not reside in memory. Such a bare string
99/// value will be converted into a memory-based temporary and an extended
100/// boxchar value returned.
101fir::CharBoxValue
102fir::factory::CharacterExprHelper::materializeValue(mlir::Value str) {
103 assert(needToMaterialize(str));
104 auto ty = str.getType();
105 assert(isCharacterScalar(ty) && "expected scalar character");
106 auto charTy = ty.dyn_cast<fir::CharacterType>();
107 if (!charTy || charTy.getLen() == fir::CharacterType::unknownLen()) {
108 LLVM_DEBUG(llvm::dbgs() << "cannot materialize: " << str << '\n');
109 llvm_unreachable("must be a !fir.char<N> type");
110 }
111 auto len = builder.createIntegerConstant(
112 loc, builder.getCharacterLengthType(), charTy.getLen());
113 auto temp = builder.create<fir::AllocaOp>(loc, charTy);
114 builder.create<fir::StoreOp>(loc, str, temp);
115 LLVM_DEBUG(llvm::dbgs() << "materialized as local: " << str << " -> (" << temp
116 << ", " << len << ")\n");
117 return {temp, len};
118}
119
120fir::ExtendedValue
121fir::factory::CharacterExprHelper::toExtendedValue(mlir::Value character,
122 mlir::Value len) {
123 auto lenType = builder.getCharacterLengthType();
124 auto type = character.getType();
125 auto base = fir::isa_passbyref_type(type) ? character : mlir::Value{};
126 auto resultLen = len;
127 llvm::SmallVector<mlir::Value> extents;
128
129 if (auto eleType = fir::dyn_cast_ptrEleTy(type))
130 type = eleType;
131
132 if (auto arrayType = type.dyn_cast<fir::SequenceType>()) {
133 type = arrayType.getEleTy();
134 auto indexType = builder.getIndexType();
135 for (auto extent : arrayType.getShape()) {
136 if (extent == fir::SequenceType::getUnknownExtent())
137 break;
138 extents.emplace_back(
139 builder.createIntegerConstant(loc, indexType, extent));
140 }
141 // Last extent might be missing in case of assumed-size. If more extents
142 // could not be deduced from type, that's an error (a fir.box should
143 // have been used in the interface).
144 if (extents.size() + 1 < arrayType.getShape().size())
145 mlir::emitError(loc, "cannot retrieve array extents from type");
146 }
147
148 if (auto charTy = type.dyn_cast<fir::CharacterType>()) {
149 if (!resultLen && charTy.getLen() != fir::CharacterType::unknownLen())
150 resultLen = builder.createIntegerConstant(loc, lenType, charTy.getLen());
151 } else if (auto boxCharType = type.dyn_cast<fir::BoxCharType>()) {
152 auto refType = builder.getRefType(boxCharType.getEleTy());
153 // If the embox is accessible, use its operand to avoid filling
154 // the generated fir with embox/unbox.
155 mlir::Value boxCharLen;
156 if (auto definingOp = character.getDefiningOp()) {
157 if (auto box = mlir::dyn_cast<fir::EmboxCharOp>(definingOp)) {
158 base = box.getMemref();
159 boxCharLen = box.getLen();
160 }
161 }
162 if (!boxCharLen) {
163 auto unboxed =
164 builder.create<fir::UnboxCharOp>(loc, refType, lenType, character);
165 base = builder.createConvert(loc, refType, unboxed.getResult(0));
166 boxCharLen = unboxed.getResult(1);
167 }
168 if (!resultLen) {
169 resultLen = boxCharLen;
170 }
171 } else if (type.isa<fir::BoxType>()) {
172 mlir::emitError(loc, "descriptor or derived type not yet handled");
173 } else {
174 llvm_unreachable("Cannot translate mlir::Value to character ExtendedValue");
175 }
176
177 if (!base) {
178 if (auto load =
179 mlir::dyn_cast_or_null<fir::LoadOp>(character.getDefiningOp())) {
180 base = load.getOperand();
181 } else {
182 return materializeValue(fir::getBase(character));
183 }
184 }
185 if (!resultLen)
186 llvm::report_fatal_error("no dynamic length found for character");
187 if (!extents.empty())
188 return fir::CharArrayBoxValue{base, resultLen, extents};
189 return fir::CharBoxValue{base, resultLen};
190}
191
192static mlir::Type getSingletonCharType(mlir::MLIRContext *ctxt, int kind) {
193 return fir::CharacterType::getSingleton(ctxt, kind);
194}
195
196mlir::Value
197fir::factory::CharacterExprHelper::createEmbox(const fir::CharBoxValue &box) {
198 // Base CharBoxValue of CharArrayBoxValue are ok here (do not require a scalar
199 // type)
200 auto charTy = recoverCharacterType(box.getBuffer().getType());
201 auto boxCharType =
202 fir::BoxCharType::get(builder.getContext(), charTy.getFKind());
203 auto refType = fir::ReferenceType::get(boxCharType.getEleTy());
204 mlir::Value buff = box.getBuffer();
205 // fir.boxchar requires a memory reference. Allocate temp if the character is
206 // not in memory.
207 if (!fir::isa_ref_type(buff.getType())) {
208 auto temp = builder.createTemporary(loc, buff.getType());
209 builder.create<fir::StoreOp>(loc, buff, temp);
210 buff = temp;
211 }
212 // fir.emboxchar only accepts scalar, cast array buffer to a scalar buffer.
213 if (mlir::isa<fir::SequenceType>(fir::dyn_cast_ptrEleTy(buff.getType())))
214 buff = builder.createConvert(loc, refType, buff);
215 // Convert in case the provided length is not of the integer type that must
216 // be used in boxchar.
217 auto len = builder.createConvert(loc, builder.getCharacterLengthType(),
218 box.getLen());
219 return builder.create<fir::EmboxCharOp>(loc, boxCharType, buff, len);
220}
221
222fir::CharBoxValue fir::factory::CharacterExprHelper::toScalarCharacter(
223 const fir::CharArrayBoxValue &box) {
224 if (box.getBuffer().getType().isa<fir::PointerType>())
225 TODO(loc, "concatenating non contiguous character array into a scalar");
226
227 // TODO: add a fast path multiplying new length at compile time if the info is
228 // in the array type.
229 auto lenType = builder.getCharacterLengthType();
230 auto len = builder.createConvert(loc, lenType, box.getLen());
231 for (auto extent : box.getExtents())
232 len = builder.create<mlir::arith::MulIOp>(
233 loc, len, builder.createConvert(loc, lenType, extent));
234
235 // TODO: typeLen can be improved in compiled constant cases
236 // TODO: allow bare fir.array<> (no ref) conversion here ?
237 auto typeLen = fir::CharacterType::unknownLen();
238 auto kind = recoverCharacterType(box.getBuffer().getType()).getFKind();
239 auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen);
240 auto type = fir::ReferenceType::get(charTy);
241 auto buffer = builder.createConvert(loc, type, box.getBuffer());
242 return {buffer, len};
243}
244
245mlir::Value fir::factory::CharacterExprHelper::createEmbox(
246 const fir::CharArrayBoxValue &box) {
247 // Use same embox as for scalar. It's losing the actual data size information
248 // (We do not multiply the length by the array size), but that is what Fortran
249 // call interfaces using boxchar expect.
250 return createEmbox(static_cast<const fir::CharBoxValue &>(box));
251}
252
253/// Get the address of the element at position \p index of the scalar character
254/// \p buffer.
255/// \p buffer must be of type !fir.ref<fir.char<k, len>>. The length may be
256/// unknown. \p index must have any integer type, and is zero based. The return
257/// value is a singleton address (!fir.ref<!fir.char<kind>>)
258mlir::Value
259fir::factory::CharacterExprHelper::createElementAddr(mlir::Value buffer,
260 mlir::Value index) {
261 // The only way to address an element of a fir.ref<char<kind, len>> is to cast
262 // it to a fir.array<len x fir.char<kind>> and use fir.coordinate_of.
263 auto bufferType = buffer.getType();
264 assert(fir::isa_ref_type(bufferType));
265 assert(isCharacterScalar(bufferType));
266 auto charTy = recoverCharacterType(bufferType);
267 auto singleTy = getSingletonCharType(builder.getContext(), charTy.getFKind());
268 auto singleRefTy = builder.getRefType(singleTy);
269 auto extent = fir::SequenceType::getUnknownExtent();
270 if (charTy.getLen() != fir::CharacterType::unknownLen())
271 extent = charTy.getLen();
272 auto coorTy = builder.getRefType(fir::SequenceType::get({extent}, singleTy));
273
274 auto coor = builder.createConvert(loc, coorTy, buffer);
275 auto i = builder.createConvert(loc, builder.getIndexType(), index);
276 return builder.create<fir::CoordinateOp>(loc, singleRefTy, coor, i);
277}
278
279/// Load a character out of `buff` from offset `index`.
280/// `buff` must be a reference to memory.
281mlir::Value
282fir::factory::CharacterExprHelper::createLoadCharAt(mlir::Value buff,
283 mlir::Value index) {
284 LLVM_DEBUG(llvm::dbgs() << "load a char: " << buff << " type: "
285 << buff.getType() << " at: " << index << '\n');
286 return builder.create<fir::LoadOp>(loc, createElementAddr(buff, index));
287}
288
289/// Store the singleton character `c` to `str` at offset `index`.
290/// `str` must be a reference to memory.
291void fir::factory::CharacterExprHelper::createStoreCharAt(mlir::Value str,
292 mlir::Value index,
293 mlir::Value c) {
294 LLVM_DEBUG(llvm::dbgs() << "store the char: " << c << " into: " << str
295 << " type: " << str.getType() << " at: " << index
296 << '\n');
297 auto addr = createElementAddr(str, index);
298 builder.create<fir::StoreOp>(loc, c, addr);
299}
300
301// FIXME: this temp is useless... either fir.coordinate_of needs to
302// work on "loaded" characters (!fir.array<len x fir.char<kind>>) or
303// character should never be loaded.
304// If this is a fir.array<>, allocate and store the value so that
305// fir.cooridnate_of can be use on the value.
306mlir::Value fir::factory::CharacterExprHelper::getCharBoxBuffer(
307 const fir::CharBoxValue &box) {
308 auto buff = box.getBuffer();
309 if (fir::isa_char(buff.getType())) {
310 auto newBuff = builder.create<fir::AllocaOp>(loc, buff.getType());
311 builder.create<fir::StoreOp>(loc, buff, newBuff);
312 return newBuff;
313 }
314 return buff;
315}
316
317/// Create a loop to copy `count` characters from `src` to `dest`. Note that the
318/// KIND indicates the number of bits in a code point. (ASCII, UCS-2, or UCS-4.)
319void fir::factory::CharacterExprHelper::createCopy(
320 const fir::CharBoxValue &dest, const fir::CharBoxValue &src,
321 mlir::Value count) {
322 auto fromBuff = getCharBoxBuffer(src);
323 auto toBuff = getCharBoxBuffer(dest);
324 LLVM_DEBUG(llvm::dbgs() << "create char copy from: "; src.dump();
325 llvm::dbgs() << " to: "; dest.dump();
326 llvm::dbgs() << " count: " << count << '\n');
327 auto kind = getCharacterKind(src.getBuffer().getType());
328 // If the src and dest are the same KIND, then use memmove to move the bits.
329 // We don't have to worry about overlapping ranges with memmove.
330 if (getCharacterKind(dest.getBuffer().getType()) == kind) {
331 auto bytes = builder.getKindMap().getCharacterBitsize(kind) / 8;
332 auto i64Ty = builder.getI64Type();
333 auto kindBytes = builder.createIntegerConstant(loc, i64Ty, bytes);
334 auto castCount = builder.createConvert(loc, i64Ty, count);
335 auto totalBytes =
336 builder.create<mlir::arith::MulIOp>(loc, kindBytes, castCount);
337 auto notVolatile = builder.createBool(loc, false);
338 auto memmv = getLlvmMemmove(builder);
339 auto argTys = memmv.getFunctionType().getInputs();
340 auto toPtr = builder.createConvert(loc, argTys[0], toBuff);
341 auto fromPtr = builder.createConvert(loc, argTys[1], fromBuff);
342 builder.create<fir::CallOp>(
343 loc, memmv, mlir::ValueRange{toPtr, fromPtr, totalBytes, notVolatile});
344 return;
345 }
346
347 // Convert a CHARACTER of one KIND into a CHARACTER of another KIND.
348 builder.create<fir::CharConvertOp>(loc, src.getBuffer(), count,
349 dest.getBuffer());
350}
351
352void fir::factory::CharacterExprHelper::createPadding(
353 const fir::CharBoxValue &str, mlir::Value lower, mlir::Value upper) {
354 auto blank = createBlankConstant(getCharacterType(str));
355 // Always create the loop, if upper < lower, no iteration will be
356 // executed.
357 auto toBuff = getCharBoxBuffer(str);
358 fir::factory::DoLoopHelper{builder, loc}.createLoop(
359 lower, upper, [&](fir::FirOpBuilder &, mlir::Value index) {
360 createStoreCharAt(toBuff, index, blank);
361 });
362}
363
364fir::CharBoxValue
365fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
366 mlir::Value len) {
367 auto kind = recoverCharacterType(type).getFKind();
368 auto typeLen = fir::CharacterType::unknownLen();
369 // If len is a constant, reflect the length in the type.
370 if (auto cstLen = getIntIfConstant(len))
371 typeLen = *cstLen;
372 auto *ctxt = builder.getContext();
373 auto charTy = fir::CharacterType::get(ctxt, kind, typeLen);
374 llvm::SmallVector<mlir::Value> lenParams;
375 if (typeLen == fir::CharacterType::unknownLen())
376 lenParams.push_back(len);
377 auto ref = builder.allocateLocal(loc, charTy, "", ".chrtmp",
378 /*shape=*/std::nullopt, lenParams);
379 return {ref, len};
380}
381
382fir::CharBoxValue fir::factory::CharacterExprHelper::createTempFrom(
383 const fir::ExtendedValue &source) {
384 const auto *charBox = source.getCharBox();
385 if (!charBox)
386 fir::emitFatalError(loc, "source must be a fir::CharBoxValue");
387 auto len = charBox->getLen();
388 auto sourceTy = charBox->getBuffer().getType();
389 auto temp = createCharacterTemp(sourceTy, len);
390 if (fir::isa_ref_type(sourceTy)) {
391 createCopy(temp, *charBox, len);
392 } else {
393 auto ref = builder.createConvert(loc, builder.getRefType(sourceTy),
394 temp.getBuffer());
395 builder.create<fir::StoreOp>(loc, charBox->getBuffer(), ref);
396 }
397 return temp;
398}
399
400// Simple length one character assignment without loops.
401void fir::factory::CharacterExprHelper::createLengthOneAssign(
402 const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
403 auto addr = lhs.getBuffer();
404 auto toTy = fir::unwrapRefType(addr.getType());
405 mlir::Value val = rhs.getBuffer();
406 if (fir::isa_ref_type(val.getType())) {
407 auto fromCharLen1RefTy = builder.getRefType(getSingletonCharType(
408 builder.getContext(),
409 getCharacterKind(fir::unwrapRefType(val.getType()))));
410 val = builder.create<fir::LoadOp>(
411 loc, builder.createConvert(loc, fromCharLen1RefTy, val));
412 }
413 auto toCharLen1Ty =
414 getSingletonCharType(builder.getContext(), getCharacterKind(toTy));
415 val = builder.createConvert(loc, toCharLen1Ty, val);
416 builder.create<fir::StoreOp>(
417 loc, val,
418 builder.createConvert(loc, builder.getRefType(toCharLen1Ty), addr));
419}
420
421/// Returns the minimum of integer mlir::Value \p a and \b.
422mlir::Value genMin(fir::FirOpBuilder &builder, mlir::Location loc,
423 mlir::Value a, mlir::Value b) {
424 auto cmp = builder.create<mlir::arith::CmpIOp>(
425 loc, mlir::arith::CmpIPredicate::slt, a, b);
426 return builder.create<mlir::arith::SelectOp>(loc, cmp, a, b);
427}
428
429void fir::factory::CharacterExprHelper::createAssign(
430 const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
431 auto rhsCstLen = getCompileTimeLength(rhs);
432 auto lhsCstLen = getCompileTimeLength(lhs);
433 bool compileTimeSameLength = false;
434 bool isLengthOneAssign = false;
435
436 if (lhsCstLen && rhsCstLen && *lhsCstLen == *rhsCstLen) {
437 compileTimeSameLength = true;
438 if (*lhsCstLen == 1)
439 isLengthOneAssign = true;
440 } else if (rhs.getLen() == lhs.getLen()) {
441 compileTimeSameLength = true;
442
443 // If the length values are the same for LHS and RHS,
444 // then we can rely on the constant length deduced from
445 // any of the two types.
446 if (lhsCstLen && *lhsCstLen == 1)
447 isLengthOneAssign = true;
448 if (rhsCstLen && *rhsCstLen == 1)
449 isLengthOneAssign = true;
450
451 // We could have recognized constant operations here (e.g.
452 // two different arith.constant ops may produce the same value),
453 // but for now leave it to CSE to get rid of the duplicates.
454 }
455 if (isLengthOneAssign) {
456 createLengthOneAssign(lhs, rhs);
457 return;
458 }
459
460 // Copy the minimum of the lhs and rhs lengths and pad the lhs remainder
461 // if needed.
462 auto copyCount = lhs.getLen();
463 auto idxTy = builder.getIndexType();
464 if (!compileTimeSameLength) {
465 auto lhsLen = builder.createConvert(loc, idxTy, lhs.getLen());
466 auto rhsLen = builder.createConvert(loc, idxTy, rhs.getLen());
467 copyCount = genMin(builder, loc, lhsLen, rhsLen);
468 }
469
470 // Actual copy
471 createCopy(lhs, rhs, copyCount);
472
473 // Pad if needed.
474 if (!compileTimeSameLength) {
475 auto one = builder.createIntegerConstant(loc, lhs.getLen().getType(), 1);
476 auto maxPadding =
477 builder.create<mlir::arith::SubIOp>(loc, lhs.getLen(), one);
478 createPadding(lhs, copyCount, maxPadding);
479 }
480}
481
482fir::CharBoxValue fir::factory::CharacterExprHelper::createConcatenate(
483 const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
484 auto lhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
485 lhs.getLen());
486 auto rhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
487 rhs.getLen());
488 mlir::Value len = builder.create<mlir::arith::AddIOp>(loc, lhsLen, rhsLen);
489 auto temp = createCharacterTemp(getCharacterType(rhs), len);
490 createCopy(temp, lhs, lhsLen);
491 auto one = builder.createIntegerConstant(loc, len.getType(), 1);
492 auto upperBound = builder.create<mlir::arith::SubIOp>(loc, len, one);
493 auto lhsLenIdx = builder.createConvert(loc, builder.getIndexType(), lhsLen);
494 auto fromBuff = getCharBoxBuffer(rhs);
495 auto toBuff = getCharBoxBuffer(temp);
496 fir::factory::DoLoopHelper{builder, loc}.createLoop(
497 lhsLenIdx, upperBound, one,
498 [&](fir::FirOpBuilder &bldr, mlir::Value index) {
499 auto rhsIndex = bldr.create<mlir::arith::SubIOp>(loc, index, lhsLenIdx);
500 auto charVal = createLoadCharAt(fromBuff, rhsIndex);
501 createStoreCharAt(toBuff, index, charVal);
502 });
503 return temp;
504}
505
506mlir::Value fir::factory::CharacterExprHelper::genSubstringBase(
507 mlir::Value stringRawAddr, mlir::Value lowerBound,
508 mlir::Type substringAddrType, mlir::Value one) {
509 if (!one)
510 one = builder.createIntegerConstant(loc, lowerBound.getType(), 1);
511 auto offset =
512 builder.create<mlir::arith::SubIOp>(loc, lowerBound, one).getResult();
513 auto addr = createElementAddr(stringRawAddr, offset);
514 return builder.createConvert(loc, substringAddrType, addr);
515}
516
517fir::CharBoxValue fir::factory::CharacterExprHelper::createSubstring(
518 const fir::CharBoxValue &box, llvm::ArrayRef<mlir::Value> bounds) {
519 // Constant need to be materialize in memory to use fir.coordinate_of.
520 auto nbounds = bounds.size();
521 if (nbounds < 1 || nbounds > 2) {
522 mlir::emitError(loc, "Incorrect number of bounds in substring");
523 return {mlir::Value{}, mlir::Value{}};
524 }
525 mlir::SmallVector<mlir::Value> castBounds;
526 // Convert bounds to length type to do safe arithmetic on it.
527 for (auto bound : bounds)
528 castBounds.push_back(
529 builder.createConvert(loc, builder.getCharacterLengthType(), bound));
530 auto lowerBound = castBounds[0];
531 // FIR CoordinateOp is zero based but Fortran substring are one based.
532 auto kind = getCharacterKind(box.getBuffer().getType());
533 auto charTy = fir::CharacterType::getUnknownLen(builder.getContext(), kind);
534 auto resultType = builder.getRefType(charTy);
535 auto one = builder.createIntegerConstant(loc, lowerBound.getType(), 1);
536 auto substringRef =
537 genSubstringBase(box.getBuffer(), lowerBound, resultType, one);
538
539 // Compute the length.
540 mlir::Value substringLen;
541 if (nbounds < 2) {
542 substringLen =
543 builder.create<mlir::arith::SubIOp>(loc, box.getLen(), castBounds[0]);
544 } else {
545 substringLen =
546 builder.create<mlir::arith::SubIOp>(loc, castBounds[1], castBounds[0]);
547 }
548 substringLen = builder.create<mlir::arith::AddIOp>(loc, substringLen, one);
549
550 // Set length to zero if bounds were reversed (Fortran 2018 9.4.1)
551 auto zero = builder.createIntegerConstant(loc, substringLen.getType(), 0);
552 auto cdt = builder.create<mlir::arith::CmpIOp>(
553 loc, mlir::arith::CmpIPredicate::slt, substringLen, zero);
554 substringLen =
555 builder.create<mlir::arith::SelectOp>(loc, cdt, zero, substringLen);
556
557 return {substringRef, substringLen};
558}
559
560mlir::Value
561fir::factory::CharacterExprHelper::createLenTrim(const fir::CharBoxValue &str) {
562 // Note: Runtime for LEN_TRIM should also be available at some
563 // point. For now use an inlined implementation.
564 auto indexType = builder.getIndexType();
565 auto len = builder.createConvert(loc, indexType, str.getLen());
566 auto one = builder.createIntegerConstant(loc, indexType, 1);
567 auto minusOne = builder.createIntegerConstant(loc, indexType, -1);
568 auto zero = builder.createIntegerConstant(loc, indexType, 0);
569 auto trueVal = builder.createIntegerConstant(loc, builder.getI1Type(), 1);
570 auto blank = createBlankConstantCode(getCharacterType(str));
571 mlir::Value lastChar = builder.create<mlir::arith::SubIOp>(loc, len, one);
572
573 auto iterWhile =
574 builder.create<fir::IterWhileOp>(loc, lastChar, zero, minusOne, trueVal,
575 /*returnFinalCount=*/false, lastChar);
576 auto insPt = builder.saveInsertionPoint();
577 builder.setInsertionPointToStart(iterWhile.getBody());
578 auto index = iterWhile.getInductionVar();
579 // Look for first non-blank from the right of the character.
580 auto fromBuff = getCharBoxBuffer(str);
581 auto elemAddr = createElementAddr(fromBuff, index);
582 auto codeAddr =
583 builder.createConvert(loc, builder.getRefType(blank.getType()), elemAddr);
584 auto c = builder.create<fir::LoadOp>(loc, codeAddr);
585 auto isBlank = builder.create<mlir::arith::CmpIOp>(
586 loc, mlir::arith::CmpIPredicate::eq, blank, c);
587 llvm::SmallVector<mlir::Value> results = {isBlank, index};
588 builder.create<fir::ResultOp>(loc, results);
589 builder.restoreInsertionPoint(insPt);
590 // Compute length after iteration (zero if all blanks)
591 mlir::Value newLen =
592 builder.create<mlir::arith::AddIOp>(loc, iterWhile.getResult(1), one);
593 auto result = builder.create<mlir::arith::SelectOp>(
594 loc, iterWhile.getResult(0), zero, newLen);
595 return builder.createConvert(loc, builder.getCharacterLengthType(), result);
596}
597
598fir::CharBoxValue
599fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
600 int len) {
601 assert(len >= 0 && "expected positive length");
602 auto kind = recoverCharacterType(type).getFKind();
603 auto charType = fir::CharacterType::get(builder.getContext(), kind, len);
604 auto addr = builder.create<fir::AllocaOp>(loc, charType);
605 auto mlirLen =
606 builder.createIntegerConstant(loc, builder.getCharacterLengthType(), len);
607 return {addr, mlirLen};
608}
609
610// Returns integer with code for blank. The integer has the same
611// size as the character. Blank has ascii space code for all kinds.
612mlir::Value fir::factory::CharacterExprHelper::createBlankConstantCode(
613 fir::CharacterType type) {
614 auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
615 auto intType = builder.getIntegerType(bits);
616 return builder.createIntegerConstant(loc, intType, ' ');
617}
618
619mlir::Value fir::factory::CharacterExprHelper::createBlankConstant(
620 fir::CharacterType type) {
621 return createSingletonFromCode(createBlankConstantCode(type),
622 type.getFKind());
623}
624
625void fir::factory::CharacterExprHelper::createAssign(
626 const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs) {
627 if (auto *str = rhs.getBoxOf<fir::CharBoxValue>()) {
628 if (auto *to = lhs.getBoxOf<fir::CharBoxValue>()) {
629 createAssign(*to, *str);
630 return;
631 }
632 }
633 TODO(loc, "character array assignment");
634 // Note that it is not sure the array aspect should be handled
635 // by this utility.
636}
637
638mlir::Value
639fir::factory::CharacterExprHelper::createEmboxChar(mlir::Value addr,
640 mlir::Value len) {
641 return createEmbox(fir::CharBoxValue{addr, len});
642}
643
644std::pair<mlir::Value, mlir::Value>
645fir::factory::CharacterExprHelper::createUnboxChar(mlir::Value boxChar) {
646 using T = std::pair<mlir::Value, mlir::Value>;
647 return toExtendedValue(boxChar).match(
648 [](const fir::CharBoxValue &b) -> T {
649 return {b.getBuffer(), b.getLen()};
650 },
651 [](const fir::CharArrayBoxValue &b) -> T {
652 return {b.getBuffer(), b.getLen()};
653 },
654 [](const auto &) -> T { llvm::report_fatal_error("not a character"); });
655}
656
657bool fir::factory::CharacterExprHelper::isCharacterLiteral(mlir::Type type) {
658 if (auto seqType = type.dyn_cast<fir::SequenceType>())
659 return (seqType.getShape().size() == 1) &&
660 fir::isa_char(seqType.getEleTy());
661 return false;
662}
663
664fir::KindTy
665fir::factory::CharacterExprHelper::getCharacterKind(mlir::Type type) {
666 assert(isCharacterScalar(type) && "expected scalar character");
667 return recoverCharacterType(type).getFKind();
668}
669
670fir::KindTy
671fir::factory::CharacterExprHelper::getCharacterOrSequenceKind(mlir::Type type) {
672 return recoverCharacterType(type).getFKind();
673}
674
675bool fir::factory::CharacterExprHelper::hasConstantLengthInType(
676 const fir::ExtendedValue &exv) {
677 auto charTy = recoverCharacterType(fir::getBase(exv).getType());
678 return charTy.hasConstantLen();
679}
680
681mlir::Value
682fir::factory::CharacterExprHelper::createSingletonFromCode(mlir::Value code,
683 int kind) {
684 auto charType = fir::CharacterType::get(builder.getContext(), kind, 1);
685 auto bits = builder.getKindMap().getCharacterBitsize(kind);
686 auto intType = builder.getIntegerType(bits);
687 auto cast = builder.createConvert(loc, intType, code);
688 auto undef = builder.create<fir::UndefOp>(loc, charType);
689 auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
690 return builder.create<fir::InsertValueOp>(loc, charType, undef, cast,
691 builder.getArrayAttr(zero));
692}
693
694mlir::Value fir::factory::CharacterExprHelper::extractCodeFromSingleton(
695 mlir::Value singleton) {
696 auto type = getCharacterType(singleton);
697 assert(type.getLen() == 1);
698 auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
699 auto intType = builder.getIntegerType(bits);
700 auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
701 return builder.create<fir::ExtractValueOp>(loc, intType, singleton,
702 builder.getArrayAttr(zero));
703}
704
705mlir::Value
706fir::factory::CharacterExprHelper::readLengthFromBox(mlir::Value box) {
707 auto charTy = recoverCharacterType(box.getType());
708 return readLengthFromBox(box, charTy);
709}
710
711mlir::Value fir::factory::CharacterExprHelper::readLengthFromBox(
712 mlir::Value box, fir::CharacterType charTy) {
713 auto lenTy = builder.getCharacterLengthType();
714 auto size = builder.create<fir::BoxEleSizeOp>(loc, lenTy, box);
715 auto bits = builder.getKindMap().getCharacterBitsize(charTy.getFKind());
716 auto width = bits / 8;
717 if (width > 1) {
718 auto widthVal = builder.createIntegerConstant(loc, lenTy, width);
719 return builder.create<mlir::arith::DivSIOp>(loc, size, widthVal);
720 }
721 return size;
722}
723
724mlir::Value fir::factory::CharacterExprHelper::getLength(mlir::Value memref) {
725 auto memrefType = memref.getType();
726 auto charType = recoverCharacterType(memrefType);
727 assert(charType && "must be a character type");
728 if (charType.hasConstantLen())
729 return builder.createIntegerConstant(loc, builder.getCharacterLengthType(),
730 charType.getLen());
731 if (memrefType.isa<fir::BoxType>())
732 return readLengthFromBox(memref);
733 if (memrefType.isa<fir::BoxCharType>())
734 return createUnboxChar(memref).second;
735
736 // Length cannot be deduced from memref.
737 return {};
738}
739
740std::pair<mlir::Value, mlir::Value>
741fir::factory::extractCharacterProcedureTuple(fir::FirOpBuilder &builder,
742 mlir::Location loc,
743 mlir::Value tuple,
744 bool openBoxProc) {
745 mlir::TupleType tupleType = tuple.getType().cast<mlir::TupleType>();
746 mlir::Value addr = builder.create<fir::ExtractValueOp>(
747 loc, tupleType.getType(0), tuple,
748 builder.getArrayAttr(
749 {builder.getIntegerAttr(builder.getIndexType(), 0)}));
750 mlir::Value proc = [&]() -> mlir::Value {
751 if (openBoxProc)
752 if (auto addrTy = addr.getType().dyn_cast<fir::BoxProcType>())
753 return builder.create<fir::BoxAddrOp>(loc, addrTy.getEleTy(), addr);
754 return addr;
755 }();
756 mlir::Value len = builder.create<fir::ExtractValueOp>(
757 loc, tupleType.getType(1), tuple,
758 builder.getArrayAttr(
759 {builder.getIntegerAttr(builder.getIndexType(), 1)}));
760 return {proc, len};
761}
762
763mlir::Value fir::factory::createCharacterProcedureTuple(
764 fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argTy,
765 mlir::Value addr, mlir::Value len) {
766 mlir::TupleType tupleType = argTy.cast<mlir::TupleType>();
767 addr = builder.createConvert(loc, tupleType.getType(0), addr);
768 if (len)
769 len = builder.createConvert(loc, tupleType.getType(1), len);
770 else
771 len = builder.create<fir::UndefOp>(loc, tupleType.getType(1));
772 mlir::Value tuple = builder.create<fir::UndefOp>(loc, tupleType);
773 tuple = builder.create<fir::InsertValueOp>(
774 loc, tupleType, tuple, addr,
775 builder.getArrayAttr(
776 {builder.getIntegerAttr(builder.getIndexType(), 0)}));
777 tuple = builder.create<fir::InsertValueOp>(
778 loc, tupleType, tuple, len,
779 builder.getArrayAttr(
780 {builder.getIntegerAttr(builder.getIndexType(), 1)}));
781 return tuple;
782}
783
784mlir::Type
785fir::factory::getCharacterProcedureTupleType(mlir::Type funcPointerType) {
786 mlir::MLIRContext *context = funcPointerType.getContext();
787 mlir::Type lenType = mlir::IntegerType::get(context, 64);
788 return mlir::TupleType::get(context, {funcPointerType, lenType});
789}
790
791fir::CharBoxValue fir::factory::CharacterExprHelper::createCharExtremum(
792 bool predIsMin, llvm::ArrayRef<fir::CharBoxValue> opCBVs) {
793 // inputs: we are given a vector of all of the charboxes of the arguments
794 // passed to hlfir.char_extremum, as well as the predicate for whether we
795 // want llt or lgt
796 //
797 // note: we know that, regardless of whether we're looking at smallest or
798 // largest char, the size of the output buffer will be the same size as the
799 // largest character out of all of the operands. so, we find the biggest
800 // length first. It's okay if these char lengths are not known at compile
801 // time.
802
803 fir::CharBoxValue firstCBV = opCBVs[0];
804 mlir::Value firstBuf = getCharBoxBuffer(firstCBV);
805 auto firstLen = builder.createConvert(loc, builder.getCharacterLengthType(),
806 firstCBV.getLen());
807
808 mlir::Value resultBuf = firstBuf;
809 mlir::Value resultLen = firstLen;
810 mlir::Value biggestLen = firstLen;
811
812 // values for casting buf type and len type
813 auto typeLen = fir::CharacterType::unknownLen();
814 auto kind = recoverCharacterType(firstBuf.getType()).getFKind();
815 auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen);
816 auto type = fir::ReferenceType::get(charTy);
817
818 size_t numOperands = opCBVs.size();
819 for (size_t cbv_idx = 1; cbv_idx < numOperands; ++cbv_idx) {
820 auto currChar = opCBVs[cbv_idx];
821 auto currBuf = getCharBoxBuffer(currChar);
822 auto currLen = builder.createConvert(loc, builder.getCharacterLengthType(),
823 currChar.getLen());
824 // biggest len result
825 mlir::Value lhsBigger = builder.create<mlir::arith::CmpIOp>(
826 loc, mlir::arith::CmpIPredicate::uge, biggestLen, currLen);
827 biggestLen = builder.create<mlir::arith::SelectOp>(loc, lhsBigger,
828 biggestLen, currLen);
829
830 auto cmp = predIsMin ? mlir::arith::CmpIPredicate::slt
831 : mlir::arith::CmpIPredicate::sgt;
832
833 // lexical compare result
834 mlir::Value resultCmp = fir::runtime::genCharCompare(
835 builder, loc, cmp, currBuf, currLen, resultBuf, resultLen);
836
837 // it's casting (to unknown size) time!
838 resultBuf = builder.createConvert(loc, type, resultBuf);
839 currBuf = builder.createConvert(loc, type, currBuf);
840
841 resultBuf = builder.create<mlir::arith::SelectOp>(loc, resultCmp, currBuf,
842 resultBuf);
843 resultLen = builder.create<mlir::arith::SelectOp>(loc, resultCmp, currLen,
844 resultLen);
845 }
846
847 // now that we know the lexicographically biggest/smallest char and which char
848 // had the biggest len, we can populate a temp CBV and return it
849 fir::CharBoxValue temp = createCharacterTemp(resultBuf.getType(), biggestLen);
850 auto toBuf = temp;
851 fir::CharBoxValue fromBuf{resultBuf, resultLen};
852 createAssign(toBuf, fromBuf);
853 return temp;
854}
855
856fir::CharBoxValue
857fir::factory::convertCharacterKind(fir::FirOpBuilder &builder,
858 mlir::Location loc,
859 fir::CharBoxValue srcBoxChar, int toKind) {
860 // Use char_convert. Each code point is translated from a
861 // narrower/wider encoding to the target encoding. For example, 'A'
862 // may be translated from 0x41 : i8 to 0x0041 : i16. The symbol
863 // for euro (0x20AC : i16) may be translated from a wide character
864 // to "0xE2 0x82 0xAC" : UTF-8.
865 mlir::Value bufferSize = srcBoxChar.getLen();
866 auto kindMap = builder.getKindMap();
867 mlir::Value boxCharAddr = srcBoxChar.getAddr();
868 auto fromTy = boxCharAddr.getType();
869 if (auto charTy = fromTy.dyn_cast<fir::CharacterType>()) {
870 // boxchar is a value, not a variable. Turn it into a temporary.
871 // As a value, it ought to have a constant LEN value.
872 assert(charTy.hasConstantLen() && "must have constant length");
873 mlir::Value tmp = builder.createTemporary(loc, charTy);
874 builder.create<fir::StoreOp>(loc, boxCharAddr, tmp);
875 boxCharAddr = tmp;
876 }
877 auto fromBits = kindMap.getCharacterBitsize(
878 fir::unwrapRefType(fromTy).cast<fir::CharacterType>().getFKind());
879 auto toBits = kindMap.getCharacterBitsize(toKind);
880 if (toBits < fromBits) {
881 // Scale by relative ratio to give a buffer of the same length.
882 auto ratio = builder.createIntegerConstant(loc, bufferSize.getType(),
883 fromBits / toBits);
884 bufferSize = builder.create<mlir::arith::MulIOp>(loc, bufferSize, ratio);
885 }
886 mlir::Type toType =
887 fir::CharacterType::getUnknownLen(builder.getContext(), toKind);
888 auto dest = builder.createTemporary(loc, toType, /*name=*/{}, /*shape=*/{},
889 mlir::ValueRange{bufferSize});
890 builder.create<fir::CharConvertOp>(loc, boxCharAddr, srcBoxChar.getLen(),
891 dest);
892 return fir::CharBoxValue{dest, srcBoxChar.getLen()};
893}
894

source code of flang/lib/Optimizer/Builder/Character.cpp