1 | //===-- Character.cpp -----------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "flang/Optimizer/Builder/Character.h" |
14 | #include "flang/Optimizer/Builder/DoLoopHelper.h" |
15 | #include "flang/Optimizer/Builder/FIRBuilder.h" |
16 | #include "flang/Optimizer/Builder/Todo.h" |
17 | #include "flang/Optimizer/Dialect/FIROpsSupport.h" |
18 | #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
19 | #include "llvm/Support/Debug.h" |
20 | #include <optional> |
21 | |
22 | #define DEBUG_TYPE "flang-lower-character" |
23 | |
24 | //===----------------------------------------------------------------------===// |
25 | // CharacterExprHelper implementation |
26 | //===----------------------------------------------------------------------===// |
27 | |
28 | /// Unwrap all the ref and box types and return the inner element type. |
29 | static mlir::Type unwrapBoxAndRef(mlir::Type type) { |
30 | if (auto boxType = mlir::dyn_cast<fir::BoxCharType>(type)) |
31 | return boxType.getEleTy(); |
32 | while (true) { |
33 | type = fir::unwrapRefType(type); |
34 | if (auto boxTy = mlir::dyn_cast<fir::BoxType>(type)) |
35 | type = boxTy.getEleTy(); |
36 | else |
37 | break; |
38 | } |
39 | return type; |
40 | } |
41 | |
42 | /// Unwrap base fir.char<kind,len> type. |
43 | static fir::CharacterType recoverCharacterType(mlir::Type type) { |
44 | type = fir::unwrapSequenceType(unwrapBoxAndRef(type)); |
45 | if (auto charTy = mlir::dyn_cast<fir::CharacterType>(type)) |
46 | return charTy; |
47 | llvm::report_fatal_error(reason: "expected a character type" ); |
48 | } |
49 | |
50 | bool fir::factory::CharacterExprHelper::isCharacterScalar(mlir::Type type) { |
51 | type = unwrapBoxAndRef(type); |
52 | return !mlir::isa<fir::SequenceType>(type) && fir::isa_char(type); |
53 | } |
54 | |
55 | bool fir::factory::CharacterExprHelper::isArray(mlir::Type type) { |
56 | type = unwrapBoxAndRef(type); |
57 | if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(type)) |
58 | return fir::isa_char(seqTy.getEleTy()); |
59 | return false; |
60 | } |
61 | |
62 | fir::CharacterType |
63 | fir::factory::CharacterExprHelper::getCharacterType(mlir::Type type) { |
64 | assert(isCharacterScalar(type) && "expected scalar character" ); |
65 | return recoverCharacterType(type); |
66 | } |
67 | |
68 | fir::CharacterType |
69 | fir::factory::CharacterExprHelper::getCharType(mlir::Type type) { |
70 | return recoverCharacterType(type); |
71 | } |
72 | |
73 | fir::CharacterType fir::factory::CharacterExprHelper::getCharacterType( |
74 | const fir::CharBoxValue &box) { |
75 | return getCharacterType(box.getBuffer().getType()); |
76 | } |
77 | |
78 | fir::CharacterType |
79 | fir::factory::CharacterExprHelper::getCharacterType(mlir::Value str) { |
80 | return getCharacterType(str.getType()); |
81 | } |
82 | |
83 | /// Determine the static size of the character. Returns the computed size, not |
84 | /// an IR Value. |
85 | static std::optional<fir::CharacterType::LenType> |
86 | getCompileTimeLength(const fir::CharBoxValue &box) { |
87 | auto len = recoverCharacterType(box.getBuffer().getType()).getLen(); |
88 | if (len == fir::CharacterType::unknownLen()) |
89 | return {}; |
90 | return len; |
91 | } |
92 | |
93 | /// Detect the precondition that the value `str` does not reside in memory. Such |
94 | /// values will have a type `!fir.array<...x!fir.char<N>>` or `!fir.char<N>`. |
95 | LLVM_ATTRIBUTE_UNUSED static bool needToMaterialize(mlir::Value str) { |
96 | return mlir::isa<fir::SequenceType>(str.getType()) || |
97 | fir::isa_char(str.getType()); |
98 | } |
99 | |
100 | /// This is called only if `str` does not reside in memory. Such a bare string |
101 | /// value will be converted into a memory-based temporary and an extended |
102 | /// boxchar value returned. |
103 | fir::CharBoxValue |
104 | fir::factory::CharacterExprHelper::materializeValue(mlir::Value str) { |
105 | assert(needToMaterialize(str)); |
106 | auto ty = str.getType(); |
107 | assert(isCharacterScalar(ty) && "expected scalar character" ); |
108 | auto charTy = mlir::dyn_cast<fir::CharacterType>(ty); |
109 | if (!charTy || charTy.getLen() == fir::CharacterType::unknownLen()) { |
110 | LLVM_DEBUG(llvm::dbgs() << "cannot materialize: " << str << '\n'); |
111 | llvm_unreachable("must be a !fir.char<N> type" ); |
112 | } |
113 | auto len = builder.createIntegerConstant( |
114 | loc, builder.getCharacterLengthType(), charTy.getLen()); |
115 | auto temp = builder.create<fir::AllocaOp>(loc, charTy); |
116 | builder.create<fir::StoreOp>(loc, str, temp); |
117 | LLVM_DEBUG(llvm::dbgs() << "materialized as local: " << str << " -> (" << temp |
118 | << ", " << len << ")\n" ); |
119 | return {temp, len}; |
120 | } |
121 | |
122 | fir::ExtendedValue |
123 | fir::factory::CharacterExprHelper::toExtendedValue(mlir::Value character, |
124 | mlir::Value len) { |
125 | auto lenType = builder.getCharacterLengthType(); |
126 | auto type = character.getType(); |
127 | auto base = fir::isa_passbyref_type(type) ? character : mlir::Value{}; |
128 | auto resultLen = len; |
129 | llvm::SmallVector<mlir::Value> extents; |
130 | |
131 | if (auto eleType = fir::dyn_cast_ptrEleTy(type)) |
132 | type = eleType; |
133 | |
134 | if (auto arrayType = mlir::dyn_cast<fir::SequenceType>(type)) { |
135 | type = arrayType.getEleTy(); |
136 | auto indexType = builder.getIndexType(); |
137 | for (auto extent : arrayType.getShape()) { |
138 | if (extent == fir::SequenceType::getUnknownExtent()) |
139 | break; |
140 | extents.emplace_back( |
141 | builder.createIntegerConstant(loc, indexType, extent)); |
142 | } |
143 | // Last extent might be missing in case of assumed-size. If more extents |
144 | // could not be deduced from type, that's an error (a fir.box should |
145 | // have been used in the interface). |
146 | if (extents.size() + 1 < arrayType.getShape().size()) |
147 | mlir::emitError(loc, "cannot retrieve array extents from type" ); |
148 | } |
149 | |
150 | if (auto charTy = mlir::dyn_cast<fir::CharacterType>(type)) { |
151 | if (!resultLen && charTy.getLen() != fir::CharacterType::unknownLen()) |
152 | resultLen = builder.createIntegerConstant(loc, lenType, charTy.getLen()); |
153 | } else if (auto boxCharType = mlir::dyn_cast<fir::BoxCharType>(type)) { |
154 | auto refType = builder.getRefType(boxCharType.getEleTy()); |
155 | // If the embox is accessible, use its operand to avoid filling |
156 | // the generated fir with embox/unbox. |
157 | mlir::Value boxCharLen; |
158 | if (auto definingOp = character.getDefiningOp()) { |
159 | if (auto box = mlir::dyn_cast<fir::EmboxCharOp>(definingOp)) { |
160 | base = box.getMemref(); |
161 | boxCharLen = box.getLen(); |
162 | } |
163 | } |
164 | if (!boxCharLen) { |
165 | auto unboxed = |
166 | builder.create<fir::UnboxCharOp>(loc, refType, lenType, character); |
167 | base = builder.createConvert(loc, refType, unboxed.getResult(0)); |
168 | boxCharLen = unboxed.getResult(1); |
169 | } |
170 | if (!resultLen) { |
171 | resultLen = boxCharLen; |
172 | } |
173 | } else if (mlir::isa<fir::BoxType>(type)) { |
174 | mlir::emitError(loc, "descriptor or derived type not yet handled" ); |
175 | } else { |
176 | llvm_unreachable("Cannot translate mlir::Value to character ExtendedValue" ); |
177 | } |
178 | |
179 | if (!base) { |
180 | if (auto load = |
181 | mlir::dyn_cast_or_null<fir::LoadOp>(character.getDefiningOp())) { |
182 | base = load.getOperand(); |
183 | } else { |
184 | return materializeValue(fir::getBase(character)); |
185 | } |
186 | } |
187 | if (!resultLen) |
188 | llvm::report_fatal_error("no dynamic length found for character" ); |
189 | if (!extents.empty()) |
190 | return fir::CharArrayBoxValue{base, resultLen, extents}; |
191 | return fir::CharBoxValue{base, resultLen}; |
192 | } |
193 | |
194 | static mlir::Type getSingletonCharType(mlir::MLIRContext *ctxt, int kind) { |
195 | return fir::CharacterType::getSingleton(ctxt, kind); |
196 | } |
197 | |
198 | mlir::Value |
199 | fir::factory::CharacterExprHelper::createEmbox(const fir::CharBoxValue &box) { |
200 | // Base CharBoxValue of CharArrayBoxValue are ok here (do not require a scalar |
201 | // type) |
202 | auto charTy = recoverCharacterType(box.getBuffer().getType()); |
203 | auto boxCharType = |
204 | fir::BoxCharType::get(builder.getContext(), charTy.getFKind()); |
205 | auto refType = fir::ReferenceType::get(boxCharType.getEleTy()); |
206 | mlir::Value buff = box.getBuffer(); |
207 | // fir.boxchar requires a memory reference. Allocate temp if the character is |
208 | // not in memory. |
209 | if (!fir::isa_ref_type(buff.getType())) { |
210 | auto temp = builder.createTemporary(loc, buff.getType()); |
211 | builder.create<fir::StoreOp>(loc, buff, temp); |
212 | buff = temp; |
213 | } |
214 | // fir.emboxchar only accepts scalar, cast array buffer to a scalar buffer. |
215 | if (mlir::isa<fir::SequenceType>(fir::dyn_cast_ptrEleTy(buff.getType()))) |
216 | buff = builder.createConvert(loc, refType, buff); |
217 | // Convert in case the provided length is not of the integer type that must |
218 | // be used in boxchar. |
219 | auto len = builder.createConvert(loc, builder.getCharacterLengthType(), |
220 | box.getLen()); |
221 | return builder.create<fir::EmboxCharOp>(loc, boxCharType, buff, len); |
222 | } |
223 | |
224 | fir::CharBoxValue fir::factory::CharacterExprHelper::toScalarCharacter( |
225 | const fir::CharArrayBoxValue &box) { |
226 | if (mlir::isa<fir::PointerType>(box.getBuffer().getType())) |
227 | TODO(loc, "concatenating non contiguous character array into a scalar" ); |
228 | |
229 | // TODO: add a fast path multiplying new length at compile time if the info is |
230 | // in the array type. |
231 | auto lenType = builder.getCharacterLengthType(); |
232 | auto len = builder.createConvert(loc, lenType, box.getLen()); |
233 | for (auto extent : box.getExtents()) |
234 | len = builder.create<mlir::arith::MulIOp>( |
235 | loc, len, builder.createConvert(loc, lenType, extent)); |
236 | |
237 | // TODO: typeLen can be improved in compiled constant cases |
238 | // TODO: allow bare fir.array<> (no ref) conversion here ? |
239 | auto typeLen = fir::CharacterType::unknownLen(); |
240 | auto kind = recoverCharacterType(box.getBuffer().getType()).getFKind(); |
241 | auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen); |
242 | auto type = fir::ReferenceType::get(charTy); |
243 | auto buffer = builder.createConvert(loc, type, box.getBuffer()); |
244 | return {buffer, len}; |
245 | } |
246 | |
247 | mlir::Value fir::factory::CharacterExprHelper::createEmbox( |
248 | const fir::CharArrayBoxValue &box) { |
249 | // Use same embox as for scalar. It's losing the actual data size information |
250 | // (We do not multiply the length by the array size), but that is what Fortran |
251 | // call interfaces using boxchar expect. |
252 | return createEmbox(static_cast<const fir::CharBoxValue &>(box)); |
253 | } |
254 | |
255 | /// Get the address of the element at position \p index of the scalar character |
256 | /// \p buffer. |
257 | /// \p buffer must be of type !fir.ref<fir.char<k, len>>. The length may be |
258 | /// unknown. \p index must have any integer type, and is zero based. The return |
259 | /// value is a singleton address (!fir.ref<!fir.char<kind>>) |
260 | mlir::Value |
261 | fir::factory::CharacterExprHelper::createElementAddr(mlir::Value buffer, |
262 | mlir::Value index) { |
263 | // The only way to address an element of a fir.ref<char<kind, len>> is to cast |
264 | // it to a fir.array<len x fir.char<kind>> and use fir.coordinate_of. |
265 | auto bufferType = buffer.getType(); |
266 | assert(fir::isa_ref_type(bufferType)); |
267 | assert(isCharacterScalar(bufferType)); |
268 | auto charTy = recoverCharacterType(bufferType); |
269 | auto singleTy = getSingletonCharType(builder.getContext(), charTy.getFKind()); |
270 | auto singleRefTy = builder.getRefType(singleTy); |
271 | auto extent = fir::SequenceType::getUnknownExtent(); |
272 | if (charTy.getLen() != fir::CharacterType::unknownLen()) |
273 | extent = charTy.getLen(); |
274 | const bool isVolatile = fir::isa_volatile_type(buffer.getType()); |
275 | auto sequenceType = fir::SequenceType::get({extent}, singleTy); |
276 | auto coorTy = builder.getRefType(sequenceType, isVolatile); |
277 | |
278 | auto coor = builder.createConvert(loc, coorTy, buffer); |
279 | auto i = builder.createConvert(loc, builder.getIndexType(), index); |
280 | return builder.create<fir::CoordinateOp>(loc, singleRefTy, coor, i); |
281 | } |
282 | |
283 | /// Load a character out of `buff` from offset `index`. |
284 | /// `buff` must be a reference to memory. |
285 | mlir::Value |
286 | fir::factory::CharacterExprHelper::createLoadCharAt(mlir::Value buff, |
287 | mlir::Value index) { |
288 | LLVM_DEBUG(llvm::dbgs() << "load a char: " << buff << " type: " |
289 | << buff.getType() << " at: " << index << '\n'); |
290 | return builder.create<fir::LoadOp>(loc, createElementAddr(buff, index)); |
291 | } |
292 | |
293 | /// Store the singleton character `c` to `str` at offset `index`. |
294 | /// `str` must be a reference to memory. |
295 | void fir::factory::CharacterExprHelper::createStoreCharAt(mlir::Value str, |
296 | mlir::Value index, |
297 | mlir::Value c) { |
298 | LLVM_DEBUG(llvm::dbgs() << "store the char: " << c << " into: " << str |
299 | << " type: " << str.getType() << " at: " << index |
300 | << '\n'); |
301 | auto addr = createElementAddr(str, index); |
302 | builder.create<fir::StoreOp>(loc, c, addr); |
303 | } |
304 | |
305 | // FIXME: this temp is useless... either fir.coordinate_of needs to |
306 | // work on "loaded" characters (!fir.array<len x fir.char<kind>>) or |
307 | // character should never be loaded. |
308 | // If this is a fir.array<>, allocate and store the value so that |
309 | // fir.cooridnate_of can be use on the value. |
310 | mlir::Value fir::factory::CharacterExprHelper::getCharBoxBuffer( |
311 | const fir::CharBoxValue &box) { |
312 | auto buff = box.getBuffer(); |
313 | if (fir::isa_char(buff.getType())) { |
314 | auto newBuff = builder.create<fir::AllocaOp>(loc, buff.getType()); |
315 | builder.create<fir::StoreOp>(loc, buff, newBuff); |
316 | return newBuff; |
317 | } |
318 | return buff; |
319 | } |
320 | |
321 | /// Create a loop to copy `count` characters from `src` to `dest`. Note that the |
322 | /// KIND indicates the number of bits in a code point. (ASCII, UCS-2, or UCS-4.) |
323 | void fir::factory::CharacterExprHelper::createCopy( |
324 | const fir::CharBoxValue &dest, const fir::CharBoxValue &src, |
325 | mlir::Value count) { |
326 | auto fromBuff = getCharBoxBuffer(src); |
327 | auto toBuff = getCharBoxBuffer(dest); |
328 | LLVM_DEBUG(llvm::dbgs() << "create char copy from: " ; src.dump(); |
329 | llvm::dbgs() << " to: " ; dest.dump(); |
330 | llvm::dbgs() << " count: " << count << '\n'); |
331 | auto kind = getCharacterKind(src.getBuffer().getType()); |
332 | // If the src and dest are the same KIND, then use memmove to move the bits. |
333 | // We don't have to worry about overlapping ranges with memmove. |
334 | if (getCharacterKind(dest.getBuffer().getType()) == kind) { |
335 | const bool isVolatile = fir::isa_volatile_type(fromBuff.getType()) || |
336 | fir::isa_volatile_type(toBuff.getType()); |
337 | auto bytes = builder.getKindMap().getCharacterBitsize(kind) / 8; |
338 | auto i64Ty = builder.getI64Type(); |
339 | auto kindBytes = builder.createIntegerConstant(loc, i64Ty, bytes); |
340 | auto castCount = builder.createConvert(loc, i64Ty, count); |
341 | auto totalBytes = |
342 | builder.create<mlir::arith::MulIOp>(loc, kindBytes, castCount); |
343 | auto llvmPointerType = |
344 | mlir::LLVM::LLVMPointerType::get(builder.getContext()); |
345 | auto toPtr = builder.createConvert(loc, llvmPointerType, toBuff); |
346 | auto fromPtr = builder.createConvert(loc, llvmPointerType, fromBuff); |
347 | builder.create<mlir::LLVM::MemmoveOp>(loc, toPtr, fromPtr, totalBytes, |
348 | isVolatile); |
349 | return; |
350 | } |
351 | |
352 | // Convert a CHARACTER of one KIND into a CHARACTER of another KIND. |
353 | builder.create<fir::CharConvertOp>(loc, src.getBuffer(), count, |
354 | dest.getBuffer()); |
355 | } |
356 | |
357 | void fir::factory::CharacterExprHelper::createPadding( |
358 | const fir::CharBoxValue &str, mlir::Value lower, mlir::Value upper) { |
359 | auto blank = createBlankConstant(getCharacterType(str)); |
360 | // Always create the loop, if upper < lower, no iteration will be |
361 | // executed. |
362 | auto toBuff = getCharBoxBuffer(str); |
363 | fir::factory::DoLoopHelper{builder, loc}.createLoop( |
364 | lower, upper, [&](fir::FirOpBuilder &, mlir::Value index) { |
365 | createStoreCharAt(toBuff, index, blank); |
366 | }); |
367 | } |
368 | |
369 | fir::CharBoxValue |
370 | fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type, |
371 | mlir::Value len) { |
372 | auto kind = recoverCharacterType(type).getFKind(); |
373 | auto typeLen = fir::CharacterType::unknownLen(); |
374 | // If len is a constant, reflect the length in the type. |
375 | if (auto cstLen = getIntIfConstant(len)) |
376 | typeLen = *cstLen; |
377 | auto *ctxt = builder.getContext(); |
378 | auto charTy = fir::CharacterType::get(ctxt, kind, typeLen); |
379 | llvm::SmallVector<mlir::Value> lenParams; |
380 | if (typeLen == fir::CharacterType::unknownLen()) |
381 | lenParams.push_back(len); |
382 | auto ref = builder.allocateLocal(loc, charTy, "" , ".chrtmp" , |
383 | /*shape=*/std::nullopt, lenParams); |
384 | return {ref, len}; |
385 | } |
386 | |
387 | fir::CharBoxValue fir::factory::CharacterExprHelper::createTempFrom( |
388 | const fir::ExtendedValue &source) { |
389 | const auto *charBox = source.getCharBox(); |
390 | if (!charBox) |
391 | fir::emitFatalError(loc, "source must be a fir::CharBoxValue" ); |
392 | auto len = charBox->getLen(); |
393 | auto sourceTy = charBox->getBuffer().getType(); |
394 | auto temp = createCharacterTemp(sourceTy, len); |
395 | if (fir::isa_ref_type(sourceTy)) { |
396 | createCopy(temp, *charBox, len); |
397 | } else { |
398 | auto ref = builder.createConvert(loc, builder.getRefType(sourceTy), |
399 | temp.getBuffer()); |
400 | builder.create<fir::StoreOp>(loc, charBox->getBuffer(), ref); |
401 | } |
402 | return temp; |
403 | } |
404 | |
405 | // Simple length one character assignment without loops. |
406 | void fir::factory::CharacterExprHelper::createLengthOneAssign( |
407 | const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) { |
408 | auto addr = lhs.getBuffer(); |
409 | auto toTy = fir::unwrapRefType(addr.getType()); |
410 | mlir::Value val = rhs.getBuffer(); |
411 | if (fir::isa_ref_type(val.getType())) { |
412 | auto fromCharLen1RefTy = builder.getRefType(getSingletonCharType( |
413 | builder.getContext(), |
414 | getCharacterKind(fir::unwrapRefType(val.getType())))); |
415 | val = builder.create<fir::LoadOp>( |
416 | loc, builder.createConvert(loc, fromCharLen1RefTy, val)); |
417 | } |
418 | auto toCharLen1Ty = |
419 | getSingletonCharType(builder.getContext(), getCharacterKind(toTy)); |
420 | val = builder.createConvert(loc, toCharLen1Ty, val); |
421 | builder.create<fir::StoreOp>( |
422 | loc, val, |
423 | builder.createConvert(loc, builder.getRefType(toCharLen1Ty), addr)); |
424 | } |
425 | |
426 | /// Returns the minimum of integer mlir::Value \p a and \b. |
427 | mlir::Value genMin(fir::FirOpBuilder &builder, mlir::Location loc, |
428 | mlir::Value a, mlir::Value b) { |
429 | auto cmp = builder.create<mlir::arith::CmpIOp>( |
430 | loc, mlir::arith::CmpIPredicate::slt, a, b); |
431 | return builder.create<mlir::arith::SelectOp>(loc, cmp, a, b); |
432 | } |
433 | |
434 | void fir::factory::CharacterExprHelper::createAssign( |
435 | const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) { |
436 | auto rhsCstLen = getCompileTimeLength(rhs); |
437 | auto lhsCstLen = getCompileTimeLength(lhs); |
438 | bool compileTimeSameLength = false; |
439 | bool isLengthOneAssign = false; |
440 | |
441 | if (lhsCstLen && rhsCstLen && *lhsCstLen == *rhsCstLen) { |
442 | compileTimeSameLength = true; |
443 | if (*lhsCstLen == 1) |
444 | isLengthOneAssign = true; |
445 | } else if (rhs.getLen() == lhs.getLen()) { |
446 | compileTimeSameLength = true; |
447 | |
448 | // If the length values are the same for LHS and RHS, |
449 | // then we can rely on the constant length deduced from |
450 | // any of the two types. |
451 | if (lhsCstLen && *lhsCstLen == 1) |
452 | isLengthOneAssign = true; |
453 | if (rhsCstLen && *rhsCstLen == 1) |
454 | isLengthOneAssign = true; |
455 | |
456 | // We could have recognized constant operations here (e.g. |
457 | // two different arith.constant ops may produce the same value), |
458 | // but for now leave it to CSE to get rid of the duplicates. |
459 | } |
460 | if (isLengthOneAssign) { |
461 | createLengthOneAssign(lhs, rhs); |
462 | return; |
463 | } |
464 | |
465 | // Copy the minimum of the lhs and rhs lengths and pad the lhs remainder |
466 | // if needed. |
467 | auto copyCount = lhs.getLen(); |
468 | auto idxTy = builder.getIndexType(); |
469 | if (!compileTimeSameLength) { |
470 | auto lhsLen = builder.createConvert(loc, idxTy, lhs.getLen()); |
471 | auto rhsLen = builder.createConvert(loc, idxTy, rhs.getLen()); |
472 | copyCount = genMin(builder, loc, lhsLen, rhsLen); |
473 | } |
474 | |
475 | // Actual copy |
476 | createCopy(lhs, rhs, copyCount); |
477 | |
478 | // Pad if needed. |
479 | if (!compileTimeSameLength) { |
480 | auto one = builder.createIntegerConstant(loc, lhs.getLen().getType(), 1); |
481 | auto maxPadding = |
482 | builder.create<mlir::arith::SubIOp>(loc, lhs.getLen(), one); |
483 | createPadding(lhs, copyCount, maxPadding); |
484 | } |
485 | } |
486 | |
487 | fir::CharBoxValue fir::factory::CharacterExprHelper::createConcatenate( |
488 | const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) { |
489 | auto lhsLen = builder.createConvert(loc, builder.getCharacterLengthType(), |
490 | lhs.getLen()); |
491 | auto rhsLen = builder.createConvert(loc, builder.getCharacterLengthType(), |
492 | rhs.getLen()); |
493 | mlir::Value len = builder.create<mlir::arith::AddIOp>(loc, lhsLen, rhsLen); |
494 | auto temp = createCharacterTemp(getCharacterType(rhs), len); |
495 | createCopy(temp, lhs, lhsLen); |
496 | auto one = builder.createIntegerConstant(loc, len.getType(), 1); |
497 | auto upperBound = builder.create<mlir::arith::SubIOp>(loc, len, one); |
498 | auto lhsLenIdx = builder.createConvert(loc, builder.getIndexType(), lhsLen); |
499 | auto fromBuff = getCharBoxBuffer(rhs); |
500 | auto toBuff = getCharBoxBuffer(temp); |
501 | fir::factory::DoLoopHelper{builder, loc}.createLoop( |
502 | lhsLenIdx, upperBound, one, |
503 | [&](fir::FirOpBuilder &bldr, mlir::Value index) { |
504 | auto rhsIndex = bldr.create<mlir::arith::SubIOp>(loc, index, lhsLenIdx); |
505 | auto charVal = createLoadCharAt(fromBuff, rhsIndex); |
506 | createStoreCharAt(toBuff, index, charVal); |
507 | }); |
508 | return temp; |
509 | } |
510 | |
511 | mlir::Value fir::factory::CharacterExprHelper::genSubstringBase( |
512 | mlir::Value stringRawAddr, mlir::Value lowerBound, |
513 | mlir::Type substringAddrType, mlir::Value one) { |
514 | if (!one) |
515 | one = builder.createIntegerConstant(loc, lowerBound.getType(), 1); |
516 | auto offset = |
517 | builder.create<mlir::arith::SubIOp>(loc, lowerBound, one).getResult(); |
518 | auto addr = createElementAddr(stringRawAddr, offset); |
519 | return builder.createConvert(loc, substringAddrType, addr); |
520 | } |
521 | |
522 | fir::CharBoxValue fir::factory::CharacterExprHelper::createSubstring( |
523 | const fir::CharBoxValue &box, llvm::ArrayRef<mlir::Value> bounds) { |
524 | // Constant need to be materialize in memory to use fir.coordinate_of. |
525 | auto nbounds = bounds.size(); |
526 | if (nbounds < 1 || nbounds > 2) { |
527 | mlir::emitError(loc, "Incorrect number of bounds in substring" ); |
528 | return {mlir::Value{}, mlir::Value{}}; |
529 | } |
530 | mlir::SmallVector<mlir::Value> castBounds; |
531 | // Convert bounds to length type to do safe arithmetic on it. |
532 | for (auto bound : bounds) |
533 | castBounds.push_back( |
534 | builder.createConvert(loc, builder.getCharacterLengthType(), bound)); |
535 | auto lowerBound = castBounds[0]; |
536 | // FIR CoordinateOp is zero based but Fortran substring are one based. |
537 | auto kind = getCharacterKind(box.getBuffer().getType()); |
538 | auto charTy = fir::CharacterType::getUnknownLen(builder.getContext(), kind); |
539 | auto resultType = builder.getRefType(charTy); |
540 | auto one = builder.createIntegerConstant(loc, lowerBound.getType(), 1); |
541 | auto substringRef = |
542 | genSubstringBase(box.getBuffer(), lowerBound, resultType, one); |
543 | |
544 | // Compute the length. |
545 | mlir::Value substringLen; |
546 | if (nbounds < 2) { |
547 | substringLen = |
548 | builder.create<mlir::arith::SubIOp>(loc, box.getLen(), castBounds[0]); |
549 | } else { |
550 | substringLen = |
551 | builder.create<mlir::arith::SubIOp>(loc, castBounds[1], castBounds[0]); |
552 | } |
553 | substringLen = builder.create<mlir::arith::AddIOp>(loc, substringLen, one); |
554 | |
555 | // Set length to zero if bounds were reversed (Fortran 2018 9.4.1) |
556 | auto zero = builder.createIntegerConstant(loc, substringLen.getType(), 0); |
557 | auto cdt = builder.create<mlir::arith::CmpIOp>( |
558 | loc, mlir::arith::CmpIPredicate::slt, substringLen, zero); |
559 | substringLen = |
560 | builder.create<mlir::arith::SelectOp>(loc, cdt, zero, substringLen); |
561 | |
562 | return {substringRef, substringLen}; |
563 | } |
564 | |
565 | mlir::Value |
566 | fir::factory::CharacterExprHelper::createLenTrim(const fir::CharBoxValue &str) { |
567 | // Note: Runtime for LEN_TRIM should also be available at some |
568 | // point. For now use an inlined implementation. |
569 | auto indexType = builder.getIndexType(); |
570 | auto len = builder.createConvert(loc, indexType, str.getLen()); |
571 | auto one = builder.createIntegerConstant(loc, indexType, 1); |
572 | auto minusOne = builder.createIntegerConstant(loc, indexType, -1); |
573 | auto zero = builder.createIntegerConstant(loc, indexType, 0); |
574 | auto trueVal = builder.createIntegerConstant(loc, builder.getI1Type(), 1); |
575 | auto blank = createBlankConstantCode(getCharacterType(str)); |
576 | mlir::Value lastChar = builder.create<mlir::arith::SubIOp>(loc, len, one); |
577 | |
578 | auto iterWhile = |
579 | builder.create<fir::IterWhileOp>(loc, lastChar, zero, minusOne, trueVal, |
580 | /*returnFinalCount=*/false, lastChar); |
581 | auto insPt = builder.saveInsertionPoint(); |
582 | builder.setInsertionPointToStart(iterWhile.getBody()); |
583 | auto index = iterWhile.getInductionVar(); |
584 | // Look for first non-blank from the right of the character. |
585 | auto fromBuff = getCharBoxBuffer(str); |
586 | auto elemAddr = createElementAddr(fromBuff, index); |
587 | auto codeAddr = |
588 | builder.createConvert(loc, builder.getRefType(blank.getType()), elemAddr); |
589 | auto c = builder.create<fir::LoadOp>(loc, codeAddr); |
590 | auto isBlank = builder.create<mlir::arith::CmpIOp>( |
591 | loc, mlir::arith::CmpIPredicate::eq, blank, c); |
592 | llvm::SmallVector<mlir::Value> results = {isBlank, index}; |
593 | builder.create<fir::ResultOp>(loc, results); |
594 | builder.restoreInsertionPoint(insPt); |
595 | // Compute length after iteration (zero if all blanks) |
596 | mlir::Value newLen = |
597 | builder.create<mlir::arith::AddIOp>(loc, iterWhile.getResult(1), one); |
598 | auto result = builder.create<mlir::arith::SelectOp>( |
599 | loc, iterWhile.getResult(0), zero, newLen); |
600 | return builder.createConvert(loc, builder.getCharacterLengthType(), result); |
601 | } |
602 | |
603 | fir::CharBoxValue |
604 | fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type, |
605 | int len) { |
606 | assert(len >= 0 && "expected positive length" ); |
607 | auto kind = recoverCharacterType(type).getFKind(); |
608 | auto charType = fir::CharacterType::get(builder.getContext(), kind, len); |
609 | auto addr = builder.create<fir::AllocaOp>(loc, charType); |
610 | auto mlirLen = |
611 | builder.createIntegerConstant(loc, builder.getCharacterLengthType(), len); |
612 | return {addr, mlirLen}; |
613 | } |
614 | |
615 | // Returns integer with code for blank. The integer has the same |
616 | // size as the character. Blank has ascii space code for all kinds. |
617 | mlir::Value fir::factory::CharacterExprHelper::createBlankConstantCode( |
618 | fir::CharacterType type) { |
619 | auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind()); |
620 | auto intType = builder.getIntegerType(bits); |
621 | return builder.createIntegerConstant(loc, intType, ' '); |
622 | } |
623 | |
624 | mlir::Value fir::factory::CharacterExprHelper::createBlankConstant( |
625 | fir::CharacterType type) { |
626 | return createSingletonFromCode(createBlankConstantCode(type), |
627 | type.getFKind()); |
628 | } |
629 | |
630 | void fir::factory::CharacterExprHelper::createAssign( |
631 | const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs) { |
632 | if (auto *str = rhs.getBoxOf<fir::CharBoxValue>()) { |
633 | if (auto *to = lhs.getBoxOf<fir::CharBoxValue>()) { |
634 | createAssign(*to, *str); |
635 | return; |
636 | } |
637 | } |
638 | TODO(loc, "character array assignment" ); |
639 | // Note that it is not sure the array aspect should be handled |
640 | // by this utility. |
641 | } |
642 | |
643 | mlir::Value |
644 | fir::factory::CharacterExprHelper::createEmboxChar(mlir::Value addr, |
645 | mlir::Value len) { |
646 | return createEmbox(fir::CharBoxValue{addr, len}); |
647 | } |
648 | |
649 | std::pair<mlir::Value, mlir::Value> |
650 | fir::factory::CharacterExprHelper::createUnboxChar(mlir::Value boxChar) { |
651 | using T = std::pair<mlir::Value, mlir::Value>; |
652 | return toExtendedValue(boxChar).match( |
653 | [](const fir::CharBoxValue &b) -> T { |
654 | return {b.getBuffer(), b.getLen()}; |
655 | }, |
656 | [](const fir::CharArrayBoxValue &b) -> T { |
657 | return {b.getBuffer(), b.getLen()}; |
658 | }, |
659 | [](const auto &) -> T { llvm::report_fatal_error("not a character" ); }); |
660 | } |
661 | |
662 | bool fir::factory::CharacterExprHelper::isCharacterLiteral(mlir::Type type) { |
663 | if (auto seqType = mlir::dyn_cast<fir::SequenceType>(type)) |
664 | return (seqType.getShape().size() == 1) && |
665 | fir::isa_char(seqType.getEleTy()); |
666 | return false; |
667 | } |
668 | |
669 | fir::KindTy |
670 | fir::factory::CharacterExprHelper::getCharacterKind(mlir::Type type) { |
671 | assert(isCharacterScalar(type) && "expected scalar character" ); |
672 | return recoverCharacterType(type).getFKind(); |
673 | } |
674 | |
675 | fir::KindTy |
676 | fir::factory::CharacterExprHelper::getCharacterOrSequenceKind(mlir::Type type) { |
677 | return recoverCharacterType(type).getFKind(); |
678 | } |
679 | |
680 | bool fir::factory::CharacterExprHelper::hasConstantLengthInType( |
681 | const fir::ExtendedValue &exv) { |
682 | auto charTy = recoverCharacterType(fir::getBase(exv).getType()); |
683 | return charTy.hasConstantLen(); |
684 | } |
685 | |
686 | mlir::Value |
687 | fir::factory::CharacterExprHelper::createSingletonFromCode(mlir::Value code, |
688 | int kind) { |
689 | auto charType = fir::CharacterType::get(builder.getContext(), kind, 1); |
690 | auto bits = builder.getKindMap().getCharacterBitsize(kind); |
691 | auto intType = builder.getIntegerType(bits); |
692 | auto cast = builder.createConvert(loc, intType, code); |
693 | auto undef = builder.create<fir::UndefOp>(loc, charType); |
694 | auto zero = builder.getIntegerAttr(builder.getIndexType(), 0); |
695 | return builder.create<fir::InsertValueOp>(loc, charType, undef, cast, |
696 | builder.getArrayAttr(zero)); |
697 | } |
698 | |
699 | mlir::Value fir::factory::CharacterExprHelper::extractCodeFromSingleton( |
700 | mlir::Value singleton) { |
701 | auto type = getCharacterType(singleton); |
702 | assert(type.getLen() == 1); |
703 | auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind()); |
704 | auto intType = builder.getIntegerType(bits); |
705 | auto zero = builder.getIntegerAttr(builder.getIndexType(), 0); |
706 | return builder.create<fir::ExtractValueOp>(loc, intType, singleton, |
707 | builder.getArrayAttr(zero)); |
708 | } |
709 | |
710 | mlir::Value |
711 | fir::factory::CharacterExprHelper::readLengthFromBox(mlir::Value box) { |
712 | auto charTy = recoverCharacterType(box.getType()); |
713 | return readLengthFromBox(box, charTy); |
714 | } |
715 | |
716 | mlir::Value fir::factory::CharacterExprHelper::readLengthFromBox( |
717 | mlir::Value box, fir::CharacterType charTy) { |
718 | auto lenTy = builder.getCharacterLengthType(); |
719 | auto size = builder.create<fir::BoxEleSizeOp>(loc, lenTy, box); |
720 | auto bits = builder.getKindMap().getCharacterBitsize(charTy.getFKind()); |
721 | auto width = bits / 8; |
722 | if (width > 1) { |
723 | auto widthVal = builder.createIntegerConstant(loc, lenTy, width); |
724 | return builder.create<mlir::arith::DivSIOp>(loc, size, widthVal); |
725 | } |
726 | return size; |
727 | } |
728 | |
729 | mlir::Value fir::factory::CharacterExprHelper::getLength(mlir::Value memref) { |
730 | auto memrefType = memref.getType(); |
731 | auto charType = recoverCharacterType(memrefType); |
732 | assert(charType && "must be a character type" ); |
733 | if (charType.hasConstantLen()) |
734 | return builder.createIntegerConstant(loc, builder.getCharacterLengthType(), |
735 | charType.getLen()); |
736 | if (mlir::isa<fir::BoxType>(memrefType)) |
737 | return readLengthFromBox(memref); |
738 | if (mlir::isa<fir::BoxCharType>(memrefType)) |
739 | return createUnboxChar(memref).second; |
740 | |
741 | // Length cannot be deduced from memref. |
742 | return {}; |
743 | } |
744 | |
745 | std::pair<mlir::Value, mlir::Value> |
746 | fir::factory::extractCharacterProcedureTuple(fir::FirOpBuilder &builder, |
747 | mlir::Location loc, |
748 | mlir::Value tuple, |
749 | bool openBoxProc) { |
750 | mlir::TupleType tupleType = mlir::cast<mlir::TupleType>(tuple.getType()); |
751 | mlir::Value addr = builder.create<fir::ExtractValueOp>( |
752 | loc, tupleType.getType(0), tuple, |
753 | builder.getArrayAttr( |
754 | {builder.getIntegerAttr(builder.getIndexType(), 0)})); |
755 | mlir::Value proc = [&]() -> mlir::Value { |
756 | if (openBoxProc) |
757 | if (auto addrTy = mlir::dyn_cast<fir::BoxProcType>(addr.getType())) |
758 | return builder.create<fir::BoxAddrOp>(loc, addrTy.getEleTy(), addr); |
759 | return addr; |
760 | }(); |
761 | mlir::Value len = builder.create<fir::ExtractValueOp>( |
762 | loc, tupleType.getType(1), tuple, |
763 | builder.getArrayAttr( |
764 | {builder.getIntegerAttr(builder.getIndexType(), 1)})); |
765 | return {proc, len}; |
766 | } |
767 | |
768 | mlir::Value fir::factory::createCharacterProcedureTuple( |
769 | fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argTy, |
770 | mlir::Value addr, mlir::Value len) { |
771 | mlir::TupleType tupleType = mlir::cast<mlir::TupleType>(argTy); |
772 | addr = builder.createConvert(loc, tupleType.getType(0), addr); |
773 | if (len) |
774 | len = builder.createConvert(loc, tupleType.getType(1), len); |
775 | else |
776 | len = builder.create<fir::UndefOp>(loc, tupleType.getType(1)); |
777 | mlir::Value tuple = builder.create<fir::UndefOp>(loc, tupleType); |
778 | tuple = builder.create<fir::InsertValueOp>( |
779 | loc, tupleType, tuple, addr, |
780 | builder.getArrayAttr( |
781 | {builder.getIntegerAttr(builder.getIndexType(), 0)})); |
782 | tuple = builder.create<fir::InsertValueOp>( |
783 | loc, tupleType, tuple, len, |
784 | builder.getArrayAttr( |
785 | {builder.getIntegerAttr(builder.getIndexType(), 1)})); |
786 | return tuple; |
787 | } |
788 | |
789 | mlir::Type |
790 | fir::factory::getCharacterProcedureTupleType(mlir::Type funcPointerType) { |
791 | mlir::MLIRContext *context = funcPointerType.getContext(); |
792 | mlir::Type lenType = mlir::IntegerType::get(context, 64); |
793 | return mlir::TupleType::get(context, {funcPointerType, lenType}); |
794 | } |
795 | |
796 | fir::CharBoxValue fir::factory::CharacterExprHelper::createCharExtremum( |
797 | bool predIsMin, llvm::ArrayRef<fir::CharBoxValue> opCBVs) { |
798 | // inputs: we are given a vector of all of the charboxes of the arguments |
799 | // passed to hlfir.char_extremum, as well as the predicate for whether we |
800 | // want llt or lgt |
801 | // |
802 | // note: we know that, regardless of whether we're looking at smallest or |
803 | // largest char, the size of the output buffer will be the same size as the |
804 | // largest character out of all of the operands. so, we find the biggest |
805 | // length first. It's okay if these char lengths are not known at compile |
806 | // time. |
807 | |
808 | fir::CharBoxValue firstCBV = opCBVs[0]; |
809 | mlir::Value firstBuf = getCharBoxBuffer(firstCBV); |
810 | auto firstLen = builder.createConvert(loc, builder.getCharacterLengthType(), |
811 | firstCBV.getLen()); |
812 | |
813 | mlir::Value resultBuf = firstBuf; |
814 | mlir::Value resultLen = firstLen; |
815 | mlir::Value biggestLen = firstLen; |
816 | |
817 | // values for casting buf type and len type |
818 | auto typeLen = fir::CharacterType::unknownLen(); |
819 | auto kind = recoverCharacterType(firstBuf.getType()).getFKind(); |
820 | auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen); |
821 | auto type = fir::ReferenceType::get(charTy); |
822 | |
823 | size_t numOperands = opCBVs.size(); |
824 | for (size_t cbv_idx = 1; cbv_idx < numOperands; ++cbv_idx) { |
825 | auto currChar = opCBVs[cbv_idx]; |
826 | auto currBuf = getCharBoxBuffer(currChar); |
827 | auto currLen = builder.createConvert(loc, builder.getCharacterLengthType(), |
828 | currChar.getLen()); |
829 | // biggest len result |
830 | mlir::Value lhsBigger = builder.create<mlir::arith::CmpIOp>( |
831 | loc, mlir::arith::CmpIPredicate::uge, biggestLen, currLen); |
832 | biggestLen = builder.create<mlir::arith::SelectOp>(loc, lhsBigger, |
833 | biggestLen, currLen); |
834 | |
835 | auto cmp = predIsMin ? mlir::arith::CmpIPredicate::slt |
836 | : mlir::arith::CmpIPredicate::sgt; |
837 | |
838 | // lexical compare result |
839 | mlir::Value resultCmp = fir::runtime::genCharCompare( |
840 | builder, loc, cmp, currBuf, currLen, resultBuf, resultLen); |
841 | |
842 | // it's casting (to unknown size) time! |
843 | resultBuf = builder.createConvert(loc, type, resultBuf); |
844 | currBuf = builder.createConvert(loc, type, currBuf); |
845 | |
846 | resultBuf = builder.create<mlir::arith::SelectOp>(loc, resultCmp, currBuf, |
847 | resultBuf); |
848 | resultLen = builder.create<mlir::arith::SelectOp>(loc, resultCmp, currLen, |
849 | resultLen); |
850 | } |
851 | |
852 | // now that we know the lexicographically biggest/smallest char and which char |
853 | // had the biggest len, we can populate a temp CBV and return it |
854 | fir::CharBoxValue temp = createCharacterTemp(resultBuf.getType(), biggestLen); |
855 | auto toBuf = temp; |
856 | fir::CharBoxValue fromBuf{resultBuf, resultLen}; |
857 | createAssign(toBuf, fromBuf); |
858 | return temp; |
859 | } |
860 | |
861 | fir::CharBoxValue |
862 | fir::factory::convertCharacterKind(fir::FirOpBuilder &builder, |
863 | mlir::Location loc, |
864 | fir::CharBoxValue srcBoxChar, int toKind) { |
865 | // Use char_convert. Each code point is translated from a |
866 | // narrower/wider encoding to the target encoding. For example, 'A' |
867 | // may be translated from 0x41 : i8 to 0x0041 : i16. The symbol |
868 | // for euro (0x20AC : i16) may be translated from a wide character |
869 | // to "0xE2 0x82 0xAC" : UTF-8. |
870 | mlir::Value bufferSize = srcBoxChar.getLen(); |
871 | auto kindMap = builder.getKindMap(); |
872 | mlir::Value boxCharAddr = srcBoxChar.getAddr(); |
873 | auto fromTy = boxCharAddr.getType(); |
874 | if (auto charTy = mlir::dyn_cast<fir::CharacterType>(fromTy)) { |
875 | // boxchar is a value, not a variable. Turn it into a temporary. |
876 | // As a value, it ought to have a constant LEN value. |
877 | assert(charTy.hasConstantLen() && "must have constant length" ); |
878 | mlir::Value tmp = builder.createTemporary(loc, charTy); |
879 | builder.create<fir::StoreOp>(loc, boxCharAddr, tmp); |
880 | boxCharAddr = tmp; |
881 | } |
882 | auto fromBits = kindMap.getCharacterBitsize( |
883 | mlir::cast<fir::CharacterType>(fir::unwrapRefType(fromTy)).getFKind()); |
884 | auto toBits = kindMap.getCharacterBitsize(toKind); |
885 | if (toBits < fromBits) { |
886 | // Scale by relative ratio to give a buffer of the same length. |
887 | auto ratio = builder.createIntegerConstant(loc, bufferSize.getType(), |
888 | fromBits / toBits); |
889 | bufferSize = builder.create<mlir::arith::MulIOp>(loc, bufferSize, ratio); |
890 | } |
891 | mlir::Type toType = |
892 | fir::CharacterType::getUnknownLen(builder.getContext(), toKind); |
893 | auto dest = builder.createTemporary(loc, toType, /*name=*/{}, /*shape=*/{}, |
894 | mlir::ValueRange{bufferSize}); |
895 | builder.create<fir::CharConvertOp>(loc, boxCharAddr, srcBoxChar.getLen(), |
896 | dest); |
897 | return fir::CharBoxValue{dest, srcBoxChar.getLen()}; |
898 | } |
899 | |