1 | //===-- Character.cpp -----------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "flang/Optimizer/Builder/Character.h" |
14 | #include "flang/Optimizer/Builder/DoLoopHelper.h" |
15 | #include "flang/Optimizer/Builder/FIRBuilder.h" |
16 | #include "flang/Optimizer/Builder/Todo.h" |
17 | #include "flang/Optimizer/Dialect/FIROpsSupport.h" |
18 | #include "llvm/Support/Debug.h" |
19 | #include <optional> |
20 | |
21 | #define DEBUG_TYPE "flang-lower-character" |
22 | |
23 | //===----------------------------------------------------------------------===// |
24 | // CharacterExprHelper implementation |
25 | //===----------------------------------------------------------------------===// |
26 | |
27 | /// Unwrap all the ref and box types and return the inner element type. |
28 | static mlir::Type unwrapBoxAndRef(mlir::Type type) { |
29 | if (auto boxType = type.dyn_cast<fir::BoxCharType>()) |
30 | return boxType.getEleTy(); |
31 | while (true) { |
32 | type = fir::unwrapRefType(type); |
33 | if (auto boxTy = type.dyn_cast<fir::BoxType>()) |
34 | type = boxTy.getEleTy(); |
35 | else |
36 | break; |
37 | } |
38 | return type; |
39 | } |
40 | |
41 | /// Unwrap base fir.char<kind,len> type. |
42 | static fir::CharacterType recoverCharacterType(mlir::Type type) { |
43 | type = fir::unwrapSequenceType(unwrapBoxAndRef(type)); |
44 | if (auto charTy = type.dyn_cast<fir::CharacterType>()) |
45 | return charTy; |
46 | llvm::report_fatal_error("expected a character type" ); |
47 | } |
48 | |
49 | bool fir::factory::CharacterExprHelper::isCharacterScalar(mlir::Type type) { |
50 | type = unwrapBoxAndRef(type); |
51 | return !type.isa<fir::SequenceType>() && fir::isa_char(type); |
52 | } |
53 | |
54 | bool fir::factory::CharacterExprHelper::isArray(mlir::Type type) { |
55 | type = unwrapBoxAndRef(type); |
56 | if (auto seqTy = type.dyn_cast<fir::SequenceType>()) |
57 | return fir::isa_char(seqTy.getEleTy()); |
58 | return false; |
59 | } |
60 | |
61 | fir::CharacterType |
62 | fir::factory::CharacterExprHelper::getCharacterType(mlir::Type type) { |
63 | assert(isCharacterScalar(type) && "expected scalar character" ); |
64 | return recoverCharacterType(type); |
65 | } |
66 | |
67 | fir::CharacterType |
68 | fir::factory::CharacterExprHelper::getCharType(mlir::Type type) { |
69 | return recoverCharacterType(type); |
70 | } |
71 | |
72 | fir::CharacterType fir::factory::CharacterExprHelper::getCharacterType( |
73 | const fir::CharBoxValue &box) { |
74 | return getCharacterType(box.getBuffer().getType()); |
75 | } |
76 | |
77 | fir::CharacterType |
78 | fir::factory::CharacterExprHelper::getCharacterType(mlir::Value str) { |
79 | return getCharacterType(str.getType()); |
80 | } |
81 | |
82 | /// Determine the static size of the character. Returns the computed size, not |
83 | /// an IR Value. |
84 | static std::optional<fir::CharacterType::LenType> |
85 | getCompileTimeLength(const fir::CharBoxValue &box) { |
86 | auto len = recoverCharacterType(box.getBuffer().getType()).getLen(); |
87 | if (len == fir::CharacterType::unknownLen()) |
88 | return {}; |
89 | return len; |
90 | } |
91 | |
92 | /// Detect the precondition that the value `str` does not reside in memory. Such |
93 | /// values will have a type `!fir.array<...x!fir.char<N>>` or `!fir.char<N>`. |
94 | LLVM_ATTRIBUTE_UNUSED static bool needToMaterialize(mlir::Value str) { |
95 | return str.getType().isa<fir::SequenceType>() || fir::isa_char(str.getType()); |
96 | } |
97 | |
98 | /// This is called only if `str` does not reside in memory. Such a bare string |
99 | /// value will be converted into a memory-based temporary and an extended |
100 | /// boxchar value returned. |
101 | fir::CharBoxValue |
102 | fir::factory::CharacterExprHelper::materializeValue(mlir::Value str) { |
103 | assert(needToMaterialize(str)); |
104 | auto ty = str.getType(); |
105 | assert(isCharacterScalar(ty) && "expected scalar character" ); |
106 | auto charTy = ty.dyn_cast<fir::CharacterType>(); |
107 | if (!charTy || charTy.getLen() == fir::CharacterType::unknownLen()) { |
108 | LLVM_DEBUG(llvm::dbgs() << "cannot materialize: " << str << '\n'); |
109 | llvm_unreachable("must be a !fir.char<N> type" ); |
110 | } |
111 | auto len = builder.createIntegerConstant( |
112 | loc, builder.getCharacterLengthType(), charTy.getLen()); |
113 | auto temp = builder.create<fir::AllocaOp>(loc, charTy); |
114 | builder.create<fir::StoreOp>(loc, str, temp); |
115 | LLVM_DEBUG(llvm::dbgs() << "materialized as local: " << str << " -> (" << temp |
116 | << ", " << len << ")\n" ); |
117 | return {temp, len}; |
118 | } |
119 | |
120 | fir::ExtendedValue |
121 | fir::factory::CharacterExprHelper::toExtendedValue(mlir::Value character, |
122 | mlir::Value len) { |
123 | auto lenType = builder.getCharacterLengthType(); |
124 | auto type = character.getType(); |
125 | auto base = fir::isa_passbyref_type(type) ? character : mlir::Value{}; |
126 | auto resultLen = len; |
127 | llvm::SmallVector<mlir::Value> extents; |
128 | |
129 | if (auto eleType = fir::dyn_cast_ptrEleTy(type)) |
130 | type = eleType; |
131 | |
132 | if (auto arrayType = type.dyn_cast<fir::SequenceType>()) { |
133 | type = arrayType.getEleTy(); |
134 | auto indexType = builder.getIndexType(); |
135 | for (auto extent : arrayType.getShape()) { |
136 | if (extent == fir::SequenceType::getUnknownExtent()) |
137 | break; |
138 | extents.emplace_back( |
139 | builder.createIntegerConstant(loc, indexType, extent)); |
140 | } |
141 | // Last extent might be missing in case of assumed-size. If more extents |
142 | // could not be deduced from type, that's an error (a fir.box should |
143 | // have been used in the interface). |
144 | if (extents.size() + 1 < arrayType.getShape().size()) |
145 | mlir::emitError(loc, "cannot retrieve array extents from type" ); |
146 | } |
147 | |
148 | if (auto charTy = type.dyn_cast<fir::CharacterType>()) { |
149 | if (!resultLen && charTy.getLen() != fir::CharacterType::unknownLen()) |
150 | resultLen = builder.createIntegerConstant(loc, lenType, charTy.getLen()); |
151 | } else if (auto boxCharType = type.dyn_cast<fir::BoxCharType>()) { |
152 | auto refType = builder.getRefType(boxCharType.getEleTy()); |
153 | // If the embox is accessible, use its operand to avoid filling |
154 | // the generated fir with embox/unbox. |
155 | mlir::Value boxCharLen; |
156 | if (auto definingOp = character.getDefiningOp()) { |
157 | if (auto box = mlir::dyn_cast<fir::EmboxCharOp>(definingOp)) { |
158 | base = box.getMemref(); |
159 | boxCharLen = box.getLen(); |
160 | } |
161 | } |
162 | if (!boxCharLen) { |
163 | auto unboxed = |
164 | builder.create<fir::UnboxCharOp>(loc, refType, lenType, character); |
165 | base = builder.createConvert(loc, refType, unboxed.getResult(0)); |
166 | boxCharLen = unboxed.getResult(1); |
167 | } |
168 | if (!resultLen) { |
169 | resultLen = boxCharLen; |
170 | } |
171 | } else if (type.isa<fir::BoxType>()) { |
172 | mlir::emitError(loc, "descriptor or derived type not yet handled" ); |
173 | } else { |
174 | llvm_unreachable("Cannot translate mlir::Value to character ExtendedValue" ); |
175 | } |
176 | |
177 | if (!base) { |
178 | if (auto load = |
179 | mlir::dyn_cast_or_null<fir::LoadOp>(character.getDefiningOp())) { |
180 | base = load.getOperand(); |
181 | } else { |
182 | return materializeValue(fir::getBase(character)); |
183 | } |
184 | } |
185 | if (!resultLen) |
186 | llvm::report_fatal_error("no dynamic length found for character" ); |
187 | if (!extents.empty()) |
188 | return fir::CharArrayBoxValue{base, resultLen, extents}; |
189 | return fir::CharBoxValue{base, resultLen}; |
190 | } |
191 | |
192 | static mlir::Type getSingletonCharType(mlir::MLIRContext *ctxt, int kind) { |
193 | return fir::CharacterType::getSingleton(ctxt, kind); |
194 | } |
195 | |
196 | mlir::Value |
197 | fir::factory::CharacterExprHelper::createEmbox(const fir::CharBoxValue &box) { |
198 | // Base CharBoxValue of CharArrayBoxValue are ok here (do not require a scalar |
199 | // type) |
200 | auto charTy = recoverCharacterType(box.getBuffer().getType()); |
201 | auto boxCharType = |
202 | fir::BoxCharType::get(builder.getContext(), charTy.getFKind()); |
203 | auto refType = fir::ReferenceType::get(boxCharType.getEleTy()); |
204 | mlir::Value buff = box.getBuffer(); |
205 | // fir.boxchar requires a memory reference. Allocate temp if the character is |
206 | // not in memory. |
207 | if (!fir::isa_ref_type(buff.getType())) { |
208 | auto temp = builder.createTemporary(loc, buff.getType()); |
209 | builder.create<fir::StoreOp>(loc, buff, temp); |
210 | buff = temp; |
211 | } |
212 | // fir.emboxchar only accepts scalar, cast array buffer to a scalar buffer. |
213 | if (mlir::isa<fir::SequenceType>(fir::dyn_cast_ptrEleTy(buff.getType()))) |
214 | buff = builder.createConvert(loc, refType, buff); |
215 | // Convert in case the provided length is not of the integer type that must |
216 | // be used in boxchar. |
217 | auto len = builder.createConvert(loc, builder.getCharacterLengthType(), |
218 | box.getLen()); |
219 | return builder.create<fir::EmboxCharOp>(loc, boxCharType, buff, len); |
220 | } |
221 | |
222 | fir::CharBoxValue fir::factory::CharacterExprHelper::toScalarCharacter( |
223 | const fir::CharArrayBoxValue &box) { |
224 | if (box.getBuffer().getType().isa<fir::PointerType>()) |
225 | TODO(loc, "concatenating non contiguous character array into a scalar" ); |
226 | |
227 | // TODO: add a fast path multiplying new length at compile time if the info is |
228 | // in the array type. |
229 | auto lenType = builder.getCharacterLengthType(); |
230 | auto len = builder.createConvert(loc, lenType, box.getLen()); |
231 | for (auto extent : box.getExtents()) |
232 | len = builder.create<mlir::arith::MulIOp>( |
233 | loc, len, builder.createConvert(loc, lenType, extent)); |
234 | |
235 | // TODO: typeLen can be improved in compiled constant cases |
236 | // TODO: allow bare fir.array<> (no ref) conversion here ? |
237 | auto typeLen = fir::CharacterType::unknownLen(); |
238 | auto kind = recoverCharacterType(box.getBuffer().getType()).getFKind(); |
239 | auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen); |
240 | auto type = fir::ReferenceType::get(charTy); |
241 | auto buffer = builder.createConvert(loc, type, box.getBuffer()); |
242 | return {buffer, len}; |
243 | } |
244 | |
245 | mlir::Value fir::factory::CharacterExprHelper::createEmbox( |
246 | const fir::CharArrayBoxValue &box) { |
247 | // Use same embox as for scalar. It's losing the actual data size information |
248 | // (We do not multiply the length by the array size), but that is what Fortran |
249 | // call interfaces using boxchar expect. |
250 | return createEmbox(static_cast<const fir::CharBoxValue &>(box)); |
251 | } |
252 | |
253 | /// Get the address of the element at position \p index of the scalar character |
254 | /// \p buffer. |
255 | /// \p buffer must be of type !fir.ref<fir.char<k, len>>. The length may be |
256 | /// unknown. \p index must have any integer type, and is zero based. The return |
257 | /// value is a singleton address (!fir.ref<!fir.char<kind>>) |
258 | mlir::Value |
259 | fir::factory::CharacterExprHelper::createElementAddr(mlir::Value buffer, |
260 | mlir::Value index) { |
261 | // The only way to address an element of a fir.ref<char<kind, len>> is to cast |
262 | // it to a fir.array<len x fir.char<kind>> and use fir.coordinate_of. |
263 | auto bufferType = buffer.getType(); |
264 | assert(fir::isa_ref_type(bufferType)); |
265 | assert(isCharacterScalar(bufferType)); |
266 | auto charTy = recoverCharacterType(bufferType); |
267 | auto singleTy = getSingletonCharType(builder.getContext(), charTy.getFKind()); |
268 | auto singleRefTy = builder.getRefType(singleTy); |
269 | auto extent = fir::SequenceType::getUnknownExtent(); |
270 | if (charTy.getLen() != fir::CharacterType::unknownLen()) |
271 | extent = charTy.getLen(); |
272 | auto coorTy = builder.getRefType(fir::SequenceType::get({extent}, singleTy)); |
273 | |
274 | auto coor = builder.createConvert(loc, coorTy, buffer); |
275 | auto i = builder.createConvert(loc, builder.getIndexType(), index); |
276 | return builder.create<fir::CoordinateOp>(loc, singleRefTy, coor, i); |
277 | } |
278 | |
279 | /// Load a character out of `buff` from offset `index`. |
280 | /// `buff` must be a reference to memory. |
281 | mlir::Value |
282 | fir::factory::CharacterExprHelper::createLoadCharAt(mlir::Value buff, |
283 | mlir::Value index) { |
284 | LLVM_DEBUG(llvm::dbgs() << "load a char: " << buff << " type: " |
285 | << buff.getType() << " at: " << index << '\n'); |
286 | return builder.create<fir::LoadOp>(loc, createElementAddr(buff, index)); |
287 | } |
288 | |
289 | /// Store the singleton character `c` to `str` at offset `index`. |
290 | /// `str` must be a reference to memory. |
291 | void fir::factory::CharacterExprHelper::createStoreCharAt(mlir::Value str, |
292 | mlir::Value index, |
293 | mlir::Value c) { |
294 | LLVM_DEBUG(llvm::dbgs() << "store the char: " << c << " into: " << str |
295 | << " type: " << str.getType() << " at: " << index |
296 | << '\n'); |
297 | auto addr = createElementAddr(str, index); |
298 | builder.create<fir::StoreOp>(loc, c, addr); |
299 | } |
300 | |
301 | // FIXME: this temp is useless... either fir.coordinate_of needs to |
302 | // work on "loaded" characters (!fir.array<len x fir.char<kind>>) or |
303 | // character should never be loaded. |
304 | // If this is a fir.array<>, allocate and store the value so that |
305 | // fir.cooridnate_of can be use on the value. |
306 | mlir::Value fir::factory::CharacterExprHelper::getCharBoxBuffer( |
307 | const fir::CharBoxValue &box) { |
308 | auto buff = box.getBuffer(); |
309 | if (fir::isa_char(buff.getType())) { |
310 | auto newBuff = builder.create<fir::AllocaOp>(loc, buff.getType()); |
311 | builder.create<fir::StoreOp>(loc, buff, newBuff); |
312 | return newBuff; |
313 | } |
314 | return buff; |
315 | } |
316 | |
317 | /// Create a loop to copy `count` characters from `src` to `dest`. Note that the |
318 | /// KIND indicates the number of bits in a code point. (ASCII, UCS-2, or UCS-4.) |
319 | void fir::factory::CharacterExprHelper::createCopy( |
320 | const fir::CharBoxValue &dest, const fir::CharBoxValue &src, |
321 | mlir::Value count) { |
322 | auto fromBuff = getCharBoxBuffer(src); |
323 | auto toBuff = getCharBoxBuffer(dest); |
324 | LLVM_DEBUG(llvm::dbgs() << "create char copy from: " ; src.dump(); |
325 | llvm::dbgs() << " to: " ; dest.dump(); |
326 | llvm::dbgs() << " count: " << count << '\n'); |
327 | auto kind = getCharacterKind(src.getBuffer().getType()); |
328 | // If the src and dest are the same KIND, then use memmove to move the bits. |
329 | // We don't have to worry about overlapping ranges with memmove. |
330 | if (getCharacterKind(dest.getBuffer().getType()) == kind) { |
331 | auto bytes = builder.getKindMap().getCharacterBitsize(kind) / 8; |
332 | auto i64Ty = builder.getI64Type(); |
333 | auto kindBytes = builder.createIntegerConstant(loc, i64Ty, bytes); |
334 | auto castCount = builder.createConvert(loc, i64Ty, count); |
335 | auto totalBytes = |
336 | builder.create<mlir::arith::MulIOp>(loc, kindBytes, castCount); |
337 | auto notVolatile = builder.createBool(loc, false); |
338 | auto memmv = getLlvmMemmove(builder); |
339 | auto argTys = memmv.getFunctionType().getInputs(); |
340 | auto toPtr = builder.createConvert(loc, argTys[0], toBuff); |
341 | auto fromPtr = builder.createConvert(loc, argTys[1], fromBuff); |
342 | builder.create<fir::CallOp>( |
343 | loc, memmv, mlir::ValueRange{toPtr, fromPtr, totalBytes, notVolatile}); |
344 | return; |
345 | } |
346 | |
347 | // Convert a CHARACTER of one KIND into a CHARACTER of another KIND. |
348 | builder.create<fir::CharConvertOp>(loc, src.getBuffer(), count, |
349 | dest.getBuffer()); |
350 | } |
351 | |
352 | void fir::factory::CharacterExprHelper::createPadding( |
353 | const fir::CharBoxValue &str, mlir::Value lower, mlir::Value upper) { |
354 | auto blank = createBlankConstant(getCharacterType(str)); |
355 | // Always create the loop, if upper < lower, no iteration will be |
356 | // executed. |
357 | auto toBuff = getCharBoxBuffer(str); |
358 | fir::factory::DoLoopHelper{builder, loc}.createLoop( |
359 | lower, upper, [&](fir::FirOpBuilder &, mlir::Value index) { |
360 | createStoreCharAt(toBuff, index, blank); |
361 | }); |
362 | } |
363 | |
364 | fir::CharBoxValue |
365 | fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type, |
366 | mlir::Value len) { |
367 | auto kind = recoverCharacterType(type).getFKind(); |
368 | auto typeLen = fir::CharacterType::unknownLen(); |
369 | // If len is a constant, reflect the length in the type. |
370 | if (auto cstLen = getIntIfConstant(len)) |
371 | typeLen = *cstLen; |
372 | auto *ctxt = builder.getContext(); |
373 | auto charTy = fir::CharacterType::get(ctxt, kind, typeLen); |
374 | llvm::SmallVector<mlir::Value> lenParams; |
375 | if (typeLen == fir::CharacterType::unknownLen()) |
376 | lenParams.push_back(len); |
377 | auto ref = builder.allocateLocal(loc, charTy, "" , ".chrtmp" , |
378 | /*shape=*/std::nullopt, lenParams); |
379 | return {ref, len}; |
380 | } |
381 | |
382 | fir::CharBoxValue fir::factory::CharacterExprHelper::createTempFrom( |
383 | const fir::ExtendedValue &source) { |
384 | const auto *charBox = source.getCharBox(); |
385 | if (!charBox) |
386 | fir::emitFatalError(loc, "source must be a fir::CharBoxValue" ); |
387 | auto len = charBox->getLen(); |
388 | auto sourceTy = charBox->getBuffer().getType(); |
389 | auto temp = createCharacterTemp(sourceTy, len); |
390 | if (fir::isa_ref_type(sourceTy)) { |
391 | createCopy(temp, *charBox, len); |
392 | } else { |
393 | auto ref = builder.createConvert(loc, builder.getRefType(sourceTy), |
394 | temp.getBuffer()); |
395 | builder.create<fir::StoreOp>(loc, charBox->getBuffer(), ref); |
396 | } |
397 | return temp; |
398 | } |
399 | |
400 | // Simple length one character assignment without loops. |
401 | void fir::factory::CharacterExprHelper::createLengthOneAssign( |
402 | const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) { |
403 | auto addr = lhs.getBuffer(); |
404 | auto toTy = fir::unwrapRefType(addr.getType()); |
405 | mlir::Value val = rhs.getBuffer(); |
406 | if (fir::isa_ref_type(val.getType())) { |
407 | auto fromCharLen1RefTy = builder.getRefType(getSingletonCharType( |
408 | builder.getContext(), |
409 | getCharacterKind(fir::unwrapRefType(val.getType())))); |
410 | val = builder.create<fir::LoadOp>( |
411 | loc, builder.createConvert(loc, fromCharLen1RefTy, val)); |
412 | } |
413 | auto toCharLen1Ty = |
414 | getSingletonCharType(builder.getContext(), getCharacterKind(toTy)); |
415 | val = builder.createConvert(loc, toCharLen1Ty, val); |
416 | builder.create<fir::StoreOp>( |
417 | loc, val, |
418 | builder.createConvert(loc, builder.getRefType(toCharLen1Ty), addr)); |
419 | } |
420 | |
421 | /// Returns the minimum of integer mlir::Value \p a and \b. |
422 | mlir::Value genMin(fir::FirOpBuilder &builder, mlir::Location loc, |
423 | mlir::Value a, mlir::Value b) { |
424 | auto cmp = builder.create<mlir::arith::CmpIOp>( |
425 | loc, mlir::arith::CmpIPredicate::slt, a, b); |
426 | return builder.create<mlir::arith::SelectOp>(loc, cmp, a, b); |
427 | } |
428 | |
429 | void fir::factory::CharacterExprHelper::createAssign( |
430 | const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) { |
431 | auto rhsCstLen = getCompileTimeLength(rhs); |
432 | auto lhsCstLen = getCompileTimeLength(lhs); |
433 | bool compileTimeSameLength = false; |
434 | bool isLengthOneAssign = false; |
435 | |
436 | if (lhsCstLen && rhsCstLen && *lhsCstLen == *rhsCstLen) { |
437 | compileTimeSameLength = true; |
438 | if (*lhsCstLen == 1) |
439 | isLengthOneAssign = true; |
440 | } else if (rhs.getLen() == lhs.getLen()) { |
441 | compileTimeSameLength = true; |
442 | |
443 | // If the length values are the same for LHS and RHS, |
444 | // then we can rely on the constant length deduced from |
445 | // any of the two types. |
446 | if (lhsCstLen && *lhsCstLen == 1) |
447 | isLengthOneAssign = true; |
448 | if (rhsCstLen && *rhsCstLen == 1) |
449 | isLengthOneAssign = true; |
450 | |
451 | // We could have recognized constant operations here (e.g. |
452 | // two different arith.constant ops may produce the same value), |
453 | // but for now leave it to CSE to get rid of the duplicates. |
454 | } |
455 | if (isLengthOneAssign) { |
456 | createLengthOneAssign(lhs, rhs); |
457 | return; |
458 | } |
459 | |
460 | // Copy the minimum of the lhs and rhs lengths and pad the lhs remainder |
461 | // if needed. |
462 | auto copyCount = lhs.getLen(); |
463 | auto idxTy = builder.getIndexType(); |
464 | if (!compileTimeSameLength) { |
465 | auto lhsLen = builder.createConvert(loc, idxTy, lhs.getLen()); |
466 | auto rhsLen = builder.createConvert(loc, idxTy, rhs.getLen()); |
467 | copyCount = genMin(builder, loc, lhsLen, rhsLen); |
468 | } |
469 | |
470 | // Actual copy |
471 | createCopy(lhs, rhs, copyCount); |
472 | |
473 | // Pad if needed. |
474 | if (!compileTimeSameLength) { |
475 | auto one = builder.createIntegerConstant(loc, lhs.getLen().getType(), 1); |
476 | auto maxPadding = |
477 | builder.create<mlir::arith::SubIOp>(loc, lhs.getLen(), one); |
478 | createPadding(lhs, copyCount, maxPadding); |
479 | } |
480 | } |
481 | |
482 | fir::CharBoxValue fir::factory::CharacterExprHelper::createConcatenate( |
483 | const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) { |
484 | auto lhsLen = builder.createConvert(loc, builder.getCharacterLengthType(), |
485 | lhs.getLen()); |
486 | auto rhsLen = builder.createConvert(loc, builder.getCharacterLengthType(), |
487 | rhs.getLen()); |
488 | mlir::Value len = builder.create<mlir::arith::AddIOp>(loc, lhsLen, rhsLen); |
489 | auto temp = createCharacterTemp(getCharacterType(rhs), len); |
490 | createCopy(temp, lhs, lhsLen); |
491 | auto one = builder.createIntegerConstant(loc, len.getType(), 1); |
492 | auto upperBound = builder.create<mlir::arith::SubIOp>(loc, len, one); |
493 | auto lhsLenIdx = builder.createConvert(loc, builder.getIndexType(), lhsLen); |
494 | auto fromBuff = getCharBoxBuffer(rhs); |
495 | auto toBuff = getCharBoxBuffer(temp); |
496 | fir::factory::DoLoopHelper{builder, loc}.createLoop( |
497 | lhsLenIdx, upperBound, one, |
498 | [&](fir::FirOpBuilder &bldr, mlir::Value index) { |
499 | auto rhsIndex = bldr.create<mlir::arith::SubIOp>(loc, index, lhsLenIdx); |
500 | auto charVal = createLoadCharAt(fromBuff, rhsIndex); |
501 | createStoreCharAt(toBuff, index, charVal); |
502 | }); |
503 | return temp; |
504 | } |
505 | |
506 | mlir::Value fir::factory::CharacterExprHelper::genSubstringBase( |
507 | mlir::Value stringRawAddr, mlir::Value lowerBound, |
508 | mlir::Type substringAddrType, mlir::Value one) { |
509 | if (!one) |
510 | one = builder.createIntegerConstant(loc, lowerBound.getType(), 1); |
511 | auto offset = |
512 | builder.create<mlir::arith::SubIOp>(loc, lowerBound, one).getResult(); |
513 | auto addr = createElementAddr(stringRawAddr, offset); |
514 | return builder.createConvert(loc, substringAddrType, addr); |
515 | } |
516 | |
517 | fir::CharBoxValue fir::factory::CharacterExprHelper::createSubstring( |
518 | const fir::CharBoxValue &box, llvm::ArrayRef<mlir::Value> bounds) { |
519 | // Constant need to be materialize in memory to use fir.coordinate_of. |
520 | auto nbounds = bounds.size(); |
521 | if (nbounds < 1 || nbounds > 2) { |
522 | mlir::emitError(loc, "Incorrect number of bounds in substring" ); |
523 | return {mlir::Value{}, mlir::Value{}}; |
524 | } |
525 | mlir::SmallVector<mlir::Value> castBounds; |
526 | // Convert bounds to length type to do safe arithmetic on it. |
527 | for (auto bound : bounds) |
528 | castBounds.push_back( |
529 | builder.createConvert(loc, builder.getCharacterLengthType(), bound)); |
530 | auto lowerBound = castBounds[0]; |
531 | // FIR CoordinateOp is zero based but Fortran substring are one based. |
532 | auto kind = getCharacterKind(box.getBuffer().getType()); |
533 | auto charTy = fir::CharacterType::getUnknownLen(builder.getContext(), kind); |
534 | auto resultType = builder.getRefType(charTy); |
535 | auto one = builder.createIntegerConstant(loc, lowerBound.getType(), 1); |
536 | auto substringRef = |
537 | genSubstringBase(box.getBuffer(), lowerBound, resultType, one); |
538 | |
539 | // Compute the length. |
540 | mlir::Value substringLen; |
541 | if (nbounds < 2) { |
542 | substringLen = |
543 | builder.create<mlir::arith::SubIOp>(loc, box.getLen(), castBounds[0]); |
544 | } else { |
545 | substringLen = |
546 | builder.create<mlir::arith::SubIOp>(loc, castBounds[1], castBounds[0]); |
547 | } |
548 | substringLen = builder.create<mlir::arith::AddIOp>(loc, substringLen, one); |
549 | |
550 | // Set length to zero if bounds were reversed (Fortran 2018 9.4.1) |
551 | auto zero = builder.createIntegerConstant(loc, substringLen.getType(), 0); |
552 | auto cdt = builder.create<mlir::arith::CmpIOp>( |
553 | loc, mlir::arith::CmpIPredicate::slt, substringLen, zero); |
554 | substringLen = |
555 | builder.create<mlir::arith::SelectOp>(loc, cdt, zero, substringLen); |
556 | |
557 | return {substringRef, substringLen}; |
558 | } |
559 | |
560 | mlir::Value |
561 | fir::factory::CharacterExprHelper::createLenTrim(const fir::CharBoxValue &str) { |
562 | // Note: Runtime for LEN_TRIM should also be available at some |
563 | // point. For now use an inlined implementation. |
564 | auto indexType = builder.getIndexType(); |
565 | auto len = builder.createConvert(loc, indexType, str.getLen()); |
566 | auto one = builder.createIntegerConstant(loc, indexType, 1); |
567 | auto minusOne = builder.createIntegerConstant(loc, indexType, -1); |
568 | auto zero = builder.createIntegerConstant(loc, indexType, 0); |
569 | auto trueVal = builder.createIntegerConstant(loc, builder.getI1Type(), 1); |
570 | auto blank = createBlankConstantCode(getCharacterType(str)); |
571 | mlir::Value lastChar = builder.create<mlir::arith::SubIOp>(loc, len, one); |
572 | |
573 | auto iterWhile = |
574 | builder.create<fir::IterWhileOp>(loc, lastChar, zero, minusOne, trueVal, |
575 | /*returnFinalCount=*/false, lastChar); |
576 | auto insPt = builder.saveInsertionPoint(); |
577 | builder.setInsertionPointToStart(iterWhile.getBody()); |
578 | auto index = iterWhile.getInductionVar(); |
579 | // Look for first non-blank from the right of the character. |
580 | auto fromBuff = getCharBoxBuffer(str); |
581 | auto elemAddr = createElementAddr(fromBuff, index); |
582 | auto codeAddr = |
583 | builder.createConvert(loc, builder.getRefType(blank.getType()), elemAddr); |
584 | auto c = builder.create<fir::LoadOp>(loc, codeAddr); |
585 | auto isBlank = builder.create<mlir::arith::CmpIOp>( |
586 | loc, mlir::arith::CmpIPredicate::eq, blank, c); |
587 | llvm::SmallVector<mlir::Value> results = {isBlank, index}; |
588 | builder.create<fir::ResultOp>(loc, results); |
589 | builder.restoreInsertionPoint(insPt); |
590 | // Compute length after iteration (zero if all blanks) |
591 | mlir::Value newLen = |
592 | builder.create<mlir::arith::AddIOp>(loc, iterWhile.getResult(1), one); |
593 | auto result = builder.create<mlir::arith::SelectOp>( |
594 | loc, iterWhile.getResult(0), zero, newLen); |
595 | return builder.createConvert(loc, builder.getCharacterLengthType(), result); |
596 | } |
597 | |
598 | fir::CharBoxValue |
599 | fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type, |
600 | int len) { |
601 | assert(len >= 0 && "expected positive length" ); |
602 | auto kind = recoverCharacterType(type).getFKind(); |
603 | auto charType = fir::CharacterType::get(builder.getContext(), kind, len); |
604 | auto addr = builder.create<fir::AllocaOp>(loc, charType); |
605 | auto mlirLen = |
606 | builder.createIntegerConstant(loc, builder.getCharacterLengthType(), len); |
607 | return {addr, mlirLen}; |
608 | } |
609 | |
610 | // Returns integer with code for blank. The integer has the same |
611 | // size as the character. Blank has ascii space code for all kinds. |
612 | mlir::Value fir::factory::CharacterExprHelper::createBlankConstantCode( |
613 | fir::CharacterType type) { |
614 | auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind()); |
615 | auto intType = builder.getIntegerType(bits); |
616 | return builder.createIntegerConstant(loc, intType, ' '); |
617 | } |
618 | |
619 | mlir::Value fir::factory::CharacterExprHelper::createBlankConstant( |
620 | fir::CharacterType type) { |
621 | return createSingletonFromCode(createBlankConstantCode(type), |
622 | type.getFKind()); |
623 | } |
624 | |
625 | void fir::factory::CharacterExprHelper::createAssign( |
626 | const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs) { |
627 | if (auto *str = rhs.getBoxOf<fir::CharBoxValue>()) { |
628 | if (auto *to = lhs.getBoxOf<fir::CharBoxValue>()) { |
629 | createAssign(*to, *str); |
630 | return; |
631 | } |
632 | } |
633 | TODO(loc, "character array assignment" ); |
634 | // Note that it is not sure the array aspect should be handled |
635 | // by this utility. |
636 | } |
637 | |
638 | mlir::Value |
639 | fir::factory::CharacterExprHelper::createEmboxChar(mlir::Value addr, |
640 | mlir::Value len) { |
641 | return createEmbox(fir::CharBoxValue{addr, len}); |
642 | } |
643 | |
644 | std::pair<mlir::Value, mlir::Value> |
645 | fir::factory::CharacterExprHelper::createUnboxChar(mlir::Value boxChar) { |
646 | using T = std::pair<mlir::Value, mlir::Value>; |
647 | return toExtendedValue(boxChar).match( |
648 | [](const fir::CharBoxValue &b) -> T { |
649 | return {b.getBuffer(), b.getLen()}; |
650 | }, |
651 | [](const fir::CharArrayBoxValue &b) -> T { |
652 | return {b.getBuffer(), b.getLen()}; |
653 | }, |
654 | [](const auto &) -> T { llvm::report_fatal_error("not a character" ); }); |
655 | } |
656 | |
657 | bool fir::factory::CharacterExprHelper::isCharacterLiteral(mlir::Type type) { |
658 | if (auto seqType = type.dyn_cast<fir::SequenceType>()) |
659 | return (seqType.getShape().size() == 1) && |
660 | fir::isa_char(seqType.getEleTy()); |
661 | return false; |
662 | } |
663 | |
664 | fir::KindTy |
665 | fir::factory::CharacterExprHelper::getCharacterKind(mlir::Type type) { |
666 | assert(isCharacterScalar(type) && "expected scalar character" ); |
667 | return recoverCharacterType(type).getFKind(); |
668 | } |
669 | |
670 | fir::KindTy |
671 | fir::factory::CharacterExprHelper::getCharacterOrSequenceKind(mlir::Type type) { |
672 | return recoverCharacterType(type).getFKind(); |
673 | } |
674 | |
675 | bool fir::factory::CharacterExprHelper::hasConstantLengthInType( |
676 | const fir::ExtendedValue &exv) { |
677 | auto charTy = recoverCharacterType(fir::getBase(exv).getType()); |
678 | return charTy.hasConstantLen(); |
679 | } |
680 | |
681 | mlir::Value |
682 | fir::factory::CharacterExprHelper::createSingletonFromCode(mlir::Value code, |
683 | int kind) { |
684 | auto charType = fir::CharacterType::get(builder.getContext(), kind, 1); |
685 | auto bits = builder.getKindMap().getCharacterBitsize(kind); |
686 | auto intType = builder.getIntegerType(bits); |
687 | auto cast = builder.createConvert(loc, intType, code); |
688 | auto undef = builder.create<fir::UndefOp>(loc, charType); |
689 | auto zero = builder.getIntegerAttr(builder.getIndexType(), 0); |
690 | return builder.create<fir::InsertValueOp>(loc, charType, undef, cast, |
691 | builder.getArrayAttr(zero)); |
692 | } |
693 | |
694 | mlir::Value fir::factory::CharacterExprHelper::extractCodeFromSingleton( |
695 | mlir::Value singleton) { |
696 | auto type = getCharacterType(singleton); |
697 | assert(type.getLen() == 1); |
698 | auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind()); |
699 | auto intType = builder.getIntegerType(bits); |
700 | auto zero = builder.getIntegerAttr(builder.getIndexType(), 0); |
701 | return builder.create<fir::ExtractValueOp>(loc, intType, singleton, |
702 | builder.getArrayAttr(zero)); |
703 | } |
704 | |
705 | mlir::Value |
706 | fir::factory::CharacterExprHelper::readLengthFromBox(mlir::Value box) { |
707 | auto charTy = recoverCharacterType(box.getType()); |
708 | return readLengthFromBox(box, charTy); |
709 | } |
710 | |
711 | mlir::Value fir::factory::CharacterExprHelper::readLengthFromBox( |
712 | mlir::Value box, fir::CharacterType charTy) { |
713 | auto lenTy = builder.getCharacterLengthType(); |
714 | auto size = builder.create<fir::BoxEleSizeOp>(loc, lenTy, box); |
715 | auto bits = builder.getKindMap().getCharacterBitsize(charTy.getFKind()); |
716 | auto width = bits / 8; |
717 | if (width > 1) { |
718 | auto widthVal = builder.createIntegerConstant(loc, lenTy, width); |
719 | return builder.create<mlir::arith::DivSIOp>(loc, size, widthVal); |
720 | } |
721 | return size; |
722 | } |
723 | |
724 | mlir::Value fir::factory::CharacterExprHelper::getLength(mlir::Value memref) { |
725 | auto memrefType = memref.getType(); |
726 | auto charType = recoverCharacterType(memrefType); |
727 | assert(charType && "must be a character type" ); |
728 | if (charType.hasConstantLen()) |
729 | return builder.createIntegerConstant(loc, builder.getCharacterLengthType(), |
730 | charType.getLen()); |
731 | if (memrefType.isa<fir::BoxType>()) |
732 | return readLengthFromBox(memref); |
733 | if (memrefType.isa<fir::BoxCharType>()) |
734 | return createUnboxChar(memref).second; |
735 | |
736 | // Length cannot be deduced from memref. |
737 | return {}; |
738 | } |
739 | |
740 | std::pair<mlir::Value, mlir::Value> |
741 | fir::factory::extractCharacterProcedureTuple(fir::FirOpBuilder &builder, |
742 | mlir::Location loc, |
743 | mlir::Value tuple, |
744 | bool openBoxProc) { |
745 | mlir::TupleType tupleType = tuple.getType().cast<mlir::TupleType>(); |
746 | mlir::Value addr = builder.create<fir::ExtractValueOp>( |
747 | loc, tupleType.getType(0), tuple, |
748 | builder.getArrayAttr( |
749 | {builder.getIntegerAttr(builder.getIndexType(), 0)})); |
750 | mlir::Value proc = [&]() -> mlir::Value { |
751 | if (openBoxProc) |
752 | if (auto addrTy = addr.getType().dyn_cast<fir::BoxProcType>()) |
753 | return builder.create<fir::BoxAddrOp>(loc, addrTy.getEleTy(), addr); |
754 | return addr; |
755 | }(); |
756 | mlir::Value len = builder.create<fir::ExtractValueOp>( |
757 | loc, tupleType.getType(1), tuple, |
758 | builder.getArrayAttr( |
759 | {builder.getIntegerAttr(builder.getIndexType(), 1)})); |
760 | return {proc, len}; |
761 | } |
762 | |
763 | mlir::Value fir::factory::createCharacterProcedureTuple( |
764 | fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argTy, |
765 | mlir::Value addr, mlir::Value len) { |
766 | mlir::TupleType tupleType = argTy.cast<mlir::TupleType>(); |
767 | addr = builder.createConvert(loc, tupleType.getType(0), addr); |
768 | if (len) |
769 | len = builder.createConvert(loc, tupleType.getType(1), len); |
770 | else |
771 | len = builder.create<fir::UndefOp>(loc, tupleType.getType(1)); |
772 | mlir::Value tuple = builder.create<fir::UndefOp>(loc, tupleType); |
773 | tuple = builder.create<fir::InsertValueOp>( |
774 | loc, tupleType, tuple, addr, |
775 | builder.getArrayAttr( |
776 | {builder.getIntegerAttr(builder.getIndexType(), 0)})); |
777 | tuple = builder.create<fir::InsertValueOp>( |
778 | loc, tupleType, tuple, len, |
779 | builder.getArrayAttr( |
780 | {builder.getIntegerAttr(builder.getIndexType(), 1)})); |
781 | return tuple; |
782 | } |
783 | |
784 | mlir::Type |
785 | fir::factory::getCharacterProcedureTupleType(mlir::Type funcPointerType) { |
786 | mlir::MLIRContext *context = funcPointerType.getContext(); |
787 | mlir::Type lenType = mlir::IntegerType::get(context, 64); |
788 | return mlir::TupleType::get(context, {funcPointerType, lenType}); |
789 | } |
790 | |
791 | fir::CharBoxValue fir::factory::CharacterExprHelper::createCharExtremum( |
792 | bool predIsMin, llvm::ArrayRef<fir::CharBoxValue> opCBVs) { |
793 | // inputs: we are given a vector of all of the charboxes of the arguments |
794 | // passed to hlfir.char_extremum, as well as the predicate for whether we |
795 | // want llt or lgt |
796 | // |
797 | // note: we know that, regardless of whether we're looking at smallest or |
798 | // largest char, the size of the output buffer will be the same size as the |
799 | // largest character out of all of the operands. so, we find the biggest |
800 | // length first. It's okay if these char lengths are not known at compile |
801 | // time. |
802 | |
803 | fir::CharBoxValue firstCBV = opCBVs[0]; |
804 | mlir::Value firstBuf = getCharBoxBuffer(firstCBV); |
805 | auto firstLen = builder.createConvert(loc, builder.getCharacterLengthType(), |
806 | firstCBV.getLen()); |
807 | |
808 | mlir::Value resultBuf = firstBuf; |
809 | mlir::Value resultLen = firstLen; |
810 | mlir::Value biggestLen = firstLen; |
811 | |
812 | // values for casting buf type and len type |
813 | auto typeLen = fir::CharacterType::unknownLen(); |
814 | auto kind = recoverCharacterType(firstBuf.getType()).getFKind(); |
815 | auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen); |
816 | auto type = fir::ReferenceType::get(charTy); |
817 | |
818 | size_t numOperands = opCBVs.size(); |
819 | for (size_t cbv_idx = 1; cbv_idx < numOperands; ++cbv_idx) { |
820 | auto currChar = opCBVs[cbv_idx]; |
821 | auto currBuf = getCharBoxBuffer(currChar); |
822 | auto currLen = builder.createConvert(loc, builder.getCharacterLengthType(), |
823 | currChar.getLen()); |
824 | // biggest len result |
825 | mlir::Value lhsBigger = builder.create<mlir::arith::CmpIOp>( |
826 | loc, mlir::arith::CmpIPredicate::uge, biggestLen, currLen); |
827 | biggestLen = builder.create<mlir::arith::SelectOp>(loc, lhsBigger, |
828 | biggestLen, currLen); |
829 | |
830 | auto cmp = predIsMin ? mlir::arith::CmpIPredicate::slt |
831 | : mlir::arith::CmpIPredicate::sgt; |
832 | |
833 | // lexical compare result |
834 | mlir::Value resultCmp = fir::runtime::genCharCompare( |
835 | builder, loc, cmp, currBuf, currLen, resultBuf, resultLen); |
836 | |
837 | // it's casting (to unknown size) time! |
838 | resultBuf = builder.createConvert(loc, type, resultBuf); |
839 | currBuf = builder.createConvert(loc, type, currBuf); |
840 | |
841 | resultBuf = builder.create<mlir::arith::SelectOp>(loc, resultCmp, currBuf, |
842 | resultBuf); |
843 | resultLen = builder.create<mlir::arith::SelectOp>(loc, resultCmp, currLen, |
844 | resultLen); |
845 | } |
846 | |
847 | // now that we know the lexicographically biggest/smallest char and which char |
848 | // had the biggest len, we can populate a temp CBV and return it |
849 | fir::CharBoxValue temp = createCharacterTemp(resultBuf.getType(), biggestLen); |
850 | auto toBuf = temp; |
851 | fir::CharBoxValue fromBuf{resultBuf, resultLen}; |
852 | createAssign(toBuf, fromBuf); |
853 | return temp; |
854 | } |
855 | |
856 | fir::CharBoxValue |
857 | fir::factory::convertCharacterKind(fir::FirOpBuilder &builder, |
858 | mlir::Location loc, |
859 | fir::CharBoxValue srcBoxChar, int toKind) { |
860 | // Use char_convert. Each code point is translated from a |
861 | // narrower/wider encoding to the target encoding. For example, 'A' |
862 | // may be translated from 0x41 : i8 to 0x0041 : i16. The symbol |
863 | // for euro (0x20AC : i16) may be translated from a wide character |
864 | // to "0xE2 0x82 0xAC" : UTF-8. |
865 | mlir::Value bufferSize = srcBoxChar.getLen(); |
866 | auto kindMap = builder.getKindMap(); |
867 | mlir::Value boxCharAddr = srcBoxChar.getAddr(); |
868 | auto fromTy = boxCharAddr.getType(); |
869 | if (auto charTy = fromTy.dyn_cast<fir::CharacterType>()) { |
870 | // boxchar is a value, not a variable. Turn it into a temporary. |
871 | // As a value, it ought to have a constant LEN value. |
872 | assert(charTy.hasConstantLen() && "must have constant length" ); |
873 | mlir::Value tmp = builder.createTemporary(loc, charTy); |
874 | builder.create<fir::StoreOp>(loc, boxCharAddr, tmp); |
875 | boxCharAddr = tmp; |
876 | } |
877 | auto fromBits = kindMap.getCharacterBitsize( |
878 | fir::unwrapRefType(fromTy).cast<fir::CharacterType>().getFKind()); |
879 | auto toBits = kindMap.getCharacterBitsize(toKind); |
880 | if (toBits < fromBits) { |
881 | // Scale by relative ratio to give a buffer of the same length. |
882 | auto ratio = builder.createIntegerConstant(loc, bufferSize.getType(), |
883 | fromBits / toBits); |
884 | bufferSize = builder.create<mlir::arith::MulIOp>(loc, bufferSize, ratio); |
885 | } |
886 | mlir::Type toType = |
887 | fir::CharacterType::getUnknownLen(builder.getContext(), toKind); |
888 | auto dest = builder.createTemporary(loc, toType, /*name=*/{}, /*shape=*/{}, |
889 | mlir::ValueRange{bufferSize}); |
890 | builder.create<fir::CharConvertOp>(loc, boxCharAddr, srcBoxChar.getLen(), |
891 | dest); |
892 | return fir::CharBoxValue{dest, srcBoxChar.getLen()}; |
893 | } |
894 | |