#include <mbgl/text/bidi.hpp>
#include <mbgl/util/traits.hpp>

#include <unicode/ubidi.h>
#include <unicode/ushape.h>

#include <memory>
#include <stdexcept>
#include <utility>
9 | |
10 | namespace mbgl { |
11 | |
12 | class BiDiImpl { |
13 | public: |
14 | BiDiImpl() : bidiText(ubidi_open()), bidiLine(ubidi_open()) { |
15 | } |
16 | ~BiDiImpl() { |
17 | ubidi_close(pBiDi: bidiText); |
18 | ubidi_close(pBiDi: bidiLine); |
19 | } |
20 | |
21 | UBiDi* bidiText = nullptr; |
22 | UBiDi* bidiLine = nullptr; |
23 | }; |
24 | |
25 | BiDi::BiDi() : impl(std::make_unique<BiDiImpl>()) {} |
26 | BiDi::~BiDi() = default; |
27 | |
28 | // Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining |
29 | // logical order. Output won't be intelligible until the bidirectional algorithm is applied |
30 | std::u16string applyArabicShaping(const std::u16string& input) { |
31 | UErrorCode errorCode = U_ZERO_ERROR; |
32 | |
33 | const int32_t outputLength = |
34 | u_shapeArabic(source: mbgl::utf16char_cast<const UChar*>(in: input.c_str()), sourceLength: static_cast<int32_t>(input.size()), dest: nullptr, destSize: 0, |
35 | options: (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) | |
36 | (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK), |
37 | pErrorCode: &errorCode); |
38 | |
39 | // Pre-flighting will always set U_BUFFER_OVERFLOW_ERROR |
40 | errorCode = U_ZERO_ERROR; |
41 | |
42 | std::u16string outputText(outputLength, 0); |
43 | |
44 | u_shapeArabic(source: mbgl::utf16char_cast<const UChar*>(in: input.c_str()), sourceLength: static_cast<int32_t>(input.size()), dest: mbgl::utf16char_cast<UChar*>(in: &outputText[0]), destSize: outputLength, |
45 | options: (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) | |
46 | (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK), |
47 | pErrorCode: &errorCode); |
48 | |
49 | // If the algorithm fails for any reason, fall back to non-transformed text |
50 | if (U_FAILURE(code: errorCode)) |
51 | return input; |
52 | |
53 | return outputText; |
54 | } |
55 | |
56 | void BiDi::mergeParagraphLineBreaks(std::set<size_t>& lineBreakPoints) { |
57 | int32_t paragraphCount = ubidi_countParagraphs(pBiDi: impl->bidiText); |
58 | for (int32_t i = 0; i < paragraphCount; i++) { |
59 | UErrorCode errorCode = U_ZERO_ERROR; |
60 | int32_t paragraphEndIndex; |
61 | ubidi_getParagraphByIndex(pBiDi: impl->bidiText, paraIndex: i, pParaStart: nullptr, pParaLimit: ¶graphEndIndex, pParaLevel: nullptr, pErrorCode: &errorCode); |
62 | |
63 | if (U_FAILURE(code: errorCode)) { |
64 | throw std::runtime_error(std::string("ProcessedBiDiText::mergeParagraphLineBreaks: " ) + |
65 | u_errorName(code: errorCode)); |
66 | } |
67 | |
68 | lineBreakPoints.insert(x: static_cast<std::size_t>(paragraphEndIndex)); |
69 | } |
70 | } |
71 | |
72 | std::vector<std::u16string> BiDi::applyLineBreaking(std::set<std::size_t> lineBreakPoints) { |
73 | // BiDi::getLine will error if called across a paragraph boundary, so we need to ensure that all |
74 | // paragraph boundaries are included in the set of line break points. The calling code might not |
75 | // include the line break because it didn't need to wrap at that point, or because the text was |
76 | // separated with a more exotic code point such as (U+001C) |
77 | mergeParagraphLineBreaks(lineBreakPoints); |
78 | |
79 | std::vector<std::u16string> transformedLines; |
80 | transformedLines.reserve(n: lineBreakPoints.size()); |
81 | |
82 | std::size_t start = 0; |
83 | for (std::size_t lineBreakPoint : lineBreakPoints) { |
84 | transformedLines.push_back(x: getLine(start, end: lineBreakPoint)); |
85 | start = lineBreakPoint; |
86 | } |
87 | |
88 | return transformedLines; |
89 | } |
90 | |
91 | std::vector<std::u16string> BiDi::processText(const std::u16string& input, |
92 | std::set<std::size_t> lineBreakPoints) { |
93 | UErrorCode errorCode = U_ZERO_ERROR; |
94 | |
95 | ubidi_setPara(pBiDi: impl->bidiText, text: mbgl::utf16char_cast<const UChar*>(in: input.c_str()), length: static_cast<int32_t>(input.size()), |
96 | UBIDI_DEFAULT_LTR, embeddingLevels: nullptr, pErrorCode: &errorCode); |
97 | |
98 | if (U_FAILURE(code: errorCode)) { |
99 | throw std::runtime_error(std::string("BiDi::processText: " ) + u_errorName(code: errorCode)); |
100 | } |
101 | |
102 | return applyLineBreaking(lineBreakPoints); |
103 | } |
104 | |
105 | std::u16string BiDi::getLine(std::size_t start, std::size_t end) { |
106 | UErrorCode errorCode = U_ZERO_ERROR; |
107 | ubidi_setLine(pParaBiDi: impl->bidiText, start: static_cast<int32_t>(start), limit: static_cast<int32_t>(end), pLineBiDi: impl->bidiLine, pErrorCode: &errorCode); |
108 | |
109 | if (U_FAILURE(code: errorCode)) { |
110 | throw std::runtime_error(std::string("BiDi::getLine (setLine): " ) + u_errorName(code: errorCode)); |
111 | } |
112 | |
113 | // Because we set UBIDI_REMOVE_BIDI_CONTROLS, the output may be smaller than what we reserve |
114 | // Setting UBIDI_INSERT_LRM_FOR_NUMERIC would require |
115 | // ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi) |
116 | const int32_t outputLength = ubidi_getProcessedLength(pBiDi: impl->bidiLine); |
117 | std::u16string outputText(outputLength, 0); |
118 | |
119 | // UBIDI_DO_MIRRORING: Apply unicode mirroring of characters like parentheses |
120 | // UBIDI_REMOVE_BIDI_CONTROLS: Now that all the lines are set, remove control characters so that |
121 | // they don't show up on screen (some fonts have glyphs representing them) |
122 | ubidi_writeReordered(pBiDi: impl->bidiLine, dest: mbgl::utf16char_cast<UChar*>(in: &outputText[0]), destSize: outputLength, |
123 | UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, pErrorCode: &errorCode); |
124 | |
125 | if (U_FAILURE(code: errorCode)) { |
126 | throw std::runtime_error(std::string("BiDi::getLine (writeReordered): " ) + |
127 | u_errorName(code: errorCode)); |
128 | } |
129 | |
130 | return outputText; |
131 | } |
132 | |
133 | } // end namespace mbgl |
134 | |